diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index f7b64b03a52fd..b5c062ea6d65e 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -48,6 +48,7 @@ Other enhancements
 - Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
 - Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
 - Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`)
+- Enhanced :func:`import_optional_dependency` with context-aware error messages that suggest relevant alternatives when dependencies are missing (:issue:`62001`)
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index 7e882bc242394..f43ca2d40e014 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -72,6 +72,140 @@
     "tables": "pytables",
 }
 
+# Mapping of operation contexts to alternative dependencies. Alternatives are
+# spelled the way users install them; they are compared with the missing
+# dependency case-insensitively, treating "_" and "-" as equivalent.
+OPERATION_CONTEXTS = {
+    "excel": {
+        "alternatives": [
+            "openpyxl",
+            "xlsxwriter",
+            "python-calamine",
+            "xlrd",
+            "pyxlsb",
+            "odfpy",
+        ],
+        "description": "Excel file operations",
+    },
+    "plotting": {
+        "alternatives": ["matplotlib"],
+        "description": "plotting operations",
+        "fallback": "Use df.describe() for text-based data summaries",
+    },
+    "html": {
+        "alternatives": ["lxml", "html5lib", "beautifulsoup4"],
+        "description": "HTML parsing",
+    },
+    "xml": {
+        "alternatives": ["lxml"],
+        "description": "XML parsing",
+    },
+    "sql": {
+        "alternatives": ["sqlalchemy", "psycopg2", "pymysql"],
+        "description": "SQL database operations",
+    },
+    "performance": {
+        "alternatives": ["numexpr", "bottleneck", "numba"],
+        "description": "performance acceleration",
+        "fallback": "Operations will use standard implementations",
+    },
+    "parquet": {
+        "alternatives": ["pyarrow", "fastparquet"],
+        "description": "Parquet file operations",
+    },
+    "feather": {
+        "alternatives": ["pyarrow"],
+        "description": "Feather file operations",
+    },
+    "orc": {
+        "alternatives": ["pyarrow"],
+        "description": "ORC file operations",
+    },
+    "hdf5": {
+        "alternatives": ["tables"],
+        "description": "HDF5 file operations",
+    },
+    "spss": {
+        "alternatives": ["pyreadstat"],
+        "description": "SPSS file operations",
+    },
+    "style": {
+        "alternatives": ["jinja2"],
+        "description": "DataFrame styling operations",
+    },
+    "compression": {
+        "alternatives": ["zstandard"],
+        "description": "data compression operations",
+    },
+    "clipboard": {
+        "alternatives": ["pyqt5", "qtpy"],
+        "description": "clipboard operations",
+    },
+}
+
+
+def _normalize_dependency_name(dependency: str) -> str:
+    """Return ``dependency`` lower-cased with ``_`` replaced by ``-``."""
+    return dependency.lower().replace("_", "-")
+
+
+def _build_context_message(
+    name: str, operation_context: str | None, extra: str, install_name: str
+) -> str:
+    """
+    Build an ImportError message with context-aware alternatives.
+
+    Parameters
+    ----------
+    name : str
+        The module name that failed to import.
+    operation_context : str or None
+        The operation context (e.g., 'excel', 'plotting'). Contexts that are
+        not keys of ``OPERATION_CONTEXTS`` add nothing to the message.
+    extra : str
+        Additional text to include in the ImportError message.
+    install_name : str
+        The package name to install.
+
+    Returns
+    -------
+    str
+        The enhanced error message.
+    """
+    msg = f"Missing optional dependency '{install_name}'."
+    if extra:
+        msg += f" {extra}"
+
+    context_info = (
+        OPERATION_CONTEXTS.get(operation_context) if operation_context else None
+    )
+    if context_info is not None:
+        # Never suggest the dependency that just failed to import, whichever of
+        # its import name or install name the alternatives list happens to use.
+        failed = {
+            _normalize_dependency_name(name),
+            _normalize_dependency_name(install_name),
+        }
+        alternatives = [
+            alt
+            for alt in context_info["alternatives"]
+            if _normalize_dependency_name(alt) not in failed
+        ]
+        if alternatives:
+            # Render as "a", "a or b", or "a, b, or c".
+            if len(alternatives) > 1:
+                alternatives[-1] = f"or {alternatives[-1]}"
+            separator = ", " if len(alternatives) > 2 else " "
+            msg += (
+                f" For {context_info['description']}, try installing "
+                f"{separator.join(alternatives)}."
+            )
+        if "fallback" in context_info:
+            msg += f" {context_info['fallback']}."
+
+    msg += f" Use pip or conda to install {install_name}."
+    return msg
+
 
 def get_version(module: types.ModuleType) -> str:
     version = getattr(module, "__version__", None)
@@ -91,6 +225,7 @@ def import_optional_dependency(
     min_version: str | None = ...,
     *,
     errors: Literal["raise"] = ...,
+    operation_context: str | None = ...,
 ) -> types.ModuleType: ...
 
 
@@ -101,6 +236,7 @@ def import_optional_dependency(
     min_version: str | None = ...,
     *,
     errors: Literal["warn", "ignore"],
+    operation_context: str | None = ...,
 ) -> types.ModuleType | None: ...
 
 
@@ -110,6 +246,7 @@ def import_optional_dependency(
     min_version: str | None = None,
     *,
     errors: Literal["raise", "warn", "ignore"] = "raise",
+    operation_context: str | None = None,
 ) -> types.ModuleType | None:
     """
     Import an optional dependency.
@@ -137,6 +274,11 @@ def import_optional_dependency(
     min_version : str, default None
         Specify a minimum version that is different from the global pandas
         minimum version required.
+    operation_context : str, default None
+        Provide context about the operation requiring this dependency to show
+        relevant alternatives in error messages. Supported contexts: 'excel',
+        'plotting', 'html', 'xml', 'sql', 'performance', 'parquet', 'feather',
+        'orc', 'hdf5', 'spss', 'style', 'compression', 'clipboard'.
     Returns
     -------
     maybe_module : Optional[ModuleType]
@@ -150,10 +292,7 @@ def import_optional_dependency(
     package_name = INSTALL_MAPPING.get(name)
     install_name = package_name if package_name is not None else name
 
-    msg = (
-        f"`Import {install_name}` failed. {extra} "
-        f"Use pip or conda to install the {install_name} package."
-    )
+    msg = _build_context_message(name, operation_context, extra, install_name)
     try:
         module = importlib.import_module(name)
     except ImportError as err:
diff --git a/pandas/io/excel/_calamine.py b/pandas/io/excel/_calamine.py
index 0bdd2b42aad51..a77f666bcdd95 100644
--- a/pandas/io/excel/_calamine.py
+++ b/pandas/io/excel/_calamine.py
@@ -56,7 +56,7 @@ def __init__(
         engine_kwargs : dict, optional
             Arbitrary keyword arguments passed to excel engine.
         """
-        import_optional_dependency("python_calamine")
+        import_optional_dependency("python_calamine", operation_context="excel")
         super().__init__(
             filepath_or_buffer,
             storage_options=storage_options,
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
index f79417d11080d..54e74f45a2f55 100644
--- a/pandas/io/excel/_odfreader.py
+++ b/pandas/io/excel/_odfreader.py
@@ -46,7 +46,7 @@ def __init__(
         engine_kwargs : dict, optional
             Arbitrary keyword arguments passed to excel engine.
         """
-        import_optional_dependency("odf")
+        import_optional_dependency("odf", operation_context="excel")
         super().__init__(
             filepath_or_buffer,
             storage_options=storage_options,
diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py
index 3055c68a93cbc..2b1eb2997084c 100644
--- a/pandas/io/excel/_openpyxl.py
+++ b/pandas/io/excel/_openpyxl.py
@@ -550,7 +550,7 @@ def __init__(
         engine_kwargs : dict, optional
             Arbitrary keyword arguments passed to excel engine.
         """
-        import_optional_dependency("openpyxl")
+        import_optional_dependency("openpyxl", operation_context="excel")
         super().__init__(
             filepath_or_buffer,
             storage_options=storage_options,
diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py
index a6e42616c2043..b46ca892f3ec2 100644
--- a/pandas/io/excel/_pyxlsb.py
+++ b/pandas/io/excel/_pyxlsb.py
@@ -40,7 +40,7 @@ def __init__(
         engine_kwargs : dict, optional
             Arbitrary keyword arguments passed to excel engine.
         """
-        import_optional_dependency("pyxlsb")
+        import_optional_dependency("pyxlsb", operation_context="excel")
         # This will call load_workbook on the filepath or buffer
         # And set the result to the book-attribute
         super().__init__(
diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py
index 5d39a840336eb..02244c09bb78d 100644
--- a/pandas/io/excel/_xlrd.py
+++ b/pandas/io/excel/_xlrd.py
@@ -42,7 +42,7 @@ def __init__(
             Arbitrary keyword arguments passed to excel engine.
         """
         err_msg = "Install xlrd >= 2.0.1 for xls Excel support"
-        import_optional_dependency("xlrd", extra=err_msg)
+        import_optional_dependency("xlrd", extra=err_msg, operation_context="excel")
         super().__init__(
             filepath_or_buffer,
             storage_options=storage_options,
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 183af3a03221b..e748d904e465e 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -913,10 +913,10 @@ def _parser_dispatch(flavor: HTMLFlavors | None) -> type[_HtmlFrameParser]:
         )
 
     if flavor in ("bs4", "html5lib"):
-        import_optional_dependency("html5lib")
-        import_optional_dependency("bs4")
+        import_optional_dependency("html5lib", operation_context="html")
+        import_optional_dependency("bs4", operation_context="html")
     else:
-        import_optional_dependency("lxml.etree")
+        import_optional_dependency("lxml.etree", operation_context="html")
     return _valid_parsers[flavor]
 
 
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 9670b5439c87e..46a389d604906 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -13,6 +13,8 @@
     Substitution,
 )
 
+from pandas.compat._optional import import_optional_dependency
+
 from pandas.core.dtypes.common import (
     is_integer,
     is_list_like,
@@ -1947,15 +1949,17 @@ def _load_backend(backend: str) -> types.ModuleType:
     from importlib.metadata import entry_points
 
     if backend == "matplotlib":
-        # Because matplotlib is an optional dependency and first-party backend,
-        # we need to attempt an import here to raise an ImportError if needed.
-        try:
-            module = importlib.import_module("pandas.plotting._matplotlib")
-        except ImportError:
-            raise ImportError(
-                "matplotlib is required for plotting when the "
-                'default backend "matplotlib" is selected.'
-            ) from None
+        # Check matplotlib first so a missing install gets the context-aware
+        # message; the previous error text is kept verbatim as ``extra``.
+        import_optional_dependency(
+            "matplotlib",
+            extra=(
+                "matplotlib is required for plotting when the "
+                'default backend "matplotlib" is selected.'
+            ),
+            operation_context="plotting",
+        )
+        module = importlib.import_module("pandas.plotting._matplotlib")
         return module
 
     found_backend = False
diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py
index cd276914bfb21..7c0d1537796cb 100644
--- a/pandas/tests/test_optional_dependency.py
+++ b/pandas/tests/test_optional_dependency.py
@@ -12,7 +12,10 @@
 
 
 def test_import_optional():
-    match = "Import .*notapackage.* pip .* conda .* notapackage"
+    match = (
+        r"Missing optional dependency 'notapackage'.*"
+        r"Use pip or conda to install notapackage"
+    )
     with pytest.raises(ImportError, match=match) as exc_info:
         import_optional_dependency("notapackage")
     # The original exception should be there as context:
@@ -65,6 +68,75 @@ def test_bad_version(monkeypatch):
     assert result is None
 
 
+def test_operation_context_excel():
+    match = (
+        r"Missing optional dependency 'notapackage'.*"
+        r"For Excel file operations, try installing openpyxl, xlsxwriter, "
+        r"python-calamine.*"
+        r"Use pip or conda to install notapackage"
+    )
+    with pytest.raises(ImportError, match=match):
+        import_optional_dependency("notapackage", operation_context="excel")
+
+
+def test_operation_context_plotting():
+    match = (
+        r"Missing optional dependency 'notapackage'.*"
+        r"For plotting operations, try installing matplotlib.*"
+        r"Use df\.describe\(\) for text-based data summaries.*"
+        r"Use pip or conda to install notapackage"
+    )
+    with pytest.raises(ImportError, match=match):
+        import_optional_dependency("notapackage", operation_context="plotting")
+
+
+def test_operation_context_with_extra():
+    match = (
+        r"Missing optional dependency 'notapackage'.*Additional context.*"
+        r"For Excel file operations, try installing openpyxl, xlsxwriter, "
+        r"python-calamine.*"
+        r"Use pip or conda to install notapackage"
+    )
+    with pytest.raises(ImportError, match=match):
+        import_optional_dependency(
+            "notapackage",
+            extra="Additional context.",
+            operation_context="excel",
+        )
+
+
+def test_operation_context_unknown():
+    # Unknown context should fall back to the plain message, nothing inserted
+    match = (
+        r"Missing optional dependency 'notapackage'\. "
+        r"Use pip or conda to install notapackage\.$"
+    )
+    with pytest.raises(ImportError, match=match):
+        import_optional_dependency("notapackage", operation_context="unknown_context")
+
+
+def test_operation_context_filtering():
+    # The failed dependency must never be suggested as its own alternative.
+    # Build the message directly: the real packages may be installed, in which
+    # case import_optional_dependency would not raise at all.
+    from pandas.compat._optional import _build_context_message
+
+    msg = _build_context_message("openpyxl", "excel", "", "openpyxl")
+    assert "try installing xlsxwriter, python-calamine, xlrd" in msg
+
+    # Import name and install name differ from each other here
+    msg = _build_context_message("python_calamine", "excel", "", "python-calamine")
+    assert "try installing openpyxl, xlsxwriter, xlrd, pyxlsb, or odfpy." in msg
+
+
+def test_operation_context_ignore_errors():
+    # operation_context should not affect ignore behavior
+    result = import_optional_dependency(
+        "notapackage", operation_context="excel", errors="ignore"
+    )
+    assert result is None
+
+
 def test_submodule(monkeypatch):
     # Create a fake module with a submodule
     name = "fakemodule"