Skip to content

Fix for issue 62001; ENH: Context-aware error messages for optional dependencies #62003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Other enhancements
- Allow dictionaries to be passed to :meth:`pandas.Series.str.replace` via ``pat`` parameter (:issue:`51748`)
- Support passing a :class:`Series` input to :func:`json_normalize` that retains the :class:`Series` :class:`Index` (:issue:`51452`)
- Support reading value labels from Stata 108-format (Stata 6) and earlier files (:issue:`58154`)
- Enhanced :func:`import_optional_dependency` with context-aware error messages that suggest relevant alternatives when dependencies are missing (:issue:`62001`)
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
Expand Down
126 changes: 122 additions & 4 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,119 @@
"tables": "pytables",
}

# Mapping of operation contexts to alternative dependencies
OPERATION_CONTEXTS = {
"excel": {
"alternatives": ["openpyxl", "xlsxwriter", "calamine", "xlrd", "pyxlsb", "odfpy"],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These may be alternatives, but they may not be. They are not completely equivalent. If a user is doing .to_excel(..., engine="openpyxl"), I don't think we should be suggesting they try installing xlsxwriter.

"description": "Excel file operations",
},
"plotting": {
"alternatives": ["matplotlib"],
"description": "plotting operations",
"fallback": "Use df.describe() for text-based data summaries",
},
"html": {
"alternatives": ["lxml", "html5lib", "beautifulsoup4"],
"description": "HTML parsing",
},
"xml": {
"alternatives": ["lxml"],
"description": "XML parsing",
},
"sql": {
"alternatives": ["sqlalchemy", "psycopg2", "pymysql"],
"description": "SQL database operations",
},
"performance": {
"alternatives": ["numexpr", "bottleneck", "numba"],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are "alternatives" but only in a very loose sense. They are not alternatives for the same operations.

"description": "performance acceleration",
"fallback": "Operations will use standard implementations",
},
"parquet": {
"alternatives": ["pyarrow", "fastparquet"],
"description": "Parquet file operations",
},
"feather": {
"alternatives": ["pyarrow"],
"description": "Feather file operations",
},
"orc": {
"alternatives": ["pyarrow"],
"description": "ORC file operations",
},
"hdf5": {
"alternatives": ["tables"],
"description": "HDF5 file operations",
},
"spss": {
"alternatives": ["pyreadstat"],
"description": "SPSS file operations",
},
"style": {
"alternatives": ["jinja2"],
"description": "DataFrame styling operations",
},
"compression": {
"alternatives": ["zstandard"],
"description": "data compression operations",
},
"clipboard": {
"alternatives": ["pyqt5", "qtpy"],
"description": "clipboard operations",
},
}


def _terminate_sentence(text: str) -> str:
    """Return ``text`` stripped, ending in sentence punctuation if non-empty."""
    text = text.strip()
    if text and not text.endswith((".", "!", "?")):
        text += "."
    return text


def _join_alternatives(alternatives: list[str]) -> str:
    """Render package names as ``a``, ``a or b`` or ``a, b, or c``."""
    if len(alternatives) <= 2:
        return " or ".join(alternatives)
    return ", ".join(alternatives[:-1]) + f", or {alternatives[-1]}"


def _build_context_message(
    name: str, operation_context: str | None, extra: str, install_name: str
) -> str:
    """
    Build the ImportError message for a missing optional dependency.

    The message always starts with the missing package and ends with the
    install hint. When ``operation_context`` is a key of ``OPERATION_CONTEXTS``
    the other packages registered for that context, and the context's
    ``fallback`` text if it has one, are inserted in between.

    Parameters
    ----------
    name : str
        The module name that failed to import.
    operation_context : str or None
        The operation context (e.g., 'excel', 'plotting'). ``None``, an empty
        string or an unregistered key yields the plain message.
    extra : str
        Additional text to include in the ImportError message. A period is
        appended when the text does not already end with ``.``, ``!`` or ``?``
        so that it cannot run into the following sentence.
    install_name : str
        The package name to install.

    Returns
    -------
    str
        The enhanced error message, with sentences separated by one space.
    """
    parts = [f"Missing optional dependency '{install_name}'."]

    extra_text = _terminate_sentence(extra) if extra else ""
    if extra_text:
        parts.append(extra_text)

    # Only consult the table for a truthy context; None/"" means "no context".
    context_info = (
        OPERATION_CONTEXTS.get(operation_context) if operation_context else None
    )
    if context_info is not None:
        # Never suggest the dependency that just failed. The top-level package
        # of a dotted module name (e.g. "lxml" for "lxml.etree") counts as the
        # same dependency.
        # NOTE(review): the "excel" context lists "calamine" while the reader
        # imports "python_calamine"; that entry is not filtered by this check.
        # Confirm the table entry against the install-name mapping.
        excluded = {name, install_name, name.partition(".")[0]}
        alternatives = [
            alt for alt in context_info["alternatives"] if alt not in excluded
        ]
        if alternatives:
            parts.append(
                f"For {context_info['description']}, try installing "
                f"{_join_alternatives(alternatives)}."
            )

        fallback = context_info.get("fallback")
        if fallback:
            parts.append(_terminate_sentence(fallback))

    parts.append(f"Use pip or conda to install {install_name}.")
    return " ".join(parts)


def get_version(module: types.ModuleType) -> str:
version = getattr(module, "__version__", None)
Expand All @@ -91,6 +204,7 @@ def import_optional_dependency(
min_version: str | None = ...,
*,
errors: Literal["raise"] = ...,
operation_context: str | None = ...,
) -> types.ModuleType: ...


Expand All @@ -101,6 +215,7 @@ def import_optional_dependency(
min_version: str | None = ...,
*,
errors: Literal["warn", "ignore"],
operation_context: str | None = ...,
) -> types.ModuleType | None: ...


Expand All @@ -110,6 +225,7 @@ def import_optional_dependency(
min_version: str | None = None,
*,
errors: Literal["raise", "warn", "ignore"] = "raise",
operation_context: str | None = None,
) -> types.ModuleType | None:
"""
Import an optional dependency.
Expand Down Expand Up @@ -137,6 +253,11 @@ def import_optional_dependency(
min_version : str, default None
Specify a minimum version that is different from the global pandas
minimum version required.
operation_context : str, default None
Provide context about the operation requiring this dependency to show
relevant alternatives in error messages. Supported contexts: 'excel',
'plotting', 'html', 'xml', 'sql', 'performance', 'parquet', 'feather',
'orc', 'hdf5', 'spss', 'style', 'compression', 'clipboard'.
Returns
-------
maybe_module : Optional[ModuleType]
Expand All @@ -150,10 +271,7 @@ def import_optional_dependency(
package_name = INSTALL_MAPPING.get(name)
install_name = package_name if package_name is not None else name

msg = (
f"`Import {install_name}` failed. {extra} "
f"Use pip or conda to install the {install_name} package."
)
msg = _build_context_message(name, operation_context, extra, install_name)
try:
module = importlib.import_module(name)
except ImportError as err:
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_calamine.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def __init__(
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("python_calamine")
import_optional_dependency("python_calamine", operation_context="excel")
super().__init__(
filepath_or_buffer,
storage_options=storage_options,
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("odf")
import_optional_dependency("odf", operation_context="excel")
super().__init__(
filepath_or_buffer,
storage_options=storage_options,
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_openpyxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ def __init__(
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("openpyxl")
import_optional_dependency("openpyxl", operation_context="excel")
super().__init__(
filepath_or_buffer,
storage_options=storage_options,
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_pyxlsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("pyxlsb")
import_optional_dependency("pyxlsb", operation_context="excel")
# This will call load_workbook on the filepath or buffer
# And set the result to the book-attribute
super().__init__(
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel/_xlrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(
Arbitrary keyword arguments passed to excel engine.
"""
err_msg = "Install xlrd >= 2.0.1 for xls Excel support"
import_optional_dependency("xlrd", extra=err_msg)
import_optional_dependency("xlrd", extra=err_msg, operation_context="excel")
super().__init__(
filepath_or_buffer,
storage_options=storage_options,
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,10 +913,10 @@ def _parser_dispatch(flavor: HTMLFlavors | None) -> type[_HtmlFrameParser]:
)

if flavor in ("bs4", "html5lib"):
import_optional_dependency("html5lib")
import_optional_dependency("bs4")
import_optional_dependency("html5lib", operation_context="html")
import_optional_dependency("bs4", operation_context="html")
else:
import_optional_dependency("lxml.etree")
import_optional_dependency("lxml.etree", operation_context="html")
return _valid_parsers[flavor]


Expand Down
18 changes: 9 additions & 9 deletions pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
Substitution,
)

from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.common import (
is_integer,
is_list_like,
Expand Down Expand Up @@ -1947,15 +1949,13 @@ def _load_backend(backend: str) -> types.ModuleType:
from importlib.metadata import entry_points

if backend == "matplotlib":
# Because matplotlib is an optional dependency and first-party backend,
# we need to attempt an import here to raise an ImportError if needed.
try:
module = importlib.import_module("pandas.plotting._matplotlib")
except ImportError:
raise ImportError(
"matplotlib is required for plotting when the "
'default backend "matplotlib" is selected.'
) from None
# Check for matplotlib dependency with enhanced error message
import_optional_dependency(
"matplotlib",
extra="Required for plotting when the default backend 'matplotlib' is selected.",
operation_context="plotting"
)
module = importlib.import_module("pandas.plotting._matplotlib")
return module

found_backend = False
Expand Down
65 changes: 64 additions & 1 deletion pandas/tests/test_optional_dependency.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


def test_import_optional():
match = "Import .*notapackage.* pip .* conda .* notapackage"
match = r"Missing optional dependency 'notapackage'.*Use pip or conda to install notapackage"
with pytest.raises(ImportError, match=match) as exc_info:
import_optional_dependency("notapackage")
# The original exception should be there as context:
Expand Down Expand Up @@ -65,6 +65,69 @@ def test_bad_version(monkeypatch):
assert result is None


def test_operation_context_excel():
    """The 'excel' context adds the Excel engine suggestions to the message."""
    expected_pattern = (
        r"Missing optional dependency 'notapackage'.*"
        r"For Excel file operations, try installing openpyxl, xlsxwriter, calamine.*"
        r".*Use pip or conda to install notapackage"
    )
    call_kwargs = {"operation_context": "excel"}
    with pytest.raises(ImportError, match=expected_pattern):
        import_optional_dependency("notapackage", **call_kwargs)


def test_operation_context_plotting():
    """The 'plotting' context adds both the suggestion and the fallback hint."""
    expected_pattern = (
        r"Missing optional dependency 'notapackage'.*"
        r"For plotting operations, try installing matplotlib.*"
        r"Use df\.describe\(\) for text-based data summaries.*"
        r"Use pip or conda to install notapackage"
    )
    call_kwargs = {"operation_context": "plotting"}
    with pytest.raises(ImportError, match=expected_pattern):
        import_optional_dependency("notapackage", **call_kwargs)


def test_operation_context_with_extra():
    """Caller-supplied ``extra`` text appears before the context suggestions."""
    expected_pattern = (
        r"Missing optional dependency 'notapackage'.*Additional context.*"
        r"For Excel file operations, try installing openpyxl, xlsxwriter, calamine.*"
        r".*Use pip or conda to install notapackage"
    )
    call_kwargs = {"extra": "Additional context.", "operation_context": "excel"}
    with pytest.raises(ImportError, match=expected_pattern):
        import_optional_dependency("notapackage", **call_kwargs)


def test_operation_context_unknown():
    """An unregistered context still yields the plain message."""
    expected_pattern = (
        r"Missing optional dependency 'notapackage'.*"
        r"Use pip or conda to install notapackage"
    )
    call_kwargs = {"operation_context": "unknown_context"}
    with pytest.raises(ImportError, match=expected_pattern):
        import_optional_dependency("notapackage", **call_kwargs)


def test_operation_context_filtering(monkeypatch):
    """The dependency that failed to import is not offered as an alternative."""
    import sys

    # A ``None`` entry in ``sys.modules`` makes ``import openpyxl`` raise
    # ImportError even when openpyxl is installed, so the test no longer
    # depends on the environment it runs in.
    monkeypatch.setitem(sys.modules, "openpyxl", None)

    match = (
        r"Missing optional dependency 'openpyxl'.*"
        r"For Excel file operations, try installing xlsxwriter, calamine.*"
        r".*Use pip or conda to install openpyxl"
    )
    with pytest.raises(ImportError, match=match):
        import_optional_dependency("openpyxl", operation_context="excel")


def test_operation_context_ignore_errors():
    """With ``errors="ignore"`` a missing module gives None, context or not."""
    call_kwargs = {"operation_context": "excel", "errors": "ignore"}
    maybe_module = import_optional_dependency("notapackage", **call_kwargs)
    assert maybe_module is None


def test_submodule(monkeypatch):
# Create a fake module with a submodule
name = "fakemodule"
Expand Down
Loading