Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

ENH: Support third-party execution engines in Series.map#61467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes fromall commits
Commits
Show all changes
12 commits
Select commitHold shift + click to select a range
6f61d7b
ENH: Adding engine parameter to Series.map
datapythonistaMar 15, 2025
ea45245
Merge remote-tracking branch 'upstream/main' into series_map_engine
datapythonistaApr 12, 2025
ef62074
Add missing file
datapythonistaApr 12, 2025
b5e5519
Fixing bug when executor returns a numpy array
datapythonistaApr 13, 2025
30ca3bd
engine with no function and tests
datapythonistaApr 14, 2025
b32ae65
Merge remote-tracking branch 'upstream/main' into series_map_engine
datapythonistaApr 14, 2025
c3afd05
Merge from main
datapythonistaMay 20, 2025
4a3bcfa
Last fixes
datapythonistaMay 20, 2025
e838c4c
Fix CI
datapythonistaMay 20, 2025
cae63ac
Add fixture import back
datapythonistaMay 20, 2025
a4d8b4a
Move mock execution class to conftest
datapythonistaMay 27, 2025
56c3ce0
Adding commit about imported fixture
datapythonistaMay 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -73,6 +73,7 @@ Other enhancements
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
- :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
Expand Down
39 changes: 39 additions & 0 deletions pandas/core/series.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -4326,6 +4326,7 @@ def map(
self,
func: Callable | Mapping | Series | None = None,
na_action: Literal["ignore"] | None = None,
engine: Callable | None = None,
**kwargs,
) -> Series:
"""
Expand All@@ -4342,6 +4343,25 @@ def map(
na_action : {None, 'ignore'}, default None
If 'ignore', propagate NaN values, without passing them to the
mapping correspondence.
engine : decorator, optional
Choose the execution engine to use to run the function. Only used for
functions. If ``map`` is called with a mapping or ``Series``, an
exception will be raised. If ``engine`` is not provided the function will
be executed by the regular Python interpreter.

Options include JIT compilers such as Numba, Bodo or Blosc2, which in some
cases can speed up the execution. To use an executor you can provide the
decorators ``numba.jit``, ``numba.njit``, ``bodo.jit`` or ``blosc2.jit``.
You can also provide the decorator with parameters, like
``numba.jit(nogil=True)``.

Not all functions can be executed with all execution engines. In general,
JIT compilers will require type stability in the function (no variable
should change data type during the execution). And not all pandas and
NumPy APIs are supported. Check the engine documentation for limitations.

.. versionadded:: 3.0.0

**kwargs
Additional keyword arguments to pass as keyword arguments to
`func`.
Expand DownExpand Up@@ -4421,6 +4441,25 @@ def map(
else:
raise ValueError("The `func` parameter is required")

if engine is not None:
if not callable(func):
raise ValueError(
"The engine argument can only be specified when func is a function"
)
if not hasattr(engine, "__pandas_udf__"):
raise ValueError(f"Not a valid engine: {engine!r}")
result = engine.__pandas_udf__.map( # type: ignore[attr-defined]
data=self,
func=func,
args=(),
kwargs=kwargs,
decorator=engine,
skip_na=na_action == "ignore",
)
if not isinstance(result, Series):
result = Series(result, index=self.index, name=self.name)
return result.__finalize__(self, method="map")

if callable(func):
func = functools.partial(func, **kwargs)
new_values = self._map_values(func, na_action=na_action)
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/apply/conftest.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
import numpy as np
import pytest

from pandas import (
DataFrame,
Series,
)
from pandas.api.executors import BaseExecutionEngine


class MockExecutionEngine(BaseExecutionEngine):
"""
Execution Engine to test if the execution engine interface receives and
uses all parameters provided by the user.

Making this engine work as the default Python engine by calling it, no extra
functionality is implemented here.

When testing, this will be called when this engine is provided, and then the
same pandas.map and pandas.apply function will be called, but without engine,
executing the default behavior from the python engine.
"""

def map(data, func, args, kwargs, decorator, skip_na):
kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {}
return data.map(func, na_action="ignore" if skip_na else None, **kwargs_to_pass)

def apply(data, func, args, kwargs, decorator, axis):
if isinstance(data, Series):
return data.apply(func, convert_dtype=True, args=args, by_row=False)
elif isinstance(data, DataFrame):
return data.apply(
func,
axis=axis,
raw=False,
result_type=None,
args=args,
by_row="compat",
**kwargs,
)
else:
assert isinstance(data, np.ndarray)

def wrap_function(func):
# https://github.com/numpy/numpy/issues/8352
def wrapper(*args, **kwargs):
result = func(*args, **kwargs)
if isinstance(result, str):
result = np.array(result, dtype=object)
return result

return wrapper

return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs)


class MockEngineDecorator:
    """
    Stand-in for a third-party engine decorator.

    It only carries the ``__pandas_udf__`` attribute, which points at the
    mock execution engine.
    """

    __pandas_udf__ = MockExecutionEngine


@pytest.fixture(params=[None, MockEngineDecorator])
def engine(request):
    """
    Fixture yielding each ``engine`` value to test: ``None`` and
    ``MockEngineDecorator``.
    """
    selected_engine = request.param
    return selected_engine
54 changes: 1 addition & 53 deletions pandas/tests/apply/test_frame_apply.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -17,63 +17,11 @@
date_range,
)
import pandas._testing as tm
from pandas.api.executors import BaseExecutionEngine
from pandas.tests.apply.conftest import MockEngineDecorator
from pandas.tests.frame.common import zip_frames
from pandas.util.version import Version


class MockExecutionEngine(BaseExecutionEngine):
"""
Execution Engine to test if the execution engine interface receives and
uses all parameters provided by the user.

Making this engine work as the default Python engine by calling it, no extra
functionality is implemented here.

When testing, this will be called when this engine is provided, and then the
same pandas.map and pandas.apply function will be called, but without engine,
executing the default behavior from the python engine.
"""

def map(data, func, args, kwargs, decorator, skip_na):
kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {}
return data.map(
func, action_na="ignore" if skip_na else False, **kwargs_to_pass
)

def apply(data, func, args, kwargs, decorator, axis):
if isinstance(data, Series):
return data.apply(func, convert_dtype=True, args=args, by_row=False)
elif isinstance(data, DataFrame):
return data.apply(
func,
axis=axis,
raw=False,
result_type=None,
args=args,
by_row="compat",
**kwargs,
)
else:
assert isinstance(data, np.ndarray)

def wrap_function(func):
# https://github.com/numpy/numpy/issues/8352
def wrapper(*args, **kwargs):
result = func(*args, **kwargs)
if isinstance(result, str):
result = np.array(result, dtype=object)
return result

return wrapper

return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs)


class MockEngineDecorator:
__pandas_udf__ = MockExecutionEngine


@pytest.fixture
def int_frame_const_col():
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/apply/test_series_apply.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -376,13 +376,13 @@ def test_demo():


@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row, engine):
# test that we are evaluating row-by-row first if by_row="compat"
# else vectorized evaluation
result = string_series.apply(func, by_row=by_row)

if by_row:
expected = string_series.map(func)
expected = string_series.map(func, engine=engine)
tm.assert_series_equal(result, expected)
else:
assert result == str(string_series)
Expand Down
37 changes: 30 additions & 7 deletions pandas/tests/series/methods/test_map.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -21,6 +21,10 @@
)
import pandas._testing as tm

# The fixture is mostly used in pandas/tests/apply, so it's defined in that
# conftest, which is out of scope here. So we need to import it manually.
from pandas.tests.apply.conftest import engine # noqa: F401
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

Curious why this needs importing since it's already in the conftest.py

Copy link
MemberAuthor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

Good question. I answered in a comment, so readers of that import don't need to ask themselves the same thing. Most tests related to apply/map are in tests/apply, so the fixture is defined there. But it's also useful here in tests/series/methods/. tests/apply/conftest.py is not in scope when running tests/series/methods, so I need to import it manually in order to use it. Another alternative would be to move the fixture to the global conftest.py, but I think this approach keeps things better organized and simple.

I moved the mock classes to conftest.py as suggested, thanks for the review.

Copy link
Member

@mroeschkemroeschkeMay 27, 2025
edited
Loading

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others.Learn more.

I think if your work in #61125 continues to expand, I would be OK moving this to the global conftest.py



def test_series_map_box_timedelta():
# GH#11349
Expand All@@ -32,16 +36,20 @@ def f(x):
ser.map(f)


def test_map_callable(datetime_series):
def test_map_callable(datetime_series, engine): # noqa: F811
with np.errstate(all="ignore"):
tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series))
tm.assert_series_equal(
datetime_series.map(np.sqrt, engine=engine), np.sqrt(datetime_series)
)

# map function element-wise
tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series))
tm.assert_series_equal(
datetime_series.map(math.exp, engine=engine), np.exp(datetime_series)
)

# empty series
s = Series(dtype=object, name="foo", index=Index([], name="bar"))
rs = s.map(lambda x: x)
rs = s.map(lambda x: x, engine=engine)
tm.assert_series_equal(s, rs)

# check all metadata (GH 9322)
Expand All@@ -52,7 +60,7 @@ def test_map_callable(datetime_series):

# index but no data
s = Series(index=[1, 2, 3], dtype=np.float64)
rs = s.map(lambda x: x)
rs = s.map(lambda x: x, engine=engine)
tm.assert_series_equal(s, rs)


Expand DownExpand Up@@ -269,10 +277,10 @@ def test_map_decimal(string_series):
assert isinstance(result.iloc[0], Decimal)


def test_map_na_exclusion():
def test_map_na_exclusion(engine): # noqa: F811
s = Series([1.5, np.nan, 3, np.nan, 5])

result = s.map(lambda x: x * 2, na_action="ignore")
result = s.map(lambda x: x * 2, na_action="ignore", engine=engine)
exp = s * 2
tm.assert_series_equal(result, exp)

Expand DownExpand Up@@ -628,3 +636,18 @@ def test_map_no_func_or_arg():
def test_map_func_is_none():
with pytest.raises(ValueError, match="The `func` parameter is required"):
Series([1, 2]).map(func=None)


@pytest.mark.parametrize("func", [{}, {1: 2}, Series([3, 4])])
def test_map_engine_no_function(func):
    """Passing ``engine`` together with a non-callable ``func`` raises."""
    ser = Series([1, 2])
    expected_msg = "engine argument can only be specified"

    with pytest.raises(ValueError, match=expected_msg):
        ser.map(func, engine="something")


def test_map_engine_not_executor():
    """An ``engine`` without ``__pandas_udf__`` is rejected as invalid."""
    ser = Series([1, 2])
    expected_msg = "Not a valid engine: 'something'"

    with pytest.raises(ValueError, match=expected_msg):
        ser.map(lambda x: x, engine="something")
Loading

[8]ページ先頭

©2009-2025 Movatter.jp