Skip to content

chore: Add EagerSeries*Namespace protocols #2294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Mar 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion narwhals/_arrow/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import pyarrow as pa
import pyarrow.compute as pc

from narwhals._compliant.series import _SeriesNamespace
from narwhals.exceptions import ShapeError
from narwhals.utils import _SeriesNamespace
from narwhals.utils import import_dtypes_module
from narwhals.utils import isinstance_or_issubclass

Expand Down
13 changes: 12 additions & 1 deletion narwhals/_compliant/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from narwhals._compliant.namespace import CompliantNamespace
from narwhals._compliant.typing import AliasName
from narwhals._compliant.typing import AliasNames
from narwhals._compliant.typing import CompliantExprT_co
from narwhals._compliant.typing import CompliantFrameT
from narwhals._compliant.typing import CompliantLazyFrameT
from narwhals._compliant.typing import CompliantSeriesOrNativeExprT_co
Expand All @@ -32,7 +33,7 @@
from narwhals.dependencies import get_numpy
from narwhals.dependencies import is_numpy_array
from narwhals.dtypes import DType
from narwhals.utils import _ExprNamespace
from narwhals.utils import _StoresCompliant
from narwhals.utils import deprecated
from narwhals.utils import not_implemented
from narwhals.utils import unstable
Expand Down Expand Up @@ -891,6 +892,16 @@ def _is_expr(cls, obj: Self | Any) -> TypeIs[Self]:
return hasattr(obj, "__narwhals_expr__")


class _ExprNamespace(  # type: ignore[misc]
    _StoresCompliant[CompliantExprT_co],
    Protocol[CompliantExprT_co],
):
    """Protocol for an accessor namespace that wraps a compliant expression.

    The wrapped expression lives on ``_compliant_expr`` and is exposed
    through the read-only ``compliant`` property.
    """

    # The compliant-level expression this namespace operates on.
    _compliant_expr: CompliantExprT_co

    @property
    def compliant(self) -> CompliantExprT_co:
        """Return the expression stored on ``_compliant_expr``."""
        wrapped = self._compliant_expr
        return wrapped


class EagerExprNamespace(_ExprNamespace[EagerExprT], Generic[EagerExprT]):
def __init__(self, expr: EagerExprT, /) -> None:
self._compliant_expr = expr
Expand Down
85 changes: 85 additions & 0 deletions narwhals/_compliant/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,26 @@

from typing import TYPE_CHECKING
from typing import Any
from typing import Generic
from typing import Iterable
from typing import Iterator
from typing import Literal
from typing import Mapping
from typing import Protocol
from typing import Sequence

from narwhals._compliant.any_namespace import CatNamespace
from narwhals._compliant.any_namespace import DateTimeNamespace
from narwhals._compliant.any_namespace import ListNamespace
from narwhals._compliant.any_namespace import StringNamespace
from narwhals._compliant.any_namespace import StructNamespace
from narwhals._compliant.typing import CompliantSeriesT_co
from narwhals._compliant.typing import EagerSeriesT_co
from narwhals._compliant.typing import NativeSeriesT_co
from narwhals._translate import FromIterable
from narwhals._translate import NumpyConvertible
from narwhals.utils import _StoresCompliant
from narwhals.utils import _StoresNative
from narwhals.utils import unstable

if TYPE_CHECKING:
Expand Down Expand Up @@ -297,3 +307,78 @@ def __narwhals_namespace__(self) -> EagerNamespace[Any, Self, Any]: ...

def _to_expr(self) -> EagerExpr[Any, Any]:
    """Build an eager expression from this series.

    Calls ``_expr._from_series`` on the object returned by
    ``__narwhals_namespace__()``, passing this series.
    """
    namespace = self.__narwhals_namespace__()
    return namespace._expr._from_series(self)  # type: ignore[no-any-return]

@property
def str(self) -> EagerSeriesStringNamespace[Self, NativeSeriesT_co]:
    """String accessor namespace for this series (stub, no body here)."""
    ...
@property
def dt(self) -> EagerSeriesDateTimeNamespace[Self, NativeSeriesT_co]:
    """Datetime accessor namespace for this series (stub, no body here)."""
    ...
@property
def cat(self) -> EagerSeriesCatNamespace[Self, NativeSeriesT_co]:
    """Categorical accessor namespace for this series (stub, no body here)."""
    ...
@property
def list(self) -> EagerSeriesListNamespace[Self, NativeSeriesT_co]:
    """List accessor namespace for this series (stub, no body here)."""
    ...
@property
def struct(self) -> EagerSeriesStructNamespace[Self, NativeSeriesT_co]:
    """Struct accessor namespace for this series (stub, no body here)."""
    ...


class _SeriesNamespace(  # type: ignore[misc]
    _StoresCompliant[CompliantSeriesT_co],
    _StoresNative[NativeSeriesT_co],
    Protocol[CompliantSeriesT_co, NativeSeriesT_co],
):
    """Protocol for an accessor namespace that wraps a compliant series.

    Exposes the wrapped series (``compliant``), that series' ``native``
    attribute (``native``), and a helper to wrap a native object back into
    a compliant series (``from_native``).
    """

    # The compliant-level series this namespace operates on.
    _compliant_series: CompliantSeriesT_co

    @property
    def compliant(self) -> CompliantSeriesT_co:
        """Return the series stored on ``_compliant_series``."""
        wrapped = self._compliant_series
        return wrapped

    @property
    def native(self) -> NativeSeriesT_co:
        """Return the ``native`` attribute of the wrapped series."""
        wrapped = self._compliant_series
        return wrapped.native  # type: ignore[no-any-return]

    def from_native(self, series: Any, /) -> CompliantSeriesT_co:
        """Wrap ``series`` using the compliant series' ``_from_native_series``."""
        rewrap = self.compliant._from_native_series
        return rewrap(series)


class EagerSeriesNamespace(
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    Generic[EagerSeriesT_co, NativeSeriesT_co],
):
    """Generic (non-Protocol) base that supplies the constructor.

    Keeping ``__init__`` on this class, rather than on the
    ``_SeriesNamespace`` protocol, means no protocol defines a constructor.
    """

    # Narrows the attribute declared on ``_SeriesNamespace`` to eager series.
    _compliant_series: EagerSeriesT_co

    def __init__(self, series: EagerSeriesT_co, /) -> None:
        """Store ``series`` as the wrapped compliant series."""
        self._compliant_series = series


class EagerSeriesCatNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    CatNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining ``_SeriesNamespace`` with ``CatNamespace``.

    Adds no members of its own.
    """
Comment on lines +323 to +356
Copy link
Member Author

@dangotbanned dangotbanned Mar 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that these are in the right place, I can collect my thoughts on the 3 solutions we can use for Protocol definitions with an __init__.

Problem

Defining a constructor in a Protocol can produce two different bugs.
I've documented that in (#2064 (comment)) - but the short of it is 3.8 and 3.(9|10) have issues.

Solutions

1. Protocol38

This was the first one I discovered (for #2064) and I really dislike it due to the complexity.

"Fixing" 3.8 requires tricking the type checker into thinking typing.Generic is typing.Protocol.

Imports

if not TYPE_CHECKING: # pragma: no cover
if sys.version_info >= (3, 9):
from typing import Protocol as Protocol38
else:
from typing import Generic as Protocol38
else: # pragma: no cover
# TODO @dangotbanned: Remove after dropping `3.8` (#2084)
# - https://github.com/narwhals-dev/narwhals/pull/2064#discussion_r1965921386
from typing import Protocol as Protocol38

The side effect is that every sub-protocol is then required to use that Protocol38 definition or face runtime errors that can no longer be caught statically:

E   TypeError: Protocols can only inherit from other protocols, got <class 'narwhals._compliant.series.EagerSeriesNamespace'>

"Fixing" 3.(9|10) requires redefining __init__ anyway in the impl.
Which mostly negates the benefit of defining in the Protocol 🤦‍♂️

Repeat __init__

def __init__(
    self: Self,
    call: Callable[[EagerDataFrameT], Sequence[EagerSeriesT]],
    *,
    depth: int,
    function_name: str,
    evaluate_output_names: Callable[[EagerDataFrameT], Sequence[str]],
    alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None,
    implementation: Implementation,
    backend_version: tuple[int, ...],
    version: Version,
    call_kwargs: dict[str, Any] | None = None,
) -> None:
    """Constructor signature only; the body is a stub.

    Everything after ``call`` is keyword-only, and ``call_kwargs`` is the
    only parameter with a default.
    """
    ...

def __init__(
    self: Self,
    call: Callable[[ArrowDataFrame], Sequence[ArrowSeries]],
    *,
    depth: int,
    function_name: str,
    evaluate_output_names: Callable[[ArrowDataFrame], Sequence[str]],
    alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None,
    backend_version: tuple[int, ...],
    version: Version,
    call_kwargs: dict[str, Any] | None = None,
    implementation: Implementation | None = None,
) -> None:
    """Store the constructor arguments on the instance.

    Args:
        call: Callable taking a dataframe and returning a sequence of series.
        depth: Stored as ``_depth``.
        function_name: Stored as ``_function_name``.
        evaluate_output_names: Stored as ``_evaluate_output_names``.
        alias_output_names: Stored as ``_alias_output_names``; may be ``None``.
        backend_version: Stored as ``_backend_version``.
        version: Stored as ``_version``.
        call_kwargs: Stored as ``_call_kwargs``; a falsy value becomes ``{}``.
        implementation: Accepted but neither read nor stored here.
            NOTE(review): presumably ``_implementation`` is defined elsewhere
            on the class - confirm.
    """
    self._call = call
    # Assigned once; the original snippet repeated this assignment.
    self._depth = depth
    self._function_name = function_name
    self._evaluate_output_names = evaluate_output_names
    self._alias_output_names = alias_output_names
    self._backend_version = backend_version
    self._version = version
    self._call_kwargs = call_kwargs or {}
    self._metadata: ExprMetadata | None = None

def __init__(
    self: Self,
    call: Callable[[PandasLikeDataFrame], Sequence[PandasLikeSeries]],
    *,
    depth: int,
    function_name: str,
    evaluate_output_names: Callable[[PandasLikeDataFrame], Sequence[str]],
    alias_output_names: Callable[[Sequence[str]], Sequence[str]] | None,
    implementation: Implementation,
    backend_version: tuple[int, ...],
    version: Version,
    call_kwargs: dict[str, Any] | None = None,
) -> None:
    """Store every constructor argument on the instance.

    Each argument is saved under the same name with a leading underscore.
    A falsy ``call_kwargs`` is replaced by a new empty dict, and
    ``_metadata`` starts out as ``None``.
    """
    # The callable and the bookkeeping that describes it.
    self._call = call
    self._depth = depth
    self._function_name = function_name

    # Output-name handling.
    self._evaluate_output_names = evaluate_output_names
    self._alias_output_names = alias_output_names

    # Backend identification.
    self._implementation = implementation
    self._backend_version = backend_version
    self._version = version

    self._call_kwargs = call_kwargs if call_kwargs else {}
    self._metadata: ExprMetadata | None = None

2. Define the constructor another way

(#2261) introduced a different solution - but still needed to use Protocol38 to avoid the metaclass issues of the first solution.

I like this a lot more as it feels more intuitive from the outside - given the clear chain of @classmethod and @property definitions.

when-then

def when(self: Self, predicate: ArrowExpr) -> ArrowWhen:
    """Return ``ArrowWhen.from_expr(predicate, context=self)``."""
    pending = ArrowWhen.from_expr(predicate, context=self)
    return pending

class ArrowWhen(
    EagerWhen[ArrowDataFrame, ArrowSeries, ArrowExpr, "ArrowChunkedArray"]
):
    """``EagerWhen`` specialization whose ``_then`` class is ``ArrowThen``."""

    @property
    def _then(self) -> type[ArrowThen]:
        """Return the ``ArrowThen`` class itself, not an instance."""
        return ArrowThen

@classmethod
def from_expr(cls, condition: ExprT, /, *, context: _FullContext) -> Self:
    """Alternate constructor built from a condition and a context object.

    The instance is allocated with ``cls.__new__`` so ``__init__`` is not
    run. ``_implementation``, ``_backend_version`` and ``_version`` are
    copied from ``context``, and both branch values start as ``None``.
    """
    instance = cls.__new__(cls)
    instance._condition = condition
    instance._then_value = None
    instance._otherwise_value = None
    instance._implementation = context._implementation
    instance._backend_version = context._backend_version
    instance._version = context._version
    return instance

@property
def _then(self) -> type[CompliantThen[FrameT, SeriesT, ExprT]]:
    """Class used to build the ``then`` stage (stub, no body here)."""
    ...

def then(
    self, value: IntoExpr[SeriesT, ExprT], /
) -> CompliantThen[FrameT, SeriesT, ExprT]:
    """Build the ``then`` stage by calling ``from_when`` on ``self._then``."""
    then_cls = self._then
    return then_cls.from_when(self, value)

@classmethod
def from_when(
    cls,
    when: CompliantWhen[FrameT, SeriesT, ExprT],
    then: IntoExpr[SeriesT, ExprT],
    /,
) -> Self:
    """Alternate constructor built from a ``when`` object and a ``then`` value.

    ``then`` is first recorded on ``when._then_value``. The instance is
    allocated with ``cls.__new__`` (so ``__init__`` is not run) and its
    attributes are filled in directly. Output-name callbacks come from
    ``then`` when it has them, otherwise from the defaults below.
    """
    when._then_value = then
    instance = cls.__new__(cls)
    # ``when`` is stored under both ``_call`` and ``_when_value``.
    instance._call = when
    instance._when_value = when
    instance._depth = 0
    instance._function_name = "whenthen"
    instance._evaluate_output_names = getattr(
        then, "_evaluate_output_names", lambda _df: ["literal"]
    )
    instance._alias_output_names = getattr(then, "_alias_output_names", None)
    instance._implementation = when._implementation
    instance._backend_version = when._backend_version
    instance._version = when._version
    instance._call_kwargs = {}
    return instance

The only downside I could see is __new__ being a less-understood method than __init__.
Personally it took me a while to understand when it makes sense to use.
Even now that I do - I didn't think of it as a solution until a month after (#2064) merged 😅

3. Avoid constructors entirely in Protocol(s)

That is the solution this comment is attached to.

It might be easy to miss, but _SeriesNamespace is a Protocol and EagerSeriesNamespace is a Generic class.

The interesting aspect of that is the Protocol(s) represent the interface - but the concrete impl is separate.
This approach makes sense when the interface and implementation differ greatly.
Here it is ArrowSeries and PandasLikeSeries - but we go fully structural for all of Polars



class EagerSeriesDateTimeNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    DateTimeNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining ``_SeriesNamespace`` with ``DateTimeNamespace``.

    Adds no members of its own.
    """


class EagerSeriesListNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    ListNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining ``_SeriesNamespace`` with ``ListNamespace``.

    Adds no members of its own.
    """


class EagerSeriesStringNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    StringNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining ``_SeriesNamespace`` with ``StringNamespace``.

    Adds no members of its own.
    """


class EagerSeriesStructNamespace(  # type: ignore[misc]
    _SeriesNamespace[EagerSeriesT_co, NativeSeriesT_co],
    StructNamespace[EagerSeriesT_co],
    Protocol[EagerSeriesT_co, NativeSeriesT_co],
):
    """Protocol combining ``_SeriesNamespace`` with ``StructNamespace``.

    Adds no members of its own.
    """
4 changes: 4 additions & 0 deletions narwhals/_compliant/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,14 @@
NativeFrameT_co = TypeVar("NativeFrameT_co", bound="NativeFrame", covariant=True)

CompliantExprT = TypeVar("CompliantExprT", bound=CompliantExprAny)
CompliantExprT_co = TypeVar("CompliantExprT_co", bound=CompliantExprAny, covariant=True)
CompliantExprT_contra = TypeVar(
"CompliantExprT_contra", bound=CompliantExprAny, contravariant=True
)
CompliantSeriesT = TypeVar("CompliantSeriesT", bound=CompliantSeriesAny)
CompliantSeriesT_co = TypeVar(
"CompliantSeriesT_co", bound=CompliantSeriesAny, covariant=True
)
CompliantSeriesOrNativeExprT = TypeVar(
"CompliantSeriesOrNativeExprT", bound=CompliantSeriesOrNativeExprAny
)
Expand Down
6 changes: 6 additions & 0 deletions narwhals/_pandas_like/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,8 +1079,14 @@ def cat(self: Self) -> PandasLikeSeriesCatNamespace:

@property
def list(self: Self) -> PandasLikeSeriesListNamespace:
    """Return the list accessor namespace for this series.

    Raises:
        TypeError: If the native series has no ``list`` attribute.
    """
    if hasattr(self.native, "list"):
        return PandasLikeSeriesListNamespace(self)
    msg = "Series must be of PyArrow List type to support list namespace."
    raise TypeError(msg)

@property
def struct(self: Self) -> PandasLikeSeriesStructNamespace:
    """Return the struct accessor namespace for this series.

    Raises:
        TypeError: If the native series has no ``struct`` attribute.
    """
    if hasattr(self.native, "struct"):
        return PandasLikeSeriesStructNamespace(self)
    msg = "Series must be of PyArrow Struct type to support struct namespace."
    raise TypeError(msg)
20 changes: 9 additions & 11 deletions narwhals/_pandas_like/series_cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing_extensions import Self
from narwhals._compliant.any_namespace import CatNamespace
from narwhals._pandas_like.utils import PandasLikeSeriesNamespace

if TYPE_CHECKING:
from narwhals._pandas_like.series import PandasLikeSeries


class PandasLikeSeriesCatNamespace:
    """Categorical accessor that holds a reference to a ``PandasLikeSeries``."""

    def __init__(self: Self, series: PandasLikeSeries) -> None:
        """Store ``series`` as the wrapped compliant series."""
        self._compliant_series = series

    def get_categories(self: Self) -> PandasLikeSeries:
        """Return the categories of the wrapped series as a new series.

        The result is built with the native series' own class, keeps the
        native series' name, and is re-wrapped via ``_from_native_series``.
        """
        compliant = self._compliant_series
        native = compliant._native_series
        categories = native.__class__(native.cat.categories, name=native.name)
        return compliant._from_native_series(categories)
class PandasLikeSeriesCatNamespace(
    PandasLikeSeriesNamespace, CatNamespace["PandasLikeSeries"]
):
    """Categorical accessor built on ``PandasLikeSeriesNamespace``."""

    def get_categories(self) -> PandasLikeSeries:
        """Return the categories of the wrapped series as a new series.

        The result is built with the native series' own type, keeps the
        native series' name, and is re-wrapped via ``from_native``.
        """
        native = self.native
        categories = type(native)(native.cat.categories, name=native.name)
        return self.from_native(categories)
Loading
Loading