Skip to content

Commit 81353a3

Browse files
authored
perf: use cached property for schema/columns lazy frames (#2093)
1 parent 12afbfe commit 81353a3

File tree

4 files changed

+35
-31
lines changed

4 files changed

+35
-31
lines changed

narwhals/_dask/dataframe.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def __init__(
4747
self._backend_version = backend_version
4848
self._implementation = Implementation.DASK
4949
self._version = version
50+
self._cached_schema: dict[str, DType] | None = None
5051
validate_backend_version(self._implementation, self._backend_version)
5152

5253
def __native_namespace__(self: Self) -> ModuleType:
@@ -132,7 +133,7 @@ def collect(
132133

133134
@property
134135
def columns(self: Self) -> list[str]:
135-
return self._native_frame.columns.tolist()
136+
return list(self.schema)
136137

137138
def filter(self: Self, predicate: DaskExpr) -> Self:
138139
# `[0]` is safe as the predicate's expression only returns a single column
@@ -182,13 +183,15 @@ def drop_nulls(self: Self, subset: list[str] | None) -> Self:
182183

183184
@property
184185
def schema(self: Self) -> dict[str, DType]:
185-
native_dtypes = self._native_frame.dtypes
186-
return {
187-
col: native_to_narwhals_dtype(
188-
native_dtypes[col], self._version, self._implementation
189-
)
190-
for col in self._native_frame.columns
191-
}
186+
if self._cached_schema is None:
187+
native_dtypes = self._native_frame.dtypes
188+
self._cached_schema = {
189+
col: native_to_narwhals_dtype(
190+
native_dtypes[col], self._version, self._implementation
191+
)
192+
for col in self._native_frame.columns
193+
}
194+
return self._cached_schema
192195

193196
def collect_schema(self: Self) -> dict[str, DType]:
194197
return self.schema

narwhals/_duckdb/dataframe.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def __init__(
5555
self._native_frame: duckdb.DuckDBPyRelation = df
5656
self._version = version
5757
self._backend_version = backend_version
58+
self._cached_schema: dict[str, DType] | None = None
5859
validate_backend_version(self._implementation, self._backend_version)
5960

6061
def __narwhals_dataframe__(self: Self) -> Self: # pragma: no cover
@@ -189,16 +190,20 @@ def filter(self: Self, predicate: DuckDBExpr) -> Self:
189190

190191
@property
191192
def schema(self: Self) -> dict[str, DType]:
192-
return {
193-
column_name: native_to_narwhals_dtype(str(duckdb_dtype), self._version)
194-
for column_name, duckdb_dtype in zip(
195-
self._native_frame.columns, self._native_frame.types
196-
)
197-
}
193+
if self._cached_schema is None:
194+
# Note: prefer `self._cached_schema` over `functools.cached_property`
195+
# due to Python3.13 failures.
196+
self._cached_schema = {
197+
column_name: native_to_narwhals_dtype(str(duckdb_dtype), self._version)
198+
for column_name, duckdb_dtype in zip(
199+
self._native_frame.columns, self._native_frame.types
200+
)
201+
}
202+
return self._cached_schema
198203

199204
@property
200205
def columns(self: Self) -> list[str]:
201-
return self._native_frame.columns
206+
return list(self.schema)
202207

203208
def to_pandas(self: Self) -> pd.DataFrame:
204209
# only if version is v1, keep around for backcompat

narwhals/_spark_like/dataframe.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(
5050
self._backend_version = backend_version
5151
self._implementation = implementation
5252
self._version = version
53+
self._cached_schema: dict[str, DType] | None = None
5354
validate_backend_version(self._implementation, self._backend_version)
5455

5556
@property
@@ -178,7 +179,7 @@ def _collect_to_arrow(self) -> pa.Table:
178179

179180
@property
180181
def columns(self: Self) -> list[str]:
181-
return self._native_frame.columns # type: ignore[no-any-return]
182+
return list(self.schema)
182183

183184
def collect(
184185
self: Self,
@@ -266,14 +267,16 @@ def filter(self: Self, predicate: SparkLikeExpr) -> Self:
266267

267268
@property
268269
def schema(self: Self) -> dict[str, DType]:
269-
return {
270-
field.name: native_to_narwhals_dtype(
271-
dtype=field.dataType,
272-
version=self._version,
273-
spark_types=self._native_dtypes,
274-
)
275-
for field in self._native_frame.schema
276-
}
270+
if self._cached_schema is None:
271+
self._cached_schema = {
272+
field.name: native_to_narwhals_dtype(
273+
dtype=field.dataType,
274+
version=self._version,
275+
spark_types=self._native_dtypes,
276+
)
277+
for field in self._native_frame.schema
278+
}
279+
return self._cached_schema
277280

278281
def collect_schema(self: Self) -> dict[str, DType]:
279282
return self.schema
@@ -356,11 +359,6 @@ def join(
356359
left_columns = self.columns
357360
right_columns = other.columns
358361

359-
if isinstance(left_on, str):
360-
left_on = [left_on]
361-
if isinstance(right_on, str):
362-
right_on = [right_on]
363-
364362
# create a mapping for columns on other
365363
# `right_on` columns will be renamed as `left_on`
366364
# the remaining columns will either have the suffix appended or be left unchanged.

narwhals/_spark_like/utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
from functools import lru_cache
43
from typing import TYPE_CHECKING
54
from typing import Any
65

@@ -20,7 +19,6 @@
2019
from narwhals.utils import Version
2120

2221

23-
@lru_cache(maxsize=16)
2422
def native_to_narwhals_dtype(
2523
dtype: pyspark_types.DataType,
2624
version: Version,

0 commit comments

Comments
 (0)