From fce3f3593ecb088c0d4dfb37630031fb2f54065f Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 8 Nov 2024 12:53:28 +0100 Subject: [PATCH 1/7] Extend DataFrame parameter support to other libraries --- param/parameters.py | 52 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/param/parameters.py b/param/parameters.py index e65510e43..ec6ba6d60 100644 --- a/param/parameters.py +++ b/param/parameters.py @@ -2277,7 +2277,11 @@ def deserialize(cls, value): class DataFrame(ClassSelector): """ - Parameter whose value is a pandas DataFrame. + Parameter whose value is a DataFrame of one of the enabled libraries. + + The supported libraries can be controlled with the libraries argument. + Currently pandas is supported by default and both pandas and polars + can be enabled. The structure of the DataFrame can be constrained by the rows and columns arguments: @@ -2294,12 +2298,14 @@ class DataFrame(ClassSelector): same columns and in the same order and no other columns. """ - __slots__ = ['rows', 'columns', 'ordered'] + __slots__ = ['rows', 'columns', 'ordered', 'libraries'] _slot_defaults = _dict_update( - ClassSelector._slot_defaults, rows=None, columns=None, ordered=None + ClassSelector._slot_defaults, rows=None, columns=None, ordered=None, libraries=None ) + _supported_libraries = ('pandas', 'polars') + @typing.overload def __init__( self, @@ -2311,14 +2317,44 @@ def __init__( ... @_deprecate_positional_args - def __init__(self, default=Undefined, *, rows=Undefined, columns=Undefined, ordered=Undefined, **params): - from pandas import DataFrame as pdDFrame + def __init__(self, default=Undefined, *, rows=Undefined, columns=Undefined, ordered=Undefined, libraries=Undefined, **params): + if libraries in (None, Undefined): + libraries = ('pandas',) + elif any(l not in self._supported_libraries for l in libraries): + raise ValueError(f'DataFrame parameter libraries must be one of {self._supported_libraries}') self.rows = rows self.columns = columns self.ordered = ordered - super().__init__(default=default, class_=pdDFrame, **params) + self.libraries = libraries + super().__init__(default=default, class_=None, **params) self._validate(self.default) + def _validate_class_(self, val, class_, is_instance): + pass + + def _validate_library_(self, val, libraries): + if 'pandas' in libraries and 'pandas' in sys.modules: + try: + import pandas as pd + if isinstance(val, pd.DataFrame): + return + except Exception: + pass + if 'polars' in libraries: + try: + import polars as pl + if isinstance(val, (pl.DataFrame, pl.LazyFrame)): + return + except Exception: + pass + if len(libraries) > 1: + supported = ','.join(libraries[:-1]) + ' or ' + libraries[-1] + else: + supported = libraries[0] + raise ValueError( + f'DataFrame parameter value {type(val)} is not a {supported} DataFrame.' + ) + def _length_bounds_check(self, bounds, length, name): message = f'{name} length {length} does not match declared bounds of {bounds}' if not isinstance(bounds, tuple): @@ -2344,6 +2380,8 @@ def _validate(self, val): if self.allow_None and val is None: return + self._validate_library_(val, self.libraries) + if self.columns is None: pass elif (isinstance(self.columns, tuple) and len(self.columns)==2 @@ -2374,6 +2412,8 @@ def _validate(self, val): def serialize(cls, value): if value is None: return None + if hasattr(value, 'to_dicts'): + return value.to_dicts() return value.to_dict('records') @classmethod From ae8ca7578519efbe5bb84fda1436cdd0df217a0b Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 8 Nov 2024 13:10:39 +0100 Subject: [PATCH 2/7] Simplify --- param/parameters.py | 45 +++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 28 deletions(-) diff --git a/param/parameters.py b/param/parameters.py index ec6ba6d60..0b4e03403 100644 --- a/param/parameters.py +++ b/param/parameters.py @@ -2309,7 +2309,7 @@ class DataFrame(ClassSelector): @typing.overload def __init__( self, - default=None, *, rows=None, columns=None, ordered=None, is_instance=True, + default=None, *, rows=None, columns=None, ordered=None, libraries=None, is_instance=True, allow_None=False, doc=None, label=None, precedence=None, instantiate=True, constant=False, readonly=False, pickle_default_value=True, per_instance=True, allow_refs=False, nested_refs=False @@ -2329,31 +2329,22 @@ def __init__(self, default=Undefined, *, rows=Undefined, columns=Undefined, orde super().__init__(default=default, class_=None, **params) self._validate(self.default) - def _validate_class_(self, val, class_, is_instance): - pass - - def _validate_library_(self, val, libraries): - if 'pandas' in libraries and 'pandas' in sys.modules: - try: - import pandas as pd - if isinstance(val, pd.DataFrame): - return - except Exception: - pass - if 'polars' in libraries: - try: - import polars as pl - if isinstance(val, (pl.DataFrame, pl.LazyFrame)): - return - except Exception: - pass - if len(libraries) > 1: - supported = ','.join(libraries[:-1]) + ' or ' + libraries[-1] - else: - supported = libraries[0] - raise ValueError( - f'DataFrame parameter value {type(val)} is not a {supported} DataFrame.' - ) + @property + def class_(self): + types = () + if 'pandas' in self.libraries and 'pandas' in sys.modules: + import pandas as pd + types += (pd.DataFrame,) + if 'polars' in self.libraries and 'polars' in sys.modules: + import polaras as pl + types += (pl.DataFrame, pl.LazyFrame) + if not types: + return type(None) + return types if len(types) > 1 else types[0] + + @class_.setter + def class_(self, value): + pass # This is automatically determined from the libraries def _length_bounds_check(self, bounds, length, name): message = f'{name} length {length} does not match declared bounds of {bounds}' @@ -2380,8 +2371,6 @@ def _validate(self, val): if self.allow_None and val is None: return - self._validate_library_(val, self.libraries) - if self.columns is None: pass elif (isinstance(self.columns, tuple) and len(self.columns)==2 From d8ea289fd409d7a5d4c6458cd5b163113dd2e613 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 8 Nov 2024 13:17:08 +0100 Subject: [PATCH 3/7] Update param/parameters.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Simon Høxbro Hansen --- param/parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/param/parameters.py b/param/parameters.py index 0b4e03403..7f379b720 100644 --- a/param/parameters.py +++ b/param/parameters.py @@ -2336,7 +2336,7 @@ def class_(self): import pandas as pd types += (pd.DataFrame,) if 'polars' in self.libraries and 'polars' in sys.modules: - import polaras as pl + import polars as pl types += (pl.DataFrame, pl.LazyFrame) if not types: return type(None) From 80752ac7c5c4ae742fc5cf03c7035526b398a30e Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Fri, 8 Nov 2024 13:23:44 +0100 Subject: [PATCH 4/7] Update param/parameters.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Simon Høxbro Hansen --- param/parameters.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/param/parameters.py b/param/parameters.py index 7f379b720..dc1c38def 100644 --- a/param/parameters.py +++ b/param/parameters.py @@ -2401,6 +2401,8 @@ def _validate(self, val): def serialize(cls, value): if value is None: return None + if hasattr(value, 'collect'): + value = value.collect() # Polars LazyFrame if hasattr(value, 'to_dicts'): return value.to_dicts() return value.to_dict('records') From 851a2a7f5264117c76d99282e6c87dc82cfd9527 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 3 Nov 2025 12:13:50 +0100 Subject: [PATCH 5/7] Implement validation using Narwhals --- param/parameters.py | 251 ++++++++++++++++++++++++++++++++------------ 1 file changed, 186 insertions(+), 65 deletions(-) diff --git a/param/parameters.py b/param/parameters.py index dc1c38def..43b499a50 100644 --- a/param/parameters.py +++ b/param/parameters.py @@ -2277,70 +2277,107 @@ def deserialize(cls, value): class DataFrame(ClassSelector): """ - Parameter whose value is a DataFrame of one of the enabled libraries. + Parameter whose value is a tabular DataFrame. - The supported libraries can be controlled with the libraries argument. - Currently pandas is supported by default and both pandas and polars - can be enabled. + This parameter accepts: - The structure of the DataFrame can be constrained by the rows and - columns arguments: + - A pandas ``DataFrame`` (always supported). + - If Narwhals is installed, any Narwhals-compatible **eager** or + **lazy** data frame, including those from pandas, Polars, cuDF, etc. + Lazy data frames are only accepted if ``allow_lazy=True``. - rows: If specified, may be a number or an integer bounds tuple to - constrain the allowable number of rows. + The parameter validates the frame’s **shape** and **column names** + while remaining backend-agnostic. - columns: If specified, may be a number, an integer bounds tuple, a - list or a set. If the argument is numeric, constrains the number of - columns using the same semantics as used for rows. If either a list - or set of strings, the column names will be validated. If a set is - used, the supplied DataFrame must contain the specified columns and - if a list is given, the supplied DataFrame must contain exactly the - same columns and in the same order and no other columns. - """ - - __slots__ = ['rows', 'columns', 'ordered', 'libraries'] + Parameters + ---------- + rows : int | tuple[int, int] | None + Constrains the number of rows allowed. + + - ``int`` → exact number of rows required. + - ``tuple[min, max]`` → inclusive bounds on number of rows. + - ``Integer`` bounds object → equivalent to a bounds tuple. + - ``None`` → no row constraint. + + columns : int | tuple[int, int] | list[str] | set[str] | None + Constrains the number or names of columns allowed. + + - ``int`` → exact number of columns required. + - ``tuple[min, max]`` → inclusive bounds on number of columns. + - ``list[str]`` → must contain *exactly* these columns, + in the same order and no others. + - ``set[str]`` → must contain *at least* these columns; + order and extra columns are allowed. + - ``None`` → no column constraint. + + Notes: + - Column labels are coerced to strings during validation. + - No dtype or index validation is performed. + + allow_lazy : bool, default=False + Whether to accept Narwhals **lazy** data frames + (e.g. Polars ``LazyFrame``). When ``False`` (default), + lazy frames raise a ``ValueError`` to prevent + unintentional deferred computation. + + When ``True``: + - Validation is deferred; ``rows`` and ``columns`` checks + are skipped because they would require collection. + - The parameter stores the lazy frame as-is. + - It is the user’s responsibility to collect or validate + results downstream. + + Notes + ----- + - **Backend-neutrality:** If Narwhals is available, validation + uses its unified API (e.g. ``nw.from_native(df).columns``, + ``nw.len(df)``) instead of pandas-specific methods. + - **No data conversion:** The parameter stores the frame you provide + (backend-native, pandas, or lazy). It will not automatically convert + between backends. + - **Performance:** For large frames, only metadata (row/column info) + is inspected; data contents are never copied. + - **Serialization:** When serializing, behavior remains identical + to the legacy pandas implementation unless overridden by downstream + frameworks. + """ + + __slots__ = ['rows', 'columns', 'ordered', 'allow_lazy'] _slot_defaults = _dict_update( - ClassSelector._slot_defaults, rows=None, columns=None, ordered=None, libraries=None - ) - - _supported_libraries = ('pandas', 'polars') + ClassSelector._slot_defaults, rows=None, columns=None, ordered=None, allow_lazy=False) @typing.overload def __init__( self, - default=None, *, rows=None, columns=None, ordered=None, libraries=None, is_instance=True, + default=None, *, rows=None, columns=None, ordered=None, is_instance=True, allow_None=False, doc=None, label=None, precedence=None, instantiate=True, constant=False, readonly=False, pickle_default_value=True, per_instance=True, - allow_refs=False, nested_refs=False + allow_refs=False, nested_refs=False, allow_lazy=False ): ... @_deprecate_positional_args - def __init__(self, default=Undefined, *, rows=Undefined, columns=Undefined, ordered=Undefined, libraries=Undefined, **params): - if libraries in (None, Undefined): - libraries = ('pandas',) - elif any(l not in self._supported_libraries for l in libraries): - raise ValueError(f'DataFrame parameter libraries must be one of {self._supported_libraries}') + def __init__( + self, default=Undefined, *, rows=Undefined, columns=Undefined, + ordered=Undefined, allow_lazy=Undefined, **params + ): self.rows = rows self.columns = columns self.ordered = ordered - self.libraries = libraries + self.allow_lazy = allow_lazy super().__init__(default=default, class_=None, **params) self._validate(self.default) @property def class_(self): - types = () - if 'pandas' in self.libraries and 'pandas' in sys.modules: + try: + import narwhals.stable.v2 as nw + except Exception: import pandas as pd - types += (pd.DataFrame,) - if 'polars' in self.libraries and 'polars' in sys.modules: - import polars as pl - types += (pl.DataFrame, pl.LazyFrame) - if not types: - return type(None) - return types if len(types) > 1 else types[0] + return pd.DataFrame + else: + return (nw.DataFrame, nw.LazyFrame) if self.allow_lazy is True else nw.DataFrame @class_.setter def class_(self, value): @@ -2360,6 +2397,18 @@ def _length_bounds_check(self, bounds, length, name): raise ValueError(f"{_validate_error_prefix(self)}: {message}") def _validate(self, val): + try: + import narwhals.stable.v2 as nw + except Exception: + nw = None + else: + if val is not None: + val = nw.from_native(val) + if isinstance(val, nw.LazyFrame) and not self.allow_lazy: + raise ValueError( + 'DataFrame parameter was given a LazyFrame; set allow_lazy=True to ' + 'allow lazy data frames.') + super()._validate(val) if isinstance(self.columns, set) and self.ordered is True: @@ -2371,31 +2420,63 @@ def _validate(self, val): if self.allow_None and val is None: return - if self.columns is None: - pass - elif (isinstance(self.columns, tuple) and len(self.columns)==2 - and all(isinstance(v, (type(None), numbers.Number)) for v in self.columns)): # Numeric bounds tuple - self._length_bounds_check(self.columns, len(val.columns), 'columns') + self._validate_columns(val) + self._validate_rows(val) + + def _validate_columns(self, val): + try: + import narwhals.stable.v2 as nw + except Exception: + nw = None + + if val is None or self.columns is None: + return + + if nw is None or not isinstance(val, nw.LazyFrame): + columns = val.columns + else: + columns = val.collect_schema().names() + + if (isinstance(self.columns, tuple) and len(self.columns) == 2 + and all(isinstance(v, (type(None), numbers.Number)) for v in self.columns)): # Numeric bounds tuple + self._length_bounds_check(self.columns, len(columns), 'columns') elif isinstance(self.columns, (list, set)): self.ordered = isinstance(self.columns, list) if self.ordered is None else self.ordered - difference = set(self.columns) - {str(el) for el in val.columns} + difference = set(self.columns) - {str(el) for el in columns} if difference: raise ValueError( f"{_validate_error_prefix(self)}: provided columns " - f"{list(val.columns)} does not contain required " - f"columns {sorted(self.columns)}" + f"{list(columns)} does not contain required " + f"columns {sorted(difference)}" ) else: - self._length_bounds_check(self.columns, len(val.columns), 'column') + self._length_bounds_check(self.columns, len(columns), 'column') if self.ordered: - if list(val.columns) != list(self.columns): + if list(columns) != list(columns): raise ValueError( f"{_validate_error_prefix(self)}: provided columns " - f"{list(val.columns)} must exactly match {self.columns}" + f"{list(columns)} must exactly match {self.columns}" ) - if self.rows is not None: - self._length_bounds_check(self.rows, len(val), 'row') + + def _validate_rows(self, val): + if self.rows is None or val is None: + return + + try: + import narwhals.stable.v2 as nw + except Exception: + nw = None + + if nw is None or not isinstance(val, nw.LazyFrame): + n_rows = int( + val.select(nw.len().alias('n_rows')) + .collect() + .item() + ) + else: + n_rows = len(val) + self._length_bounds_check(self.rows, n_rows, 'row') @classmethod def serialize(cls, value): @@ -2441,10 +2522,10 @@ class Series(ClassSelector): allowable number of rows. """ - __slots__ = ['rows'] + __slots__ = ['rows', 'allow_lazy'] _slot_defaults = _dict_update( - ClassSelector._slot_defaults, rows=None, allow_None=False + ClassSelector._slot_defaults, rows=None, allow_None=False, allow_lazy=False ) @typing.overload @@ -2453,18 +2534,47 @@ def __init__( default=None, *, rows=None, allow_None=False, is_instance=True, doc=None, label=None, precedence=None, instantiate=True, constant=False, readonly=False, pickle_default_value=True, per_instance=True, - allow_refs=False, nested_refs=False + allow_refs=False, nested_refs=False, allow_lazy=False ): ... @_deprecate_positional_args - def __init__(self, default=Undefined, *, rows=Undefined, allow_None=Undefined, **params): - from pandas import Series as pdSeries + def __init__(self, default=Undefined, *, rows=Undefined, allow_None=Undefined, allow_lazy=Undefined, **params): self.rows = rows - super().__init__(default=default, class_=pdSeries, allow_None=allow_None, - **params) + self.allow_lazy = allow_lazy + super().__init__(default=default, allow_None=allow_None, **params) self._validate(self.default) + @property + def class_(self): + try: + import narwhals.stable.v2 as nw + except Exception: + import pandas as pd + return pd.Series + else: + return (nw.Series, nw.LazySeries) if self.allow_lazy is True else nw.Series + + def _validate(self, val): + try: + import narwhals.stable.v2 as nw + except Exception: + pass + else: + if val is not None: + val = nw.from_native(val) + if isinstance(val, nw.LazyFrame) and not self.allow_lazy: + raise ValueError( + 'Series parameter was given a LazySeries; set allow_lazy=True to ' + 'allow lazy series.') + + super()._validate(val) + + if self.allow_None and val is None: + return + + self._validate_rows(val) + def _length_bounds_check(self, bounds, length, name): message = f'{name} length {length} does not match declared bounds of {bounds}' if not isinstance(bounds, tuple): @@ -2478,14 +2588,25 @@ def _length_bounds_check(self, bounds, length, name): if failure: raise ValueError(f"{_validate_error_prefix(self)}: {message}") - def _validate(self, val): - super()._validate(val) - - if self.allow_None and val is None: + def _validate_rows(self, val): + if self.rows is None or val is None: return - if self.rows is not None: - self._length_bounds_check(self.rows, len(val), 'row') + try: + import narwhals.stable.v2 as nw + except Exception: + nw = None + + if nw is None or not isinstance(val, nw.LazySeries): + n_rows = int( + val.to_frame().select(nw.len().alias('n_rows')) + .collect() + .item() + ) + else: + n_rows = len(val) + self._length_bounds_check(self.rows, n_rows, 'row') + #----------------------------------------------------------------------------- # List From 507edb3b5e856fd2813b9c3cd65cbe91c14f6046 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 3 Nov 2025 12:40:03 +0100 Subject: [PATCH 6/7] Fix merge issues --- param/parameters.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/param/parameters.py b/param/parameters.py index ac16c3ef9..c7c5391ee 100644 --- a/param/parameters.py +++ b/param/parameters.py @@ -2273,10 +2273,6 @@ class DataFrame(ClassSelector): order and extra columns are allowed. - ``None`` → no column constraint. - Notes: - - Column labels are coerced to strings during validation. - - No dtype or index validation is performed. - allow_lazy : bool, default=False Whether to accept Narwhals **lazy** data frames (e.g. Polars ``LazyFrame``). When ``False`` (default), @@ -2307,7 +2303,7 @@ class DataFrame(ClassSelector): __slots__ = ['rows', 'columns', 'ordered', 'allow_lazy'] - _slot_defaults = _dict_update( + _slot_defaults = dict( ClassSelector._slot_defaults, rows=None, columns=None, ordered=None, allow_lazy=False) @typing.overload @@ -2320,7 +2316,6 @@ def __init__( ): ... - @_deprecate_positional_args def __init__( self, default=Undefined, *, rows=Undefined, columns=Undefined, ordered=Undefined, allow_lazy=Undefined, **params @@ -2487,7 +2482,7 @@ class Series(ClassSelector): __slots__ = ['rows', 'allow_lazy'] - _slot_defaults = _dict_update( + _slot_defaults = dict( ClassSelector._slot_defaults, rows=None, allow_None=False, allow_lazy=False ) @@ -2501,7 +2496,6 @@ def __init__( ): ... - @_deprecate_positional_args def __init__(self, default=Undefined, *, rows=Undefined, allow_None=Undefined, allow_lazy=Undefined, **params): self.rows = rows self.allow_lazy = allow_lazy From b1886f4336678efc5a0a8fd66ffd9a887922feb2 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 3 Nov 2025 12:57:48 +0100 Subject: [PATCH 7/7] Small fixes --- param/parameters.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/param/parameters.py b/param/parameters.py index c7c5391ee..827da7429 100644 --- a/param/parameters.py +++ b/param/parameters.py @@ -2427,13 +2427,13 @@ def _validate_rows(self, val): nw = None if nw is None or not isinstance(val, nw.LazyFrame): + n_rows = len(val) + else: n_rows = int( val.select(nw.len().alias('n_rows')) .collect() .item() ) - else: - n_rows = len(val) self._length_bounds_check(self.rows, n_rows, 'row') @classmethod @@ -2499,7 +2499,7 @@ def __init__( def __init__(self, default=Undefined, *, rows=Undefined, allow_None=Undefined, allow_lazy=Undefined, **params): self.rows = rows self.allow_lazy = allow_lazy - super().__init__(default=default, allow_None=allow_None, **params) + super().__init__(default=default, allow_None=allow_None, class_=None, **params) self._validate(self.default) @property @@ -2555,13 +2555,13 @@ def _validate_rows(self, val): nw = None if nw is None or not isinstance(val, nw.LazySeries): + n_rows = len(val) + else: n_rows = int( val.to_frame().select(nw.len().alias('n_rows')) .collect() .item() ) - else: - n_rows = len(val) self._length_bounds_check(self.rows, n_rows, 'row')