Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda.recipe/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ requirements:
- python
- formulaic >=0.6
- joblib
- narwhals >=2.0.0
- numexpr
- packaging
- pandas
Expand Down
13 changes: 13 additions & 0 deletions pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ wheel = "*"

[dependencies]
formulaic = "*"
narwhals = ">=2.0.0"
numexpr = "*"
packaging = "*"
pandas = ">=1.4"
Expand Down Expand Up @@ -134,7 +135,7 @@ cxx-compiler = "*"
cython = "!=3.0.4"
make = "*"
mako = "*"
narwhals = ">=1.4.1"
narwhals = ">=2.0.0"
pip = "*"
setuptools-scm = "*"
xsimd = "<11|>12.1"
Expand All @@ -154,6 +155,7 @@ python = "3.13.*"

[feature.oldies.dependencies]
formulaic = "0.6.*"
narwhals = "2.0.*"
pandas = "1.4.*"
python = "3.9.*"
scikit-learn = "0.24.*"
Expand Down
111 changes: 79 additions & 32 deletions src/glum/_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
import typing
import warnings
from collections.abc import Iterable, Mapping, Sequence
from typing import Any, Optional, Union
from typing import Any, Optional, Union, cast

import formulaic
import narwhals.stable.v2 as nw
import numpy as np
import packaging.version
import pandas as pd
import scipy.sparse as sps
import sklearn as skl
import tabmat as tm
from narwhals.typing import IntoDataFrame
from scipy import linalg, sparse, stats
from typing_extensions import deprecated

from ._distribution import (
BinomialDistribution,
Expand All @@ -38,7 +41,13 @@
_least_squares_solver,
_trust_constr_solver,
)
from ._typing import ArrayLike, ShapedArrayLike, VectorLike, WaldTestResult
from ._typing import (
ArrayLike,
ShapedArrayLike,
ShapedArrayLikeConverted,
VectorLike,
WaldTestResult,
)
from ._utils import (
add_missing_categories,
align_df_categories,
Expand Down Expand Up @@ -174,6 +183,29 @@ def link_instance(self) -> Link:
else:
return get_link(self.link, self.family_instance)

@property
def categorical_levels_(self) -> dict[str, list[str]]:
    """Return the levels of each categorical feature column.

    Maps column name -> list of category levels, as stored when the
    estimator was fitted (``_categorical_levels_``). For estimators
    pickled before this attribute existed, the mapping is reconstructed
    from the legacy pandas ``feature_dtypes_`` attribute instead.

    Raises
    ------
    AttributeError
        If neither attribute is present (e.g. the estimator is unfitted).
    """
    if hasattr(self, "_categorical_levels_"):
        return self._categorical_levels_
    # NOTE(review): this ``hasattr`` call goes through the deprecated
    # ``feature_dtypes_`` property below, so the legacy path presumably
    # emits a deprecation warning — confirm that is intended.
    if hasattr(self, "feature_dtypes_"):
        # Compatibility with pickled models
        return {
            col: dtype.categories.tolist()
            for col, dtype in self.feature_dtypes_.items()
            if isinstance(dtype, pd.CategoricalDtype)
        }
    raise AttributeError("No categorical levels stored.")

@property
@deprecated("Use `categorical_levels_` instead.")
def feature_dtypes_(self) -> dict[str, Any]:
    # Deprecated accessor for the pandas dtypes recorded at fit time;
    # kept only so that old code and pickled models keep working.
    return self._feature_dtypes_

@feature_dtypes_.setter
@deprecated("Use `categorical_levels_` instead.")
def feature_dtypes_(self, value: dict[str, Any]) -> None:
    # Deprecated setter; stores the dtype mapping verbatim for
    # backwards compatibility only.
    self._feature_dtypes_ = value

def _get_start_coef(
self,
X: Union[tm.MatrixBase, tm.StandardizedMatrix],
Expand Down Expand Up @@ -245,9 +277,9 @@ def _get_start_coef(

return coef

def _convert_from_pandas(
def _convert_from_df(
self,
df: pd.DataFrame,
df: IntoDataFrame,
context: Optional[Mapping[str, Any]] = None,
) -> tm.MatrixBase:
"""Convert a pandas data frame to a tabmat matrix."""
Expand All @@ -256,25 +288,27 @@ def _convert_from_pandas(

cat_missing_method_after_alignment = getattr(self, "cat_missing_method", "fail")

if hasattr(self, "feature_dtypes_"):
df = nw.from_native(df)

if hasattr(self, "categorical_levels_"):
df = align_df_categories(
df,
self.feature_dtypes_,
self.categorical_levels_,
getattr(self, "has_missing_category_", {}),
cat_missing_method_after_alignment,
)
if cat_missing_method_after_alignment == "convert":
df = add_missing_categories(
df=df,
dtypes=self.feature_dtypes_,
categorical_levels=self.categorical_levels_,
feature_names=self.feature_names_,
cat_missing_name=self.cat_missing_name,
categorical_format=self.categorical_format,
)
# there should be no missing categories after this
cat_missing_method_after_alignment = "fail"

X = tm.from_pandas(
X = tm.from_df(
df,
drop_first=self.drop_first,
categorical_format=getattr( # convention prior to v3
Expand Down Expand Up @@ -718,8 +752,8 @@ def linear_predictor(
elif alpha is not None:
alpha_index = [self._find_alpha_index(a) for a in alpha] # type: ignore

if isinstance(X, pd.DataFrame):
X = self._convert_from_pandas(X, context=capture_context(context))
if nw.dependencies.is_into_dataframe(X):
X = self._convert_from_df(X, context=capture_context(context))

X = check_array_tabmat_compliant(
X,
Expand Down Expand Up @@ -807,8 +841,9 @@ def predict(
array, shape (n_samples, n_alphas)
Predicted values times ``sample_weight``.
"""
if isinstance(X, pd.DataFrame):
X = self._convert_from_pandas(X, context=capture_context(context))
if nw.dependencies.is_into_dataframe(X):
X = self._convert_from_df(X, context=capture_context(context))
X = cast(ShapedArrayLikeConverted, X)

eta = self.linear_predictor(
X, offset=offset, alpha_index=alpha_index, alpha=alpha, context=context
Expand Down Expand Up @@ -1452,8 +1487,8 @@ def covariance_matrix(
y = self.y_model_spec_.get_model_matrix(X).toarray().ravel()
# This has to go first because X is modified in the next line

if isinstance(X, pd.DataFrame):
X = self._convert_from_pandas(X, context=capture_context(context))
if nw.dependencies.is_into_dataframe(X):
X = self._convert_from_df(X, context=capture_context(context))

X, y = check_X_y_tabmat_compliant(
X,
Expand Down Expand Up @@ -1566,8 +1601,8 @@ def covariance_matrix(
def score(
self,
X: ShapedArrayLike,
y: ShapedArrayLike,
sample_weight: Optional[ArrayLike] = None,
y: VectorLike,
sample_weight: Optional[VectorLike] = None,
offset: Optional[ArrayLike] = None,
*,
context: Optional[Union[int, Mapping[str, Any]]] = None,
Expand Down Expand Up @@ -1724,7 +1759,7 @@ def _should_copy_X(self):
def _set_up_and_check_fit_args(
self,
X: ArrayLike,
y: Optional[ArrayLike],
y: Optional[VectorLike],
sample_weight: Optional[VectorLike],
offset: Optional[VectorLike],
force_all_finite,
Expand All @@ -1747,8 +1782,8 @@ def _set_up_and_check_fit_args(
copy_X = self._should_copy_X()
drop_first = getattr(self, "drop_first", False)

if isinstance(X, pd.DataFrame):
if hasattr(self, "formula") and self.formula is not None:
if nw.dependencies.is_into_dataframe(X):
if getattr(self, "formula", None) is not None:
lhs, rhs = parse_formula(
self.formula, include_intercept=self.fit_intercept
)
Expand Down Expand Up @@ -1802,24 +1837,36 @@ def _set_up_and_check_fit_args(
else:
# Maybe TODO: expand categorical penalties with formulas

self.feature_dtypes_ = X.dtypes.to_dict()
# Backwards compatibility
if isinstance(X, pd.DataFrame):
self.feature_dtypes_ = X.dtypes.to_dict()

X = cast(nw.DataFrame, nw.from_native(X)) # avoid inferring `Never`

self._categorical_levels_ = {
col: X[col].cat.get_categories().to_list()
for col, dtype in X.schema.items()
if isinstance(dtype, (nw.Categorical, nw.Enum))
}

self.has_missing_category_ = {
col: (getattr(self, "cat_missing_method", "fail") == "convert")
and X[col].isna().any()
for col, dtype in self.feature_dtypes_.items()
if isinstance(dtype, pd.CategoricalDtype)
and X[col].is_null().any()
for col in self.categorical_levels_
}

if any(X.dtypes == "category"):
if any(
isinstance(dtype, (nw.Categorical, nw.Enum))
for dtype in X.schema.values()
):
P1 = expand_categorical_penalties(
self.P1, X, drop_first, self.has_missing_category_
)
P2 = expand_categorical_penalties(
self.P2, X, drop_first, self.has_missing_category_
)

X = tm.from_pandas(
X = tm.from_df(
X,
drop_first=drop_first,
categorical_format=getattr( # convention prior to v3
Expand All @@ -1841,7 +1888,7 @@ def _set_up_and_check_fit_args(
"The X matrix is noncontiguous and copy_X = False."
"To fix this, either set copy_X = None or pass a contiguous matrix."
)
X = X.copy()
X = X.copy() # TODO: not all dataframes can be copied like this

if (
not isinstance(X, tm.CategoricalMatrix)
Expand Down Expand Up @@ -2672,8 +2719,8 @@ def fit(

def _compute_information_criteria(
self,
X: ShapedArrayLike,
y: ShapedArrayLike,
X: ShapedArrayLikeConverted,
y: VectorLike,
sample_weight: Optional[ArrayLike] = None,
context: Optional[Mapping[str, Any]] = None,
):
Expand Down Expand Up @@ -2732,7 +2779,7 @@ def _compute_information_criteria(
def aic(
self,
X: ArrayLike,
y: ArrayLike,
y: VectorLike,
sample_weight: Optional[ArrayLike] = None,
*,
context: Optional[Union[int, Mapping[str, Any]]] = None,
Expand Down Expand Up @@ -2769,7 +2816,7 @@ def aic(
def aicc(
self,
X: ArrayLike,
y: ArrayLike,
y: VectorLike,
sample_weight: Optional[ArrayLike] = None,
*,
context: Optional[Union[int, Mapping[str, Any]]] = None,
Expand Down Expand Up @@ -2814,7 +2861,7 @@ def aicc(
def bic(
self,
X: ArrayLike,
y: ArrayLike,
y: VectorLike,
sample_weight: Optional[ArrayLike] = None,
*,
context: Optional[Union[int, Mapping[str, Any]]] = None,
Expand Down Expand Up @@ -2853,7 +2900,7 @@ def _get_info_criteria(
self,
crit: str,
X: ArrayLike,
y: ArrayLike,
y: VectorLike,
sample_weight: Optional[ArrayLike] = None,
context: Optional[Union[int, Mapping[str, Any]]] = None,
):
Expand Down
12 changes: 10 additions & 2 deletions src/glum/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,30 @@
import pandas as pd
import scipy.sparse
import tabmat as tm
from narwhals.typing import IntoDataFrame

VectorLike = Union[np.ndarray, pd.api.extensions.ExtensionArray, pd.Index, pd.Series]

ArrayLike = Union[
list,
tm.MatrixBase,
tm.StandardizedMatrix,
pd.DataFrame,
IntoDataFrame,
scipy.sparse.spmatrix,
VectorLike,
]

ShapedArrayLike = Union[
tm.MatrixBase,
tm.StandardizedMatrix,
pd.DataFrame,
IntoDataFrame,
scipy.sparse.spmatrix,
VectorLike,
]

ShapedArrayLikeConverted = Union[
tm.MatrixBase,
tm.StandardizedMatrix,
scipy.sparse.spmatrix,
VectorLike,
]
Expand Down
Loading
Loading