Fix hints

lbittarello · lbittarello · commit 9189ab365d67 · 2025-01-07T14:53:00.000Z
diff --git a/src/tabmat/categorical_matrix.py b/src/tabmat/categorical_matrix.py
@@ -339,7 +339,7 @@ def __init__(
         cat_vec,
         categories: Optional[np.ndarray] = None,
         drop_first: bool = False,
-        dtype: np.dtype = np.float64,
+        dtype: np.typing.DTypeLike = np.float64,
         column_name: Optional[str] = None,
         term_name: Optional[str] = None,
         column_name_format: str = "{name}[{category}]",
diff --git a/src/tabmat/constructor.py b/src/tabmat/constructor.py
@@ -28,7 +28,7 @@
 @nw.narwhalify(eager_only=True)
 def from_df(
     df,
-    dtype: np.dtype = np.float64,
+    dtype: np.typing.DTypeLike = np.float64,
     sparse_threshold: float = 0.1,
     cat_threshold: int = 4,
     object_as_cat: bool = False,
@@ -207,7 +207,7 @@ def from_df(
 
 def from_pandas(
     df,
-    dtype: np.dtype = np.float64,
+    dtype: np.typing.DTypeLike = np.float64,
     sparse_threshold: float = 0.1,
     cat_threshold: int = 4,
     object_as_cat: bool = False,
@@ -300,7 +300,7 @@ def from_formula(
     data,
     ensure_full_rank: bool = False,
     na_action: Union[str, NAAction] = NAAction.IGNORE,
-    dtype: np.dtype = np.float64,
+    dtype: np.typing.DTypeLike = np.float64,
     sparse_threshold: float = 0.1,
     cat_threshold: int = 4,
     interaction_separator: str = ":",
diff --git a/src/tabmat/formula.py b/src/tabmat/formula.py
@@ -5,8 +5,8 @@
 from collections.abc import Iterable
 from typing import Any, Optional, Union
 
-import numpy
-import pandas
+import numpy as np
+import pandas as pd
 from formulaic import ModelMatrix, ModelSpec
 from formulaic.errors import FactorEncodingError
 from formulaic.materializers import FormulaMaterializer
@@ -33,7 +33,7 @@ class TabmatMaterializer(FormulaMaterializer):
     """Materializer for pandas input and tabmat output."""
 
     REGISTER_NAME = "tabmat"
-    REGISTER_INPUTS = ("pandas.core.frame.DataFrame", "pandas.DataFrame")
+    REGISTER_INPUTS = ("pandas.core.frame.DataFrame", "pandas.DataFrame")
     REGISTER_OUTPUTS = "tabmat"
 
     @override
@@ -43,7 +43,7 @@ def _init(self):
             "categorical_format", "{name}[{category}]"
         )
         self.intercept_name = self.params.get("intercept_name", "Intercept")
-        self.dtype = self.params.get("dtype", numpy.float64)
+        self.dtype = self.params.get("dtype", np.float64)
         self.sparse_threshold = self.params.get("sparse_threshold", 0.1)
         self.cat_threshold = self.params.get("cat_threshold", 4)
         self.add_column_for_intercept = self.params.get(
@@ -57,9 +57,9 @@ def _init(self):
 
     @override
     def _is_categorical(self, values):
-        if isinstance(values, (pandas.Series, pandas.Categorical)):
+        if isinstance(values, (pd.Series, pd.Categorical)):
             return values.dtype == object or isinstance(
-                values.dtype, pandas.CategoricalDtype
+                values.dtype, pd.CategoricalDtype
             )
         return super()._is_categorical(values)
 
@@ -69,12 +69,12 @@ def _check_for_nulls(self, name, values, na_action, drop_rows):
             return
 
         if na_action is NAAction.RAISE:
-            if isinstance(values, pandas.Series) and values.isnull().values.any():
+            if isinstance(values, pd.Series) and values.isnull().values.any():
                 raise ValueError(f"`{name}` contains null values after evaluation.")
 
         elif na_action is NAAction.DROP:
-            if isinstance(values, pandas.Series):
-                drop_rows.update(numpy.flatnonzero(values.isnull().values))
+            if isinstance(values, pd.Series):
+                drop_rows.update(np.flatnonzero(values.isnull().values))
 
         else:
             raise ValueError(
@@ -83,17 +83,17 @@ def _check_for_nulls(self, name, values, na_action, drop_rows):
 
     @override
     def _encode_constant(self, value, metadata, encoder_state, spec, drop_rows):
-        series = value * numpy.ones(self.nrows - len(drop_rows))
+        series = value * np.ones(self.nrows - len(drop_rows))
         return _InteractableDenseVector(series, name=self.intercept_name)
 
     @override
     def _encode_numerical(self, values, metadata, encoder_state, spec, drop_rows):
         if drop_rows:
             values = values.drop(index=values.index[drop_rows])
-        if isinstance(values, pandas.Series):
+        if isinstance(values, pd.Series):
             values = values.to_numpy().astype(self.dtype)
         if (values != 0).mean() <= self.sparse_threshold:
-            return _InteractableSparseVector(sps.csc_matrix(values[:, numpy.newaxis]))
+            return _InteractableSparseVector(sps.csc_matrix(values[:, np.newaxis]))
         else:
             return _InteractableDenseVector(values)
 
@@ -118,7 +118,7 @@ def _encode_categorical(
     def _combine_columns(self, cols, spec, drop_rows):
         # Special case no columns
         if not cols:
-            values = numpy.empty((self.data.shape[0], 0), dtype=self.dtype)
+            values = np.empty((self.data.shape[0], 0), dtype=self.dtype)
             return DenseMatrix(values)
 
         # Otherwise, concatenate columns into SplitMatrix
@@ -305,7 +305,7 @@ class _InteractableVector(ABC):
     @abstractmethod
     def to_tabmat(
         self,
-        dtype: numpy.dtype,
+        dtype: np.typing.DTypeLike,
         sparse_threshold: float,
         cat_threshold: int,
     ) -> MatrixBase:
@@ -345,7 +345,7 @@ def set_name(self, name, name_format):
 
 
 class _InteractableDenseVector(_InteractableVector):
-    def __init__(self, values: numpy.ndarray, name: Optional[str] = None):
+    def __init__(self, values: np.ndarray, name: Optional[str] = None):
         self.values = values
         self.name = name
 
@@ -358,7 +358,7 @@ def __rmul__(self, other):
 
     def to_tabmat(
         self,
-        dtype: numpy.dtype = numpy.float64,
+        dtype: np.typing.DTypeLike = np.float64,
         sparse_threshold: float = 0.1,
         cat_threshold: int = 4,
     ) -> Union[SparseMatrix, DenseMatrix]:
@@ -367,7 +367,7 @@ def to_tabmat(
         else:
             # Columns can become sparser, but not denser through interactions
             return SparseMatrix(
-                sps.csc_matrix(self.values[:, numpy.newaxis]), column_names=[self.name]
+                sps.csc_matrix(self.values[:, np.newaxis]), column_names=[self.name]
             )
 
     def get_names(self) -> list[str]:
@@ -394,7 +394,7 @@ def __rmul__(self, other):
 
     def to_tabmat(
         self,
-        dtype: numpy.dtype = numpy.float64,
+        dtype: np.typing.DTypeLike = np.float64,
         sparse_threshold: float = 0.1,
         cat_threshold: int = 4,
     ) -> SparseMatrix:
@@ -413,9 +413,9 @@ def set_name(self, name, name_format=None) -> "_InteractableSparseVector":
 class _InteractableCategoricalVector(_InteractableVector):
     def __init__(
         self,
-        codes: numpy.ndarray,
+        codes: np.ndarray,
         categories: list[str],
-        multipliers: numpy.ndarray,
+        multipliers: np.ndarray,
         name: Optional[str] = None,
     ):
         # sentinel values for codes:
@@ -429,15 +429,15 @@ def __init__(
     @classmethod
     def from_categorical(
         cls,
-        cat: pandas.Categorical,
+        cat: pd.Categorical,
         reduced_rank: bool,
         missing_method: str = "fail",
         missing_name: str = "(MISSING)",
         add_missing_category: bool = False,
     ) -> "_InteractableCategoricalVector":
         """Create an interactable categorical vector from a pandas categorical."""
         categories = list(cat.categories)
-        codes = cat.codes.copy().astype(numpy.int64)
+        codes = cat.codes.copy().astype(np.int64)
 
         if reduced_rank:
             codes[codes == 0] = -2
@@ -457,7 +457,7 @@ def from_categorical(
         return cls(
             codes=codes,
             categories=categories,
-            multipliers=numpy.ones(len(cat.codes)),
+            multipliers=np.ones(len(cat.codes)),
         )
 
     def __rmul__(self, other):
@@ -471,7 +471,7 @@ def __rmul__(self, other):
 
     def to_tabmat(
         self,
-        dtype: numpy.dtype = numpy.float64,
+        dtype: np.typing.DTypeLike = np.float64,
         sparse_threshold: float = 0.1,
         cat_threshold: int = 4,
     ) -> Union[DenseMatrix, CategoricalMatrix, SplitMatrix]:
@@ -485,7 +485,7 @@ def to_tabmat(
         else:
             drop_first = False
 
-        cat = pandas.Categorical.from_codes(
+        cat = pd.Categorical.from_codes(
             codes=codes,
             categories=categories,
             ordered=False,
@@ -502,12 +502,12 @@ def to_tabmat(
 
         if (self.codes == -2).all():
             # All values are dropped
-            return DenseMatrix(numpy.empty((len(codes), 0), dtype=dtype))
+            return DenseMatrix(np.empty((len(codes), 0), dtype=dtype))
         elif (self.multipliers == 1).all() and len(categories) >= cat_threshold:
             return categorical_part
         else:
             sparse_matrix = sps.csc_matrix(
-                categorical_part.tocsr().multiply(self.multipliers[:, numpy.newaxis])
+                categorical_part.tocsr().multiply(self.multipliers[:, np.newaxis])
             )
             (
                 dense_part,
@@ -744,7 +744,7 @@ def encode_contrasts(
                 f"Column {data.name} contains unseen categories: {unseen_categories}."
             )
 
-    cat = pandas.Categorical(data._values, categories=levels)
+    cat = pd.Categorical(data._values, categories=levels)
     _state["categories"] = cat.categories
     _state["add_missing_category"] = add_missing_category or (
         missing_method == "convert" and cat.isna().any()