Skip to content

Commit

Permalink
Fix hints
Browse files Browse the repository at this point in the history
  • Loading branch information
lbittarello committed Jan 7, 2025
1 parent 2ca74aa commit 9189ab3
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 32 deletions.
2 changes: 1 addition & 1 deletion src/tabmat/categorical_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def __init__(
cat_vec,
categories: Optional[np.ndarray] = None,
drop_first: bool = False,
dtype: np.dtype = np.float64,
dtype: np.typing.DTypeLike = np.float64,
column_name: Optional[str] = None,
term_name: Optional[str] = None,
column_name_format: str = "{name}[{category}]",
Expand Down
6 changes: 3 additions & 3 deletions src/tabmat/constructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
@nw.narwhalify(eager_only=True)
def from_df(
df,
dtype: np.dtype = np.float64,
dtype: np.typing.DTypeLike = np.float64,
sparse_threshold: float = 0.1,
cat_threshold: int = 4,
object_as_cat: bool = False,
Expand Down Expand Up @@ -207,7 +207,7 @@ def from_df(

def from_pandas(
df,
dtype: np.dtype = np.float64,
dtype: np.typing.DTypeLike = np.float64,
sparse_threshold: float = 0.1,
cat_threshold: int = 4,
object_as_cat: bool = False,
Expand Down Expand Up @@ -300,7 +300,7 @@ def from_formula(
data,
ensure_full_rank: bool = False,
na_action: Union[str, NAAction] = NAAction.IGNORE,
dtype: np.dtype = np.float64,
dtype: np.typing.DTypeLike = np.float64,
sparse_threshold: float = 0.1,
cat_threshold: int = 4,
interaction_separator: str = ":",
Expand Down
56 changes: 28 additions & 28 deletions src/tabmat/formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from collections.abc import Iterable
from typing import Any, Optional, Union

import numpy
import pandas
import numpy as np
import pandas as pd
from formulaic import ModelMatrix, ModelSpec
from formulaic.errors import FactorEncodingError
from formulaic.materializers import FormulaMaterializer
Expand All @@ -33,7 +33,7 @@ class TabmatMaterializer(FormulaMaterializer):
"""Materializer for pandas input and tabmat output."""

REGISTER_NAME = "tabmat"
REGISTER_INPUTS = ("pandas.core.frame.DataFrame", "pandas.DataFrame")
# These are fully qualified type names stored as strings, not uses of the
# `pd` import alias, so they must keep the real package name "pandas".
# NOTE(review): formulaic is expected to compare these against the input
# object's "<module>.<qualname>" — confirm against the formulaic version in use.
REGISTER_INPUTS = ("pandas.core.frame.DataFrame", "pandas.DataFrame")
REGISTER_OUTPUTS = "tabmat"

@override
Expand All @@ -43,7 +43,7 @@ def _init(self):
"categorical_format", "{name}[{category}]"
)
self.intercept_name = self.params.get("intercept_name", "Intercept")
self.dtype = self.params.get("dtype", numpy.float64)
self.dtype = self.params.get("dtype", np.float64)
self.sparse_threshold = self.params.get("sparse_threshold", 0.1)
self.cat_threshold = self.params.get("cat_threshold", 4)
self.add_column_for_intercept = self.params.get(
Expand All @@ -57,9 +57,9 @@ def _init(self):

@override
def _is_categorical(self, values):
if isinstance(values, (pandas.Series, pandas.Categorical)):
if isinstance(values, (pd.Series, pd.Categorical)):
return values.dtype == object or isinstance(
values.dtype, pandas.CategoricalDtype
values.dtype, pd.CategoricalDtype
)
return super()._is_categorical(values)

Expand All @@ -69,12 +69,12 @@ def _check_for_nulls(self, name, values, na_action, drop_rows):
return

if na_action is NAAction.RAISE:
if isinstance(values, pandas.Series) and values.isnull().values.any():
if isinstance(values, pd.Series) and values.isnull().values.any():
raise ValueError(f"`{name}` contains null values after evaluation.")

elif na_action is NAAction.DROP:
if isinstance(values, pandas.Series):
drop_rows.update(numpy.flatnonzero(values.isnull().values))
if isinstance(values, pd.Series):
drop_rows.update(np.flatnonzero(values.isnull().values))

else:
raise ValueError(
Expand All @@ -83,17 +83,17 @@ def _check_for_nulls(self, name, values, na_action, drop_rows):

@override
def _encode_constant(self, value, metadata, encoder_state, spec, drop_rows):
series = value * numpy.ones(self.nrows - len(drop_rows))
series = value * np.ones(self.nrows - len(drop_rows))
return _InteractableDenseVector(series, name=self.intercept_name)

@override
def _encode_numerical(self, values, metadata, encoder_state, spec, drop_rows):
if drop_rows:
values = values.drop(index=values.index[drop_rows])
if isinstance(values, pandas.Series):
if isinstance(values, pd.Series):
values = values.to_numpy().astype(self.dtype)
if (values != 0).mean() <= self.sparse_threshold:
return _InteractableSparseVector(sps.csc_matrix(values[:, numpy.newaxis]))
return _InteractableSparseVector(sps.csc_matrix(values[:, np.newaxis]))
else:
return _InteractableDenseVector(values)

Expand All @@ -118,7 +118,7 @@ def _encode_categorical(
def _combine_columns(self, cols, spec, drop_rows):
# Special case no columns
if not cols:
values = numpy.empty((self.data.shape[0], 0), dtype=self.dtype)
values = np.empty((self.data.shape[0], 0), dtype=self.dtype)
return DenseMatrix(values)

# Otherwise, concatenate columns into SplitMatrix
Expand Down Expand Up @@ -305,7 +305,7 @@ class _InteractableVector(ABC):
@abstractmethod
def to_tabmat(
self,
dtype: numpy.dtype,
dtype: np.typing.DTypeLike,
sparse_threshold: float,
cat_threshold: int,
) -> MatrixBase:
Expand Down Expand Up @@ -345,7 +345,7 @@ def set_name(self, name, name_format):


class _InteractableDenseVector(_InteractableVector):
def __init__(self, values: numpy.ndarray, name: Optional[str] = None):
def __init__(self, values: np.ndarray, name: Optional[str] = None):
self.values = values
self.name = name

Expand All @@ -358,7 +358,7 @@ def __rmul__(self, other):

def to_tabmat(
self,
dtype: numpy.dtype = numpy.float64,
dtype: np.typing.DTypeLike = np.float64,
sparse_threshold: float = 0.1,
cat_threshold: int = 4,
) -> Union[SparseMatrix, DenseMatrix]:
Expand All @@ -367,7 +367,7 @@ def to_tabmat(
else:
# Columns can become sparser, but not denser through interactions
return SparseMatrix(
sps.csc_matrix(self.values[:, numpy.newaxis]), column_names=[self.name]
sps.csc_matrix(self.values[:, np.newaxis]), column_names=[self.name]
)

def get_names(self) -> list[str]:
Expand All @@ -394,7 +394,7 @@ def __rmul__(self, other):

def to_tabmat(
self,
dtype: numpy.dtype = numpy.float64,
dtype: np.typing.DTypeLike = np.float64,
sparse_threshold: float = 0.1,
cat_threshold: int = 4,
) -> SparseMatrix:
Expand All @@ -413,9 +413,9 @@ def set_name(self, name, name_format=None) -> "_InteractableSparseVector":
class _InteractableCategoricalVector(_InteractableVector):
def __init__(
self,
codes: numpy.ndarray,
codes: np.ndarray,
categories: list[str],
multipliers: numpy.ndarray,
multipliers: np.ndarray,
name: Optional[str] = None,
):
# sentinel values for codes:
Expand All @@ -429,15 +429,15 @@ def __init__(
@classmethod
def from_categorical(
cls,
cat: pandas.Categorical,
cat: pd.Categorical,
reduced_rank: bool,
missing_method: str = "fail",
missing_name: str = "(MISSING)",
add_missing_category: bool = False,
) -> "_InteractableCategoricalVector":
"""Create an interactable categorical vector from a pandas categorical."""
categories = list(cat.categories)
codes = cat.codes.copy().astype(numpy.int64)
codes = cat.codes.copy().astype(np.int64)

if reduced_rank:
codes[codes == 0] = -2
Expand All @@ -457,7 +457,7 @@ def from_categorical(
return cls(
codes=codes,
categories=categories,
multipliers=numpy.ones(len(cat.codes)),
multipliers=np.ones(len(cat.codes)),
)

def __rmul__(self, other):
Expand All @@ -471,7 +471,7 @@ def __rmul__(self, other):

def to_tabmat(
self,
dtype: numpy.dtype = numpy.float64,
dtype: np.typing.DTypeLike = np.float64,
sparse_threshold: float = 0.1,
cat_threshold: int = 4,
) -> Union[DenseMatrix, CategoricalMatrix, SplitMatrix]:
Expand All @@ -485,7 +485,7 @@ def to_tabmat(
else:
drop_first = False

cat = pandas.Categorical.from_codes(
cat = pd.Categorical.from_codes(
codes=codes,
categories=categories,
ordered=False,
Expand All @@ -502,12 +502,12 @@ def to_tabmat(

if (self.codes == -2).all():
# All values are dropped
return DenseMatrix(numpy.empty((len(codes), 0), dtype=dtype))
return DenseMatrix(np.empty((len(codes), 0), dtype=dtype))
elif (self.multipliers == 1).all() and len(categories) >= cat_threshold:
return categorical_part
else:
sparse_matrix = sps.csc_matrix(
categorical_part.tocsr().multiply(self.multipliers[:, numpy.newaxis])
categorical_part.tocsr().multiply(self.multipliers[:, np.newaxis])
)
(
dense_part,
Expand Down Expand Up @@ -744,7 +744,7 @@ def encode_contrasts(
f"Column {data.name} contains unseen categories: {unseen_categories}."
)

cat = pandas.Categorical(data._values, categories=levels)
cat = pd.Categorical(data._values, categories=levels)
_state["categories"] = cat.categories
_state["add_missing_category"] = add_missing_category or (
missing_method == "convert" and cat.isna().any()
Expand Down

0 comments on commit 9189ab3

Please sign in to comment.