Skip to content

Commit 9189ab3

Browse files
committed
Fix hints
1 parent 2ca74aa commit 9189ab3

File tree

3 files changed

+32
-32
lines changed

3 files changed

+32
-32
lines changed

src/tabmat/categorical_matrix.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def __init__(
339339
cat_vec,
340340
categories: Optional[np.ndarray] = None,
341341
drop_first: bool = False,
342-
dtype: np.dtype = np.float64,
342+
dtype: np.typing.DTypeLike = np.float64,
343343
column_name: Optional[str] = None,
344344
term_name: Optional[str] = None,
345345
column_name_format: str = "{name}[{category}]",

src/tabmat/constructor.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
@nw.narwhalify(eager_only=True)
2929
def from_df(
3030
df,
31-
dtype: np.dtype = np.float64,
31+
dtype: np.typing.DTypeLike = np.float64,
3232
sparse_threshold: float = 0.1,
3333
cat_threshold: int = 4,
3434
object_as_cat: bool = False,
@@ -207,7 +207,7 @@ def from_df(
207207

208208
def from_pandas(
209209
df,
210-
dtype: np.dtype = np.float64,
210+
dtype: np.typing.DTypeLike = np.float64,
211211
sparse_threshold: float = 0.1,
212212
cat_threshold: int = 4,
213213
object_as_cat: bool = False,
@@ -300,7 +300,7 @@ def from_formula(
300300
data,
301301
ensure_full_rank: bool = False,
302302
na_action: Union[str, NAAction] = NAAction.IGNORE,
303-
dtype: np.dtype = np.float64,
303+
dtype: np.typing.DTypeLike = np.float64,
304304
sparse_threshold: float = 0.1,
305305
cat_threshold: int = 4,
306306
interaction_separator: str = ":",

src/tabmat/formula.py

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
from collections.abc import Iterable
66
from typing import Any, Optional, Union
77

8-
import numpy
9-
import pandas
8+
import numpy as np
9+
import pandas as pd
1010
from formulaic import ModelMatrix, ModelSpec
1111
from formulaic.errors import FactorEncodingError
1212
from formulaic.materializers import FormulaMaterializer
@@ -33,7 +33,7 @@ class TabmatMaterializer(FormulaMaterializer):
3333
"""Materializer for pandas input and tabmat output."""
3434

3535
REGISTER_NAME = "tabmat"
36-
REGISTER_INPUTS = ("pandas.core.frame.DataFrame", "pandas.DataFrame")
36+
# These are fully-qualified type names that formulaic compares against the
# module path of the input data's class; they are NOT references to the local
# `pd` import alias, so they must keep the real package name "pandas".
REGISTER_INPUTS = ("pandas.core.frame.DataFrame", "pandas.DataFrame")
3737
REGISTER_OUTPUTS = "tabmat"
3838

3939
@override
@@ -43,7 +43,7 @@ def _init(self):
4343
"categorical_format", "{name}[{category}]"
4444
)
4545
self.intercept_name = self.params.get("intercept_name", "Intercept")
46-
self.dtype = self.params.get("dtype", numpy.float64)
46+
self.dtype = self.params.get("dtype", np.float64)
4747
self.sparse_threshold = self.params.get("sparse_threshold", 0.1)
4848
self.cat_threshold = self.params.get("cat_threshold", 4)
4949
self.add_column_for_intercept = self.params.get(
@@ -57,9 +57,9 @@ def _init(self):
5757

5858
@override
5959
def _is_categorical(self, values):
60-
if isinstance(values, (pandas.Series, pandas.Categorical)):
60+
if isinstance(values, (pd.Series, pd.Categorical)):
6161
return values.dtype == object or isinstance(
62-
values.dtype, pandas.CategoricalDtype
62+
values.dtype, pd.CategoricalDtype
6363
)
6464
return super()._is_categorical(values)
6565

@@ -69,12 +69,12 @@ def _check_for_nulls(self, name, values, na_action, drop_rows):
6969
return
7070

7171
if na_action is NAAction.RAISE:
72-
if isinstance(values, pandas.Series) and values.isnull().values.any():
72+
if isinstance(values, pd.Series) and values.isnull().values.any():
7373
raise ValueError(f"`{name}` contains null values after evaluation.")
7474

7575
elif na_action is NAAction.DROP:
76-
if isinstance(values, pandas.Series):
77-
drop_rows.update(numpy.flatnonzero(values.isnull().values))
76+
if isinstance(values, pd.Series):
77+
drop_rows.update(np.flatnonzero(values.isnull().values))
7878

7979
else:
8080
raise ValueError(
@@ -83,17 +83,17 @@ def _check_for_nulls(self, name, values, na_action, drop_rows):
8383

8484
@override
8585
def _encode_constant(self, value, metadata, encoder_state, spec, drop_rows):
86-
series = value * numpy.ones(self.nrows - len(drop_rows))
86+
series = value * np.ones(self.nrows - len(drop_rows))
8787
return _InteractableDenseVector(series, name=self.intercept_name)
8888

8989
@override
9090
def _encode_numerical(self, values, metadata, encoder_state, spec, drop_rows):
9191
if drop_rows:
9292
values = values.drop(index=values.index[drop_rows])
93-
if isinstance(values, pandas.Series):
93+
if isinstance(values, pd.Series):
9494
values = values.to_numpy().astype(self.dtype)
9595
if (values != 0).mean() <= self.sparse_threshold:
96-
return _InteractableSparseVector(sps.csc_matrix(values[:, numpy.newaxis]))
96+
return _InteractableSparseVector(sps.csc_matrix(values[:, np.newaxis]))
9797
else:
9898
return _InteractableDenseVector(values)
9999

@@ -118,7 +118,7 @@ def _encode_categorical(
118118
def _combine_columns(self, cols, spec, drop_rows):
119119
# Special case no columns
120120
if not cols:
121-
values = numpy.empty((self.data.shape[0], 0), dtype=self.dtype)
121+
values = np.empty((self.data.shape[0], 0), dtype=self.dtype)
122122
return DenseMatrix(values)
123123

124124
# Otherwise, concatenate columns into SplitMatrix
@@ -305,7 +305,7 @@ class _InteractableVector(ABC):
305305
@abstractmethod
306306
def to_tabmat(
307307
self,
308-
dtype: numpy.dtype,
308+
dtype: np.typing.DTypeLike,
309309
sparse_threshold: float,
310310
cat_threshold: int,
311311
) -> MatrixBase:
@@ -345,7 +345,7 @@ def set_name(self, name, name_format):
345345

346346

347347
class _InteractableDenseVector(_InteractableVector):
348-
def __init__(self, values: numpy.ndarray, name: Optional[str] = None):
348+
def __init__(self, values: np.ndarray, name: Optional[str] = None):
349349
self.values = values
350350
self.name = name
351351

@@ -358,7 +358,7 @@ def __rmul__(self, other):
358358

359359
def to_tabmat(
360360
self,
361-
dtype: numpy.dtype = numpy.float64,
361+
dtype: np.typing.DTypeLike = np.float64,
362362
sparse_threshold: float = 0.1,
363363
cat_threshold: int = 4,
364364
) -> Union[SparseMatrix, DenseMatrix]:
@@ -367,7 +367,7 @@ def to_tabmat(
367367
else:
368368
# Columns can become sparser, but not denser through interactions
369369
return SparseMatrix(
370-
sps.csc_matrix(self.values[:, numpy.newaxis]), column_names=[self.name]
370+
sps.csc_matrix(self.values[:, np.newaxis]), column_names=[self.name]
371371
)
372372

373373
def get_names(self) -> list[str]:
@@ -394,7 +394,7 @@ def __rmul__(self, other):
394394

395395
def to_tabmat(
396396
self,
397-
dtype: numpy.dtype = numpy.float64,
397+
dtype: np.typing.DTypeLike = np.float64,
398398
sparse_threshold: float = 0.1,
399399
cat_threshold: int = 4,
400400
) -> SparseMatrix:
@@ -413,9 +413,9 @@ def set_name(self, name, name_format=None) -> "_InteractableSparseVector":
413413
class _InteractableCategoricalVector(_InteractableVector):
414414
def __init__(
415415
self,
416-
codes: numpy.ndarray,
416+
codes: np.ndarray,
417417
categories: list[str],
418-
multipliers: numpy.ndarray,
418+
multipliers: np.ndarray,
419419
name: Optional[str] = None,
420420
):
421421
# sentinel values for codes:
@@ -429,15 +429,15 @@ def __init__(
429429
@classmethod
430430
def from_categorical(
431431
cls,
432-
cat: pandas.Categorical,
432+
cat: pd.Categorical,
433433
reduced_rank: bool,
434434
missing_method: str = "fail",
435435
missing_name: str = "(MISSING)",
436436
add_missing_category: bool = False,
437437
) -> "_InteractableCategoricalVector":
438438
"""Create an interactable categorical vector from a pandas categorical."""
439439
categories = list(cat.categories)
440-
codes = cat.codes.copy().astype(numpy.int64)
440+
codes = cat.codes.copy().astype(np.int64)
441441

442442
if reduced_rank:
443443
codes[codes == 0] = -2
@@ -457,7 +457,7 @@ def from_categorical(
457457
return cls(
458458
codes=codes,
459459
categories=categories,
460-
multipliers=numpy.ones(len(cat.codes)),
460+
multipliers=np.ones(len(cat.codes)),
461461
)
462462

463463
def __rmul__(self, other):
@@ -471,7 +471,7 @@ def __rmul__(self, other):
471471

472472
def to_tabmat(
473473
self,
474-
dtype: numpy.dtype = numpy.float64,
474+
dtype: np.typing.DTypeLike = np.float64,
475475
sparse_threshold: float = 0.1,
476476
cat_threshold: int = 4,
477477
) -> Union[DenseMatrix, CategoricalMatrix, SplitMatrix]:
@@ -485,7 +485,7 @@ def to_tabmat(
485485
else:
486486
drop_first = False
487487

488-
cat = pandas.Categorical.from_codes(
488+
cat = pd.Categorical.from_codes(
489489
codes=codes,
490490
categories=categories,
491491
ordered=False,
@@ -502,12 +502,12 @@ def to_tabmat(
502502

503503
if (self.codes == -2).all():
504504
# All values are dropped
505-
return DenseMatrix(numpy.empty((len(codes), 0), dtype=dtype))
505+
return DenseMatrix(np.empty((len(codes), 0), dtype=dtype))
506506
elif (self.multipliers == 1).all() and len(categories) >= cat_threshold:
507507
return categorical_part
508508
else:
509509
sparse_matrix = sps.csc_matrix(
510-
categorical_part.tocsr().multiply(self.multipliers[:, numpy.newaxis])
510+
categorical_part.tocsr().multiply(self.multipliers[:, np.newaxis])
511511
)
512512
(
513513
dense_part,
@@ -744,7 +744,7 @@ def encode_contrasts(
744744
f"Column {data.name} contains unseen categories: {unseen_categories}."
745745
)
746746

747-
cat = pandas.Categorical(data._values, categories=levels)
747+
cat = pd.Categorical(data._values, categories=levels)
748748
_state["categories"] = cat.categories
749749
_state["add_missing_category"] = add_missing_category or (
750750
missing_method == "convert" and cat.isna().any()

0 commit comments

Comments
 (0)