Skip to content

Commit 4d829a4

Browse files
committed
Add test for raising on unseen categories
1 parent 06f0f4a commit 4d829a4

File tree

2 files changed: 23 additions and 1 deletion

2 files changed: 23 additions and 1 deletion

src/tabmat/formula.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,7 @@ def encode_contrasts(
726726
force_convert = _state.get("force_convert", False)
727727

728728
if levels is not None:
729-
unseen_categories = set(data.unique()) - set(levels)
729+
unseen_categories = set(data.dropna().unique()) - set(levels)
730730
if unseen_categories:
731731
raise ValueError(
732732
f"Column {data.name} contains unseen categories: {unseen_categories}."

tests/test_formula.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,28 @@ def test_cat_missing_interactions():
746746
assert tm.from_formula(formula, df).column_names == expected_names
747747

748748

749+
@pytest.mark.parametrize(
    "cat_missing_method",
    [pytest.param("zero", id="zero"), pytest.param("convert", id="convert")],
)
def test_unseen_category(cat_missing_method):
    """Check that a level absent at fit time is rejected on re-application.

    A model matrix is built from a frame whose ``cat_1`` column has the
    levels ``a`` and ``b``. Its model spec is then applied to a frame that
    additionally contains ``c``; that call is expected to raise a
    ``ValueError`` whose message mentions the unseen categories.
    """
    train = pd.DataFrame({"cat_1": pd.Categorical(["a", "b"])})
    with_extra_level = pd.DataFrame({"cat_1": pd.Categorical(["a", "b", "c"])})

    fitted = tm.from_formula(
        "cat_1 - 1",
        train,
        cat_missing_method=cat_missing_method,
    )

    with pytest.raises(ValueError, match="contains unseen categories"):
        fitted.model_spec.get_model_matrix(with_extra_level)
769+
770+
749771
# Tests from formulaic's test suite
750772
# ---------------------------------
751773

0 commit comments

Comments
 (0)