Skip to content

Commit

Permalink
Add test for raising on unseen categories
Browse files: browse the repository at this point in the history
  • Loading branch information
stanmart committed Jan 22, 2024
1 parent 06f0f4a commit 4d829a4
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/tabmat/formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ def encode_contrasts(
force_convert = _state.get("force_convert", False)

if levels is not None:
unseen_categories = set(data.unique()) - set(levels)
unseen_categories = set(data.dropna().unique()) - set(levels)
if unseen_categories:
raise ValueError(
f"Column {data.name} contains unseen categories: {unseen_categories}."
Expand Down
22 changes: 22 additions & 0 deletions tests/test_formula.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,28 @@ def test_cat_missing_interactions():
assert tm.from_formula(formula, df).column_names == expected_names


@pytest.mark.parametrize(
    "cat_missing_method", ["zero", "convert"], ids=["zero", "convert"]
)
def test_unseen_category(cat_missing_method):
    """Re-applying a model spec to data with a new category must raise.

    A model matrix is first built from a frame whose categorical column
    only holds the levels ``"a"`` and ``"b"``. The resulting model spec is
    then applied to a frame that additionally contains ``"c"``; this is
    expected to fail with a ``ValueError`` mentioning unseen categories,
    for each parametrized ``cat_missing_method``.
    """
    known_levels = ["a", "b"]
    training_frame = pd.DataFrame({"cat_1": pd.Categorical(known_levels)})
    # Same column, but with one extra level that was absent when the
    # spec was created.
    frame_with_new_level = pd.DataFrame(
        {"cat_1": pd.Categorical(known_levels + ["c"])}
    )

    fitted = tm.from_formula(
        "cat_1 - 1",
        training_frame,
        cat_missing_method=cat_missing_method,
    )

    with pytest.raises(ValueError, match="contains unseen categories"):
        fitted.model_spec.get_model_matrix(frame_with_new_level)


# Tests from formulaic's test suite
# ---------------------------------

Expand Down

0 comments on commit 4d829a4

Please sign in to comment.