From 2644d314372fe3f979c84e104066a0e41f762cdb Mon Sep 17 00:00:00 2001
From: Martin Stancsics
Date: Thu, 25 Jan 2024 10:43:38 +0100
Subject: [PATCH] Add comment about dropping missings in tests for new levels

---
 src/tabmat/formula.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/tabmat/formula.py b/src/tabmat/formula.py
index 7acc821b..0afe6df1 100644
--- a/src/tabmat/formula.py
+++ b/src/tabmat/formula.py
@@ -728,6 +728,9 @@ def encode_contrasts(
     # Check for unseen categories when levels are specified
     if levels is not None:
         if missing_method == "convert" and not add_missing_category:
+            # We only need to include NAs in the check in this case because:
+            # - missing_method == "fail" raises a more appropriate error later
+            # - missings are no problem in the other cases
             unseen_categories = set(data.unique()) - set(levels)
         else:
             unseen_categories = set(data.dropna().unique()) - set(levels)