|
6 | 6 |
|
7 | 7 | from bankstatements_core.extraction.row_classifiers import ( |
8 | 8 | AdministrativeClassifier, |
| 9 | + ClassifierRegistry, |
9 | 10 | DefaultMetadataClassifier, |
10 | 11 | FXContinuationClassifier, |
11 | 12 | HeaderMetadataClassifier, |
@@ -390,3 +391,118 @@ def test_looks_like_date(self): |
390 | 391 | assert classifier._looks_like_date("15 December") is True # Full month name |
391 | 392 | assert classifier._looks_like_date("01JAN2023") is True |
392 | 393 | assert classifier._looks_like_date("Not a date") is False |
| 394 | + |
| 395 | + |
class TestClassifierRegistry:
    """Behavioural checks for ClassifierRegistry construction, ordering and chaining."""

    def test_classifier_priority_order(self):
        """The reported order pairs each priority with its classifier's class name."""
        declared = [
            (0, HeaderMetadataClassifier),
            (1, AdministrativeClassifier),
            (2, ReferenceCodeClassifier),
            (3, FXContinuationClassifier),
            (4, TimestampMetadataClassifier),
            (5, TransactionClassifier),
            (6, DefaultMetadataClassifier),
        ]
        reported = ClassifierRegistry(declared).get_priority_order()

        # Spot-check the first slot and the two trailing slots.
        spot_checks = (
            (0, (0, "HeaderMetadataClassifier")),
            (5, (5, "TransactionClassifier")),
            (6, (6, "DefaultMetadataClassifier")),
        )
        for slot, wanted in spot_checks:
            assert reported[slot] == wanted

    def test_duplicate_priority_raises(self):
        """Two entries sharing a priority are rejected with ValueError on construction."""
        clashing = [
            (0, HeaderMetadataClassifier),
            (0, TransactionClassifier),
        ]
        with pytest.raises(ValueError, match="priority 0 already assigned"):
            ClassifierRegistry(clashing)

    def test_non_classifier_subclass_raises(self):
        """An entry whose class is plain ``object`` is rejected with TypeError."""
        # The literal stays inline so the type-checker suppression keeps applying.
        with pytest.raises(TypeError):
            ClassifierRegistry([(0, object)])  # type: ignore[list-item]

    def test_build_chain_returns_head(self):
        """The chain head is an instance of the priority-0 classifier."""
        entries = [
            (0, HeaderMetadataClassifier),
            (1, DefaultMetadataClassifier),
        ]
        chain_head = ClassifierRegistry(entries).build_chain()

        assert isinstance(chain_head, RowClassifier)
        assert isinstance(chain_head, HeaderMetadataClassifier)

    def test_priorities_sorted_regardless_of_input_order(self):
        """Entries supplied out of order are reported in ascending priority."""
        shuffled = [
            (5, TransactionClassifier),
            (0, HeaderMetadataClassifier),
            (6, DefaultMetadataClassifier),
        ]
        reported = ClassifierRegistry(shuffled).get_priority_order()

        ascending = (
            (0, "HeaderMetadataClassifier"),
            (5, "TransactionClassifier"),
            (6, "DefaultMetadataClassifier"),
        )
        for position, wanted in enumerate(ascending):
            assert reported[position] == wanted

    @pytest.mark.parametrize(
        "row,expected,reason",
        [
            (
                {"Date": "date", "Details": "Purchase", "Debit €": "50.00"},
                "metadata",
                "HeaderMetadata (0) beats Transaction (5) for header-like date value",
            ),
            (
                {
                    "Date": "",
                    "Details": "BALANCE FORWARD",
                    "Debit €": "",
                    "Credit €": "",
                    "Balance €": "",
                },
                "administrative",
                "Administrative (1) beats Transaction (5) for BALANCE FORWARD with no balance",
            ),
            (
                {"Date": "", "Details": "0.828571", "Debit €": "", "Credit €": ""},
                "continuation",
                "FXContinuation (3) beats Transaction (5) for exchange-rate-only rows",
            ),
        ],
    )
    def test_ambiguous_row_priority(self, row, expected, reason):
        """Each ambiguous row gets the label named in its case when run through the default chain."""
        label = create_row_classifier_chain().classify(row, TEST_COLUMNS)
        assert label == expected, reason

    def test_wrong_order_produces_wrong_result(self):
        """Regression guard: moving TransactionClassifier to the front changes the label."""
        transaction_first = [
            (0, TransactionClassifier),
            (1, HeaderMetadataClassifier),
            (2, AdministrativeClassifier),
            (3, ReferenceCodeClassifier),
            (4, FXContinuationClassifier),
            (5, TimestampMetadataClassifier),
            (6, DefaultMetadataClassifier),
        ]
        misordered_chain = ClassifierRegistry(transaction_first).build_chain()
        header_like_row = {"Date": "date", "Details": "Purchase", "Debit €": "50.00"}

        # With TransactionClassifier at priority 0 the row is labelled a transaction.
        assert misordered_chain.classify(header_like_row, TEST_COLUMNS) == "transaction"

        # The default chain labels the very same row as metadata instead.
        default_chain = create_row_classifier_chain()
        assert default_chain.classify(header_like_row, TEST_COLUMNS) == "metadata"
0 commit comments