Skip to content

Commit

Permalink
Merge pull request #14 from PyCampES/handle_comma
Browse files: browse the repository at this point in the history
handle comma in description
  • Loading branch information
fabridamicelli authored Mar 31, 2024
2 parents 66991d4 + 46cd613 commit 47fdb66
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/ficamp/classifier/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,21 @@ def remove_pipes(s: str) -> str:
return " ".join(s.split("|"))


def remove_colon(s: str) -> str:
    """Replace every colon in *s* with a single space.

    Each ``:`` becomes exactly one space; surrounding whitespace is not
    collapsed, so ``"a::b"`` becomes ``"a  b"``.
    """
    # Same split/join form as remove_pipes, for consistency.
    return " ".join(s.split(":"))


def remove_comma(s: str) -> str:
    """Replace every comma in *s* with a single space.

    Each ``,`` becomes exactly one space; surrounding whitespace is not
    collapsed, so ``"a,,b"`` becomes ``"a  b"``.
    """
    # Same split/join form as remove_pipes, for consistency.
    return " ".join(s.split(","))


def preprocess(s: str) -> str:
"Clean up transaction description"
steps = (
lambda s: s.lower(),
remove_pipes,
remove_colon,
remove_comma,
remove_digits,
)
out = s
Expand Down
23 changes: 23 additions & 0 deletions tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from ficamp.classifier.preprocessing import (
preprocess,
remove_colon,
remove_comma,
remove_digits,
remove_pipes,
)
Expand Down Expand Up @@ -35,6 +37,25 @@ def test_remove_pipes(inp, exp):
assert remove_pipes(inp) == exp


@pytest.mark.parametrize(
    ("inp,exp"),
    (
        ("CSIDNL0213324324324", "CSIDNL0213324324324"),
        ("CSID:NL0213324324324", "CSID NL0213324324324"),
    ),
)
def test_remove_colon(inp, exp):
    """Colon-free input is unchanged; a colon is replaced by a space."""
    assert remove_colon(inp) == exp


@pytest.mark.parametrize(
    ("inp,exp"),
    (("CSID,NL0213324324324", "CSID NL0213324324324"),),
)
def test_remove_comma(inp, exp):
    """A comma in the input is replaced by a space."""
    assert remove_comma(inp) == exp


@pytest.mark.parametrize(
("inp,exp"),
(
Expand All @@ -47,6 +68,8 @@ def test_remove_pipes(inp, exp):
("SEPA 12312321 bic", "sepa bic"),
("SEPA 12312321 123BIC", "sepa"),
("SEPA 1231|AMSTERDAM 123BIC", "sepa amsterdam"),
("CSID:NL0213324324324", "csid"),
("CSID:NL0213324324324 HELLO,world1332", "csid hello"),
),
)
def test_preprocess(inp, exp):
Expand Down

0 comments on commit 47fdb66

Please sign in to comment.