Merge pull request #11 from PyCampES/interactive_category_skeleton
Interactive category skeleton
gilgamezh authored Mar 31, 2024
2 parents 5707091 + 86b5990 commit 1144ef6
Showing 7 changed files with 200 additions and 24 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -167,3 +167,7 @@ cython_debug/

samples/
.ruff_cache/
categories_database.json
ficamp.db
gcache.json

2 changes: 2 additions & 0 deletions pyproject.toml
@@ -16,6 +16,8 @@ requests = "^2.31.0"
python-dotenv = "^1.0.1"
numpy = "^1.26.4"
scikit-learn = "^1.4.1.post1"
sqlmodel = "^0.0.16"
questionary = "^2.0.1"

[tool.poetry.group.dev.dependencies]
mypy = "^1.9.0"
168 changes: 152 additions & 16 deletions src/ficamp/__main__.py
@@ -1,41 +1,177 @@
import argparse
import json
import os
import shutil
from enum import StrEnum

import questionary
from dotenv import load_dotenv
from sqlmodel import Session, SQLModel, create_engine, select

from ficamp.classifier.infer import infer_tx_category
from ficamp.datastructures import Tx
from ficamp.parsers.abn import AbnParser


def cli() -> argparse.Namespace:
"""Parses the first argument from the command line and prints it."""
"""Creates a command line interface with subcommands for import and categorize."""

# Create an argument parser
# Create the main parser
parser = argparse.ArgumentParser(
prog="ficamp", description="Print the first argument from the CLI"
prog="ficamp", description="Parse and categorize your expenses."
)

parser.add_argument("--bank", choices=["abn"], default="abn")
parser.add_argument("filename", help="The spreadsheet to load")
# Create subparsers for the two subcommands
subparsers = parser.add_subparsers(dest="command", required=True)

# Subparser for the import command
    import_parser = subparsers.add_parser("import", help="Import transactions")
import_parser.add_argument(
"--bank", choices=["abn"], default="abn", help="Specify the bank for the import"
)
import_parser.add_argument("filename", help="File to load")
import_parser.set_defaults(func=import_data)

# Subparser for the categorize command
categorize_parser = subparsers.add_parser(
"categorize", help="Categorize transactions"
)
categorize_parser.add_argument("--infer-category", action="store_true")
categorize_parser.set_defaults(func=categorize)

# Parse the arguments
args = parser.parse_args()

# Print the first argument
return args


def main():
args = cli()
args.filename
args.bank

def import_data(args, engine):
"""Run the parsers."""
print(f"Importing data from {args.filename} for bank {args.bank}.")
# TODO: Build enum for banks
if args.bank == "abn":
parser = AbnParser()
parser.load(args.filename)
transactions = parser.parse()
print(transactions)
# TODO: Add categorizer!
for tx in transactions:
with Session(engine) as session:
                # Assuming 'date', 'amount' and 'concept' uniquely identify a transaction
statement = select(Tx).where(
Tx.date == tx.date, Tx.amount == tx.amount, Tx.concept == tx.concept
)
result = session.exec(statement).first()
if result is None: # No existing transaction found
session.add(tx)
session.commit()
else:
print(f"Transaction already exists in the database. {tx}")


def get_category_dict(categories_database_path="categories_database.json"):
# FIXME: move categories to SQLITE instead of json file.
if not os.path.exists(categories_database_path):
return {}
with open(categories_database_path, "r") as file:
category_dict = json.load(file)
string_to_category = {
string: category
for category, strings in category_dict.items()
for string in strings
}
return string_to_category


def revert_and_save_dict(string_to_category, filename="categories_database.json"):
# Reverting the dictionary
category_to_strings = {}
for string, category in string_to_category.items():
category_to_strings.setdefault(category, []).append(string)

# Saving to a JSON file
if os.path.exists(filename):
shutil.move(filename, "/tmp/categories_db_bkp.json")
with open(filename, "w") as file:
json.dump(category_to_strings, file, indent=4)
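The two helpers above keep the on-disk and in-memory shapes of the category store inverted from each other; a minimal sketch of that round trip, with made-up concept strings:

# categories_database.json stores {category: [concepts, ...]}
on_disk = {"groceries": ["ALBERT HEIJN 1234"], "transport": ["NS GROEP"]}

# get_category_dict() flattens it to {concept: category} for quick lookups
in_memory = {c: cat for cat, concepts in on_disk.items() for c in concepts}
assert in_memory == {"ALBERT HEIJN 1234": "groceries", "NS GROEP": "transport"}

# revert_and_save_dict() inverts it back before writing the JSON file
inverted = {}
for concept, cat in in_memory.items():
    inverted.setdefault(cat, []).append(concept)
assert inverted == on_disk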


class DefaultAnswers(StrEnum):
SKIP = "Skip this Tx"
NEW = "Type a new category"


def query_business_category(tx, categories_dict, infer_category=False):
# first try to get from the category_dict
category = categories_dict.get(tx.concept)
if category:
return category
# ask the user if we don't know it
categories_choices = list(set(categories_dict.values()))
categories_choices.extend([DefaultAnswers.NEW, DefaultAnswers.SKIP])
default_choice = DefaultAnswers.SKIP
if infer_category:
inferred_category = infer_tx_category(tx)
if inferred_category:
categories_choices.append(inferred_category)
default_choice = inferred_category
print(f"{tx.date.isoformat()} {tx.amount} {tx.concept}")
answer = questionary.select(
"Please select the category for this TX",
choices=categories_choices,
default=default_choice,
show_selected=True,
).ask()
if answer == DefaultAnswers.NEW:
answer = questionary.text("What's the category for the TX above").ask()
if answer == DefaultAnswers.SKIP:
return None
if answer is None:
# https://questionary.readthedocs.io/en/stable/pages/advanced.html#keyboard-interrupts
raise KeyboardInterrupt
if answer:
categories_dict[tx.concept] = answer
category = answer
return category


def categorize(args, engine):
"""Function to categorize transactions."""
categories_dict = get_category_dict()
try:
with Session(engine) as session:
statement = select(Tx).where(Tx.category.is_(None))
results = session.exec(statement).all()
for tx in results:
print(f"Processing {tx}")
tx_category = query_business_category(
tx, categories_dict, infer_category=args.infer_category
)
if tx_category:
print(f"Saving category for {tx.concept}: {tx_category}")
tx.category = tx_category
# update DB
session.add(tx)
session.commit()
revert_and_save_dict(categories_dict)
else:
print("Not saving any category for thi Tx")
revert_and_save_dict(categories_dict)
except KeyboardInterrupt:
print("Closing")


def main():
# create DB
engine = create_engine("sqlite:///ficamp.db")
# create tables
SQLModel.metadata.create_all(engine)

try:
args = cli()
if args.command:
args.func(args, engine)
except KeyboardInterrupt:
print("\nClosing")


load_dotenv()
main()
if __name__ == "__main__":
load_dotenv()
main()
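For reference, a minimal sketch of how the new subcommand wiring is meant to be driven. From a shell this would look like "python -m ficamp import --bank abn transactions.xlsx" or "python -m ficamp categorize --infer-category" (the spreadsheet name is hypothetical); programmatically, the dispatch that main() performs boils down to the following:

import sys

from sqlmodel import SQLModel, create_engine

# Safe to import: main() now runs only under the __main__ guard.
from ficamp.__main__ import cli

# cli() parses sys.argv and attaches the chosen handler via set_defaults(func=...),
# so dispatch is just a call to args.func with the shared engine.
engine = create_engine("sqlite:///ficamp.db")
SQLModel.metadata.create_all(engine)

sys.argv = ["ficamp", "categorize", "--infer-category"]
args = cli()
args.func(args, engine)  # here args.func is categorize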
2 changes: 1 addition & 1 deletion src/ficamp/classifier/features.py
@@ -56,7 +56,7 @@ def extract_payment_method(d: dict) -> str | dict[str, Any]:
res = "<UNK>"
for method in payment_methods:
if method in d["desc"]:
return method
res = method
return d | {"payment_method": res}


26 changes: 23 additions & 3 deletions src/ficamp/classifier/google_apis.py
@@ -20,7 +20,7 @@ def search_google_maps(business_name, location=None, api_key=GOOGLE_API_KEY):
response = requests.get(base_url, params=params)
response.raise_for_status()
if response.json()["status"] != "OK":
raise GoogleException(response.json()["error_message"])
raise GoogleException(response.json())
results = response.json().get("results", [])
if results:
# Assuming the first result is the most relevant
@@ -40,7 +40,7 @@ def get_place_details(place_id):
response = requests.get(url, headers=headers)
response.raise_for_status()
if response.json()["status"] != "OK":
raise GoogleException(response.json()["error_message"])
raise GoogleException(response.json())
return response.json().get("types", [])


@@ -55,7 +55,7 @@ def query_google_places_new(query):
response = requests.post(url, headers=headers, data=json.dumps(payload))
response.raise_for_status()
if response.json()["status"] != "OK":
raise GoogleException(response.json()["error_message"])
raise GoogleException(response.json())
places = response.json().get("places", [])
if places:
categories = places[0].get("types", [])
@@ -65,6 +65,7 @@


def find_business_category_in_google(field, location=None):
"""Queries Google maps and try to get a category from it"""
keys_to_remove = ["point_of_interest", "establishment", "store", "department_store"]
# first try using google map places search
place_id_gmaps, categories = search_google_maps(field, location)
@@ -84,3 +85,22 @@
categories = list(set(categories) - set(keys_to_remove))
return categories[0]
raise GoogleException


def query_gmaps_category(concept):
"""Pycamp internet is slow. saving data locally to go faster"""
with open("gcache.json") as cache_file:
cached = json.load(cache_file)
cached_category = cached.get(concept)
if not cached_category:
try:
gmaps_category = find_business_category_in_google(concept)
except GoogleException:
gmaps_category = "Unknown"
# print(gmaps_category)
with open("gcache.json", "w") as cache_file:
cached[concept] = gmaps_category
json.dump(cached, cache_file)
else:
gmaps_category = cached_category
return gmaps_category
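One practical note on the cache above: query_gmaps_category() opens gcache.json unconditionally, and the file is now git-ignored, so it has to exist before the first lookup. A minimal, hypothetical way to seed it:

import json
from pathlib import Path

# Create an empty cache so query_gmaps_category() can open it on first use.
cache_path = Path("gcache.json")
if not cache_path.exists():
    cache_path.write_text(json.dumps({}))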
9 changes: 9 additions & 0 deletions src/ficamp/classifier/infer.py
@@ -0,0 +1,9 @@
from ficamp.classifier.google_apis import query_gmaps_category


def infer_tx_category(tx):
"""Will try to guess the category using different actions."""
gmap_category = query_gmaps_category(tx.concept)
if gmap_category != "Unknown":
print(f"Google Maps category is {gmap_category}")
return gmap_category
13 changes: 9 additions & 4 deletions src/ficamp/datastructures.py
@@ -2,6 +2,10 @@
from datetime import datetime
from decimal import Decimal
from enum import StrEnum
from typing import Optional

from sqlalchemy import JSON, Column
from sqlmodel import Field, SQLModel


class Currency(StrEnum):
@@ -23,13 +27,14 @@ class Concept:


@dataclass
class Tx:
class Tx(SQLModel, table=True):
"""Represents a transaction extracted from a bank"""

id: Optional[int] = Field(default=None, primary_key=True)
date: datetime
amount: Decimal
currency: Currency
concept: str | Concept
concept: str
category: None | str
metadata: dict[str, str]
tags: list[str]
tx_metadata: dict[str, str] = Field(sa_column=Column(JSON))
tags: list[str] = Field(sa_column=Column(JSON))
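To see the new table-backed model in action, a minimal smoke test against an in-memory SQLite database might look like the sketch below; every field value is made up, and any member of the Currency enum works for the currency field:

from datetime import datetime
from decimal import Decimal

from sqlmodel import Session, SQLModel, create_engine, select

from ficamp.datastructures import Currency, Tx

engine = create_engine("sqlite:///:memory:")
SQLModel.metadata.create_all(engine)

tx = Tx(
    date=datetime(2024, 3, 31),
    amount=Decimal("12.50"),
    currency=list(Currency)[0],   # any defined Currency member
    concept="ALBERT HEIJN 1234",  # made-up concept string
    category=None,
    tx_metadata={"raw": "example row"},  # persisted via the JSON column
    tags=["groceries"],                  # persisted via the JSON column
)

with Session(engine) as session:
    session.add(tx)
    session.commit()
    stored = session.exec(select(Tx).where(Tx.category.is_(None))).first()
    print(stored.tx_metadata, stored.tags)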
