from ofxparse import OfxParser
import pandas as pd
import decimal

# Fields of a transaction are auto-extracted as dir(transaction) minus
# attributes starting with '_'.


def ofx_to_dataframe(fileobjs, id_len=24):
    """Parse OFX/QFX file objects into pandas DataFrames, one per transaction type.

    Parameters
    ----------
    fileobjs : file object or list of file objects
        Open OFX/QFX file handles (anything ``OfxParser.parse`` accepts).
        A single file object is wrapped into a one-element list.
    id_len : int, optional
        Truncate transaction IDs to this many characters.  The trailing
        digits of an ID often change between downloads of the same data,
        so clipping them lets duplicate transactions be detected later.

    Returns
    -------
    dict mapping transaction-type name (e.g. ``'Transaction'``,
    ``'InvestmentTransaction'``, ``'Position'``) to a DataFrame with one
    row per transaction, plus ``fname`` and ``acctnum`` columns.

    Raises
    ------
    TypeError
        If ``fileobjs`` is neither a file-like object nor a list.
    """
    if not isinstance(fileobjs, list):
        if not hasattr(fileobjs, 'read'):
            raise TypeError('fileobjs must be a file object or a list of file objects')
        fileobjs = [fileobjs]  # generalize: accept a single open file too

    collected_df = {}
    for fileobj in fileobjs:
        # txn_type -> list of row dicts; frames are built once per file
        # (building row-by-row with DataFrame.append is quadratic and the
        # method was removed in pandas 2.0).
        rows = {}
        ofx = OfxParser.parse(fileobj)

        # One OFX file contains at most one securities list; map unique
        # security IDs to their ticker symbols.
        security_map = {}
        if hasattr(ofx, 'security_list'):
            security_map.update({x.uniqueid: x.ticker for x in ofx.security_list})

        for account in ofx.accounts:
            # Different transaction types carry different fields, so rows
            # are bucketed per concrete type name.
            transactions = list(account.statement.transactions)
            transactions += list(getattr(account.statement, 'positions', None) or [])
            for transaction in transactions:
                txn_type = type(transaction).__name__
                transaction.acctnum = account.number
                fields = [f for f in dir(transaction) if not f.startswith('_')]
                rows.setdefault(txn_type, []).append(
                    {f: getattr(transaction, f) for f in fields})

            # Represent the cash balance as a synthetic "Cash" position so
            # it shows up alongside real holdings.
            cash_amount = None
            if hasattr(account.statement, 'balance'):
                # Bank statements: balance + balance_date.
                cash_amount = account.statement.balance
                dt = account.statement.balance_date
            elif hasattr(account.statement, 'available_cash'):
                # Brokerage statements: available_cash as of end_date.
                cash_amount = account.statement.available_cash
                dt = account.statement.end_date
            if cash_amount is not None:
                rows.setdefault('Position', []).append({
                    'date': dt,
                    'security': account.curdef,
                    'market_value': cash_amount,
                    'units': cash_amount,
                    'unit_price': decimal.Decimal('1.00'),
                    'acctnum': account.number})

        fname = getattr(fileobj, 'name', 'stdin')
        for key, recs in rows.items():
            df = pd.DataFrame(recs)
            df['fname'] = fname
            if 'id' in df.columns:
                # Clip the tail of the ID, which changes between downloads.
                df['id'] = df['id'].str[:id_len]
            if 'security' in df.columns:
                # Translate opaque security IDs to tickers where known.
                df['security'] = df['security'].map(lambda s: security_map.get(s, s))
            if 'AGGREGATE_TYPES' in df.columns:
                del df['AGGREGATE_TYPES']
            if key in collected_df:
                collected_df[key] = pd.concat([collected_df[key], df],
                                              ignore_index=True)
            else:
                collected_df[key] = df
    return collected_df
__dev_notes__ = '''
For brokerage, balances are available in account.statement.balance_list...
but overall cash is also summarized in account.statement.available_cash corresponding to statement.end_date
For bank, balance is available in account.statement.balance (and balance_date)
'''


class TestOfxToDataFrame(TestCase):
    """Integration tests for ofx_to_dataframe against the bundled fixtures.

    ofx_to_dataframe expects a *list of open file objects* (it asserts on
    the list and hands each element to OfxParser.parse), so the fixtures
    are opened via the test suite's open_file helper rather than passed
    as bare path strings.
    """

    def testSingleFile(self):
        # One brokerage file: investment and cash transactions split into
        # separate frames.
        with open_file('fidelity.ofx') as fileobj:
            dfs = ofx_to_dataframe([fileobj])
        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
        self.assertEqual(len(dfs['InvestmentTransaction']), 14)
        self.assertEqual(len(dfs['Transaction']), 3)

    def testMultipleFiles(self):
        # Rows from both files are concatenated per transaction type.
        with open_file('fidelity.ofx') as f1, \
                open_file('investment_401k.ofx') as f2:
            dfs = ofx_to_dataframe([f1, f2])
        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
        self.assertEqual(len(dfs['InvestmentTransaction']), 17)
        self.assertEqual(len(dfs['Transaction']), 3)
#!/usr/bin/env python3
"""Convert one or more .ofx/.qfx files to a CSV or XLSX report."""
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")

import argparse
import sys
from io import StringIO

import pandas as pd

from ofxparse.ofxtodataframe import ofx_to_dataframe


def _build_parser():
    """Command-line interface; --start/--end are accepted for backward
    compatibility but date filtering is not yet applied."""
    parser = argparse.ArgumentParser(
        description='Convert multiple .qfx or .ofx to .xlsx or csv.\n')
    # ToDo: Remove duplicate transactions from different files
    parser.add_argument('files', type=argparse.FileType('r'), nargs='+',
                        help='.qfx or .ofx file names')
    parser.add_argument('--start', type=str, metavar='1700-01-01',
                        default='1700-01-01',
                        help="Don't take transaction before this date")
    parser.add_argument('--end', type=str, metavar='3000-12-31',
                        default='3000-12-31',
                        help="Don't take transaction after this date")
    parser.add_argument('-o', '--output', metavar='output.csv', type=str,
                        default='output.csv',
                        help='Where to store the output. '
                             'Extension determines output format')
    parser.add_argument('--id-length', metavar='24', type=int, default=24,
                        help='Truncate the number of digits in a transaction'
                             ' ID. This is important because this program'
                             ' removes transactions with duplicate IDs (after'
                             ' verifying that they are identical). If you feel'
                             ' unsafe then use a large number, but usually the'
                             ' last digits of the transaction ID are running'
                             ' numbers which change from download to download;'
                             ' as a result you will have duplicate'
                             ' transactions unless you truncate the ID.')
    return parser


def main():
    args = _build_parser().parse_args()

    # Reading from a pipe: buffer stdin so the parser can seek if it needs to.
    if 'stdin' in args.files[0].name:
        args.files = [StringIO(args.files[0].read())]

    data = ofx_to_dataframe(args.files)

    if 'csv' in args.output:
        chunks = []
        for key, df in data.items():
            chunks.append("##### {}\n".format(key))
            # to_csv returns the CSV text when no path is given; it must be
            # captured (previously the return value was discarded, leaving
            # only the section headers in the output).
            chunks.append(df.to_csv(index=False, header=True))
        outstring = "".join(chunks)
        if args.output == 'output.csv':
            # Default destination: echo to stdout as well.
            print(outstring)
        with open(args.output, 'w') as fileobj:
            print(outstring, file=fileobj)
    elif 'xlsx' in args.output:
        # Context manager closes/saves the workbook (ExcelWriter.save was
        # removed in pandas 2.0).
        with pd.ExcelWriter(args.output) as writer:
            for key, df in data.items():
                df.to_excel(writer, sheet_name=key)


if __name__ == "__main__":
    main()