Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions ofxparse/ofxtodataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from ofxparse import OfxParser
import pandas as pd
import codecs
import os.path as path
import sys, warnings
import decimal

# fields of transactions are auto extracted using dir(transactiontype)-{attributes starting with '_'}

def ofx_to_dataframe(fileobjs, id_len=24):
    """Parse OFX/QFX input(s) into pandas DataFrames keyed by transaction type.

    Transaction fields are discovered dynamically as ``dir(transaction)``
    minus attributes starting with ``'_'``, so any transaction type produced
    by ofxparse is supported.

    Parameters
    ----------
    fileobjs : file object, path string, or list of either
        OFX data source(s).  Path strings are opened (and closed) here;
        a single non-list argument is treated as a one-element list.
    id_len : int, default 24
        Transaction IDs are truncated to this many characters because the
        trailing digits are running numbers that change from download to
        download and would otherwise make the same transaction look new.

    Returns
    -------
    dict
        Maps a transaction type name (e.g. ``'Transaction'``,
        ``'InvestmentTransaction'``, ``'Position'``) to a DataFrame with the
        rows collected from every input, plus an ``'fname'`` column naming
        the source file (``'stdin'`` when the file object has no name).
    """
    # Accept a single file/path as well as a list (backward compatible).
    if not isinstance(fileobjs, list):
        fileobjs = [fileobjs]

    field_order = {}   # txn_type -> ordered list of discovered field names
    rows = {}          # txn_type -> list of row dicts
    for source in fileobjs:
        opened_here = isinstance(source, str)
        fileobj = codecs.open(source) if opened_here else source
        try:
            ofx = OfxParser.parse(fileobj)
            fname = getattr(fileobj, 'name', None) or 'stdin'
        finally:
            if opened_here:
                fileobj.close()

        # One securities list per OFX file: map opaque unique IDs to tickers
        # so the 'security' column is human readable.
        security_map = {}
        if hasattr(ofx, 'security_list'):
            security_map = {s.uniqueid: s.ticker for s in ofx.security_list}

        for account in ofx.accounts:
            statement = account.statement
            positions = getattr(statement, 'positions', None) or []
            for transaction in list(statement.transactions) + list(positions):
                txn_type = type(transaction).__name__
                transaction.acctnum = account.number
                if txn_type not in field_order:
                    # AGGREGATE_TYPES is an ofxparse class attribute, not data.
                    field_order[txn_type] = [
                        f for f in dir(transaction)
                        if not f.startswith('_') and f != 'AGGREGATE_TYPES']
                    rows[txn_type] = []
                row = {f: getattr(transaction, f, None)
                       for f in field_order[txn_type]}
                row['fname'] = fname
                # Clip the ID tail, which changes from download to download.
                if isinstance(row.get('id'), str):
                    row['id'] = row['id'][:id_len]
                if 'security' in row:
                    row['security'] = security_map.get(row['security'],
                                                       row['security'])
                rows[txn_type].append(row)

            # Represent the account's cash balance as a synthetic position so
            # uninvested cash shows up alongside the holdings.
            cash_amount = None
            if hasattr(statement, 'balance'):           # bank statements
                cash_amount = statement.balance
                as_of = statement.balance_date
            elif hasattr(statement, 'available_cash'):  # brokerage statements
                cash_amount = statement.available_cash
                as_of = statement.end_date
            if cash_amount is not None:
                if 'Position' not in field_order:
                    field_order['Position'] = [
                        'date', 'market_value', 'security',
                        'unit_price', 'units', 'acctnum']
                    rows['Position'] = []
                currency = account.curdef
                rows['Position'].append({
                    'date': as_of,
                    'security': security_map.get(currency, currency),
                    'market_value': cash_amount,
                    'units': cash_amount,
                    'unit_price': decimal.Decimal('1.00'),
                    'acctnum': account.number,
                    'fname': fname})

    # Build one DataFrame per transaction type.  Row dicts are accumulated and
    # converted once, instead of the removed-in-pandas-2.0 (and quadratic)
    # per-row DataFrame.append.  Preserve the discovered column order and
    # append any keys a row introduced beyond it.
    collected_df = {}
    for txn_type, row_list in rows.items():
        frame = pd.DataFrame(row_list)
        order = [c for c in field_order[txn_type] + ['fname']
                 if c in frame.columns]
        order += [c for c in frame.columns if c not in order]
        collected_df[txn_type] = frame[order]
    return collected_df

# Free-form developer notes on where ofxparse exposes balance information;
# kept as a module-level string, not used at runtime.
__dev_notes__='''
For brokerage, balances are available in account.statement.balance_list... but overall cash is also summarized in account.statement.available_cash corresponding to statement.end_date
For bank, balance is available in account.statement.balance (and balance_date)

'''
17 changes: 16 additions & 1 deletion tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from .support import open_file
from ofxparse import OfxParser, AccountType, Account, Statement, Transaction
from ofxparse.ofxparse import OfxFile, OfxPreprocessedFile, OfxParserException, soup_maker

from ofxparse.ofxtodataframe import ofx_to_dataframe
import glob

class TestOfxFile(TestCase):
OfxFileCls = OfxFile
Expand Down Expand Up @@ -1049,6 +1050,20 @@ def testFailure(self):
self.assertEqual(ofx.signon.severity, 'ERROR')
self.assertEqual(ofx.signon.message, 'Your request could not be processed because you supplied an invalid identification code or your password was incorrect')

class TestOfxToDataFrame(TestCase):
    """Integration tests for ofx_to_dataframe over bundled fixture files."""

    @staticmethod
    def _load(paths):
        # Open the fixtures and hand real file objects to the parser:
        # ofx_to_dataframe expects a list, and OfxParser.parse reads from a
        # file handle rather than a path string.
        handles = [open(p) for p in paths]
        try:
            return ofx_to_dataframe(handles)
        finally:
            for handle in handles:
                handle.close()

    def testSingleFile(self):
        dfs = self._load(['tests/fixtures/fidelity.ofx'])
        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
        self.assertEqual(len(dfs['InvestmentTransaction']), 14)
        self.assertEqual(len(dfs['Transaction']), 3)

    def testMultipleFiles(self):
        dfs = self._load(['tests/fixtures/fidelity.ofx',
                          'tests/fixtures/investment_401k.ofx'])
        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
        self.assertEqual(len(dfs['InvestmentTransaction']), 17)
        self.assertEqual(len(dfs['Transaction']), 3)


if __name__ == "__main__":
import unittest
unittest.main()
94 changes: 49 additions & 45 deletions utils/ofx2xlsx.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,57 +1,60 @@
from ofxparse import OfxParser
import pandas as pd
#!/usr/bin/env python3
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")

from ofxparse.ofxtodataframe import ofx_to_dataframe
import pandas as pd
from pandas import ExcelWriter
import sys
import argparse
from io import StringIO

# TODO automatically extract from transactions
fields = ['id','type', 'date', 'memo', 'payee', 'amount', 'checknum', 'mcc']

# ToDo: Remove duplicate transactions from different files
parser = argparse.ArgumentParser(description='Convert multiple .qfx or .ofx to'
' .xlsx.\n'
'Remove duplicate transactions '
'from different files.\n'
'use fixed columns:'
' %s'%', '.join(fields))
parser.add_argument('files', metavar='*.ofx *.qfx', type=str, nargs='+',
help='.qfx or .ofx file names')
parser.add_argument('--start', type=str, metavar='2014-01-01',
default='2014-01-01',
help="Don't take transaction before this date")
parser.add_argument('--end', type=str, metavar='2014-12-31',
default='2014-12-31',
' .xlsx or csv.\n')
parser.add_argument('files', type=argparse.FileType('r'), nargs='+', #;metavar='*.ofx *.qfx', default=[], type=str, nargs='+',
help='.qfx or .ofx file names')
parser.add_argument('--start', type=str, metavar='1700-01-01',
default='1700-01-01',
help="Don't take transaction before this date")
parser.add_argument('--end', type=str, metavar='3000-12-31',
default='3000-12-31',
help="Don't take transaction after this date")
parser.add_argument('--output', metavar='output.xlsx', type=str,
default='output.xlsx', help='Were to store the xlsx')
parser.add_argument('-o', '--output', metavar='output.csv', type=str,
                    default='output.csv', help='Where to store the output. The extension determines the output format')
parser.add_argument('--id-length', metavar='24', type=int, default=24,
help='Truncate the number of digits in a transaction ID.'
' This is important because this program remove'
' transactions with duplicate IDs (after verifing'
' that they are identical.'
' If you feel unsafe then use a large number but'
'usually the last digits of the transaction ID are'
'running numbers which change from download to download'
' as a result you will have duplicate transactions'
' unless you truncate the ID.')
                    help='Truncate the number of digits in a transaction ID.'
                    ' This is important because this program removes'
                    ' transactions with duplicate IDs (after verifying'
                    ' that they are identical).'
                    ' If you feel unsafe then use a large number, but'
                    ' usually the last digits of the transaction ID are'
                    ' running numbers which change from download to download;'
                    ' as a result you will have duplicate transactions'
                    ' unless you truncate the ID.')


args = parser.parse_args()


data = {}
for fname in args.files:
ofx = OfxParser.parse(file(fname))
for account in ofx.accounts:
df = data.get(account.number, pd.DataFrame(columns=fields+['fname']))
for transaction in account.statement.transactions:
s = pd.Series([getattr(transaction,f) for f in fields], index=fields)
s['fname'] = fname.split('/')[-1]
df = df.append(s, ignore_index=True)
df['id'] = df['id'].str[:args.id_length] # clip the last part of the ID which changes from download to download
data[account.number] = df

print "Writing result to", args.output
writer = pd.ExcelWriter(args.output)

if 'stdin' in args.files[0].name:
fp=args.files[0]
args.files=[StringIO(fp.read())]
data = ofx_to_dataframe(args.files)

if 'csv' in args.output:
outstring = ""
for key,df in data.items():
outstring += "##### {}\n".format(key) + df.to_csv(None, index=False, header=True)
if args.output=='output.csv':
print(outstring)
with open(args.output, 'w') as fileobj:
print(outstring, file=fileobj)
elif 'xlsx' in args.output:
writer = pd.ExcelWriter(args.output)
for key,df in data.items():
df.to_excel(writer, sheet_name=key)
writer.save()

__dev_notes__ = '''
for account_number, df in data.iteritems():
# A transaction is identified using all `fields`
# collapse all repeated transactions from the same file into one row
Expand Down Expand Up @@ -88,3 +91,4 @@
df2.to_excel(writer, account_number, index=False)

writer.save()
'''