Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions ofxparse/ofxtodataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from ofxparse import OfxParser
import pandas as pd
import codecs
import os.path as path
import sys, warnings
import decimal

# fields of transactions are auto extracted using dir(transactiontype)-{attributes starting with '_'}

def ofx_to_dataframe(fileobjs, id_len=24):
    """Parse OFX/QFX input(s) into pandas DataFrames keyed by transaction type.

    Transaction fields are discovered dynamically as ``dir(transaction)``
    minus attributes starting with ``'_'``, so any transaction type produced
    by ofxparse is supported.

    Parameters
    ----------
    fileobjs : file object, path string, or list of either
        OFX data source(s).  Path strings are opened (and closed) here;
        a single non-list argument is treated as a one-element list.
    id_len : int, default 24
        Transaction IDs are truncated to this many characters because the
        trailing digits are running numbers that change from download to
        download and would otherwise make the same transaction look new.

    Returns
    -------
    dict
        Maps a transaction type name (e.g. ``'Transaction'``,
        ``'InvestmentTransaction'``, ``'Position'``) to a DataFrame with the
        rows collected from every input, plus an ``'fname'`` column naming
        the source file (``'stdin'`` when the file object has no name).
    """
    # Accept a single file/path as well as a list (backward compatible).
    if not isinstance(fileobjs, list):
        fileobjs = [fileobjs]

    field_order = {}   # txn_type -> ordered list of discovered field names
    rows = {}          # txn_type -> list of row dicts
    for source in fileobjs:
        opened_here = isinstance(source, str)
        fileobj = codecs.open(source) if opened_here else source
        try:
            ofx = OfxParser.parse(fileobj)
            fname = getattr(fileobj, 'name', None) or 'stdin'
        finally:
            if opened_here:
                fileobj.close()

        # One securities list per OFX file: map opaque unique IDs to tickers
        # so the 'security' column is human readable.
        security_map = {}
        if hasattr(ofx, 'security_list'):
            security_map = {s.uniqueid: s.ticker for s in ofx.security_list}

        for account in ofx.accounts:
            statement = account.statement
            positions = getattr(statement, 'positions', None) or []
            for transaction in list(statement.transactions) + list(positions):
                txn_type = type(transaction).__name__
                transaction.acctnum = account.number
                if txn_type not in field_order:
                    # AGGREGATE_TYPES is an ofxparse class attribute, not data.
                    field_order[txn_type] = [
                        f for f in dir(transaction)
                        if not f.startswith('_') and f != 'AGGREGATE_TYPES']
                    rows[txn_type] = []
                row = {f: getattr(transaction, f, None)
                       for f in field_order[txn_type]}
                row['fname'] = fname
                # Clip the ID tail, which changes from download to download.
                if isinstance(row.get('id'), str):
                    row['id'] = row['id'][:id_len]
                if 'security' in row:
                    row['security'] = security_map.get(row['security'],
                                                       row['security'])
                rows[txn_type].append(row)

            # Represent the account's cash balance as a synthetic position so
            # uninvested cash shows up alongside the holdings.
            cash_amount = None
            if hasattr(statement, 'balance'):           # bank statements
                cash_amount = statement.balance
                as_of = statement.balance_date
            elif hasattr(statement, 'available_cash'):  # brokerage statements
                cash_amount = statement.available_cash
                as_of = statement.end_date
            if cash_amount is not None:
                if 'Position' not in field_order:
                    field_order['Position'] = [
                        'date', 'market_value', 'security',
                        'unit_price', 'units', 'acctnum']
                    rows['Position'] = []
                currency = account.curdef
                rows['Position'].append({
                    'date': as_of,
                    'security': security_map.get(currency, currency),
                    'market_value': cash_amount,
                    'units': cash_amount,
                    'unit_price': decimal.Decimal('1.00'),
                    'acctnum': account.number,
                    'fname': fname})

    # Build one DataFrame per transaction type.  Row dicts are accumulated and
    # converted once, instead of the removed-in-pandas-2.0 (and quadratic)
    # per-row DataFrame.append.  Preserve the discovered column order and
    # append any keys a row introduced beyond it.
    collected_df = {}
    for txn_type, row_list in rows.items():
        frame = pd.DataFrame(row_list)
        order = [c for c in field_order[txn_type] + ['fname']
                 if c in frame.columns]
        order += [c for c in frame.columns if c not in order]
        collected_df[txn_type] = frame[order]
    return collected_df

# Free-form developer notes on where ofxparse exposes balance information;
# kept as a module-level string, not used at runtime.
__dev_notes__='''
For brokerage, balances are available in account.statement.balance_list... but overall cash is also summarized in account.statement.available_cash corresponding to statement.end_date
For bank, balance is available in account.statement.balance (and balance_date)

'''
17 changes: 16 additions & 1 deletion tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from .support import open_file
from ofxparse import OfxParser, AccountType, Account, Statement, Transaction
from ofxparse.ofxparse import OfxFile, OfxPreprocessedFile, OfxParserException, soup_maker

from ofxparse.ofxtodataframe import ofx_to_dataframe
import glob

class TestOfxFile(TestCase):
OfxFileCls = OfxFile
Expand Down Expand Up @@ -1049,6 +1050,20 @@ def testFailure(self):
self.assertEqual(ofx.signon.severity, 'ERROR')
self.assertEqual(ofx.signon.message, 'Your request could not be processed because you supplied an invalid identification code or your password was incorrect')

class TestOfxToDataFrame(TestCase):
    """Integration tests for ofx_to_dataframe over bundled fixture files."""

    @staticmethod
    def _load(paths):
        # Open the fixtures and hand real file objects to the parser:
        # ofx_to_dataframe expects a list, and OfxParser.parse reads from a
        # file handle rather than a path string.
        handles = [open(p) for p in paths]
        try:
            return ofx_to_dataframe(handles)
        finally:
            for handle in handles:
                handle.close()

    def testSingleFile(self):
        dfs = self._load(['tests/fixtures/fidelity.ofx'])
        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
        self.assertEqual(len(dfs['InvestmentTransaction']), 14)
        self.assertEqual(len(dfs['Transaction']), 3)

    def testMultipleFiles(self):
        dfs = self._load(['tests/fixtures/fidelity.ofx',
                          'tests/fixtures/investment_401k.ofx'])
        self.assertEqual(sorted(dfs), ['InvestmentTransaction', 'Transaction'])
        self.assertEqual(len(dfs['InvestmentTransaction']), 17)
        self.assertEqual(len(dfs['Transaction']), 3)


if __name__ == "__main__":
import unittest
unittest.main()
94 changes: 49 additions & 45 deletions utils/ofx2xlsx.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,57 +1,60 @@
from ofxparse import OfxParser
import pandas as pd
#!/usr/bin/env python3
import warnings
warnings.filterwarnings("ignore", message="numpy.dtype size changed")

from ofxparse.ofxtodataframe import ofx_to_dataframe
import pandas as pd
from pandas import ExcelWriter
import sys
import argparse
from io import StringIO

# TODO automatically extract from transactions
fields = ['id','type', 'date', 'memo', 'payee', 'amount', 'checknum', 'mcc']

# ToDo: Remove duplicate transactions from different files
parser = argparse.ArgumentParser(description='Convert multiple .qfx or .ofx to'
' .xlsx.\n'
'Remove duplicate transactions '
'from different files.\n'
'use fixed columns:'
' %s'%', '.join(fields))
parser.add_argument('files', metavar='*.ofx *.qfx', type=str, nargs='+',
help='.qfx or .ofx file names')
parser.add_argument('--start', type=str, metavar='2014-01-01',
default='2014-01-01',
help="Don't take transaction before this date")
parser.add_argument('--end', type=str, metavar='2014-12-31',
default='2014-12-31',
' .xlsx or csv.\n')
parser.add_argument('files', type=argparse.FileType('r'), nargs='+', #;metavar='*.ofx *.qfx', default=[], type=str, nargs='+',
help='.qfx or .ofx file names')
parser.add_argument('--start', type=str, metavar='1700-01-01',
default='1700-01-01',
help="Don't take transaction before this date")
parser.add_argument('--end', type=str, metavar='3000-12-31',
default='3000-12-31',
help="Don't take transaction after this date")
parser.add_argument('--output', metavar='output.xlsx', type=str,
default='output.xlsx', help='Were to store the xlsx')
parser.add_argument('-o', '--output', metavar='output.csv', type=str,
                    default='output.csv', help='Where to store the output. The extension determines the output format')
parser.add_argument('--id-length', metavar='24', type=int, default=24,
help='Truncate the number of digits in a transaction ID.'
' This is important because this program remove'
' transactions with duplicate IDs (after verifing'
' that they are identical.'
' If you feel unsafe then use a large number but'
'usually the last digits of the transaction ID are'
'running numbers which change from download to download'
' as a result you will have duplicate transactions'
' unless you truncate the ID.')
                    help='Truncate the number of digits in a transaction ID.'
                    ' This is important because this program removes'
                    ' transactions with duplicate IDs (after verifying'
                    ' that they are identical).'
                    ' If you feel unsafe then use a large number, but'
                    ' usually the last digits of the transaction ID are'
                    ' running numbers which change from download to download;'
                    ' as a result you will have duplicate transactions'
                    ' unless you truncate the ID.')


args = parser.parse_args()


data = {}
for fname in args.files:
ofx = OfxParser.parse(file(fname))
for account in ofx.accounts:
df = data.get(account.number, pd.DataFrame(columns=fields+['fname']))
for transaction in account.statement.transactions:
s = pd.Series([getattr(transaction,f) for f in fields], index=fields)
s['fname'] = fname.split('/')[-1]
df = df.append(s, ignore_index=True)
df['id'] = df['id'].str[:args.id_length] # clip the last part of the ID which changes from download to download
data[account.number] = df

print "Writing result to", args.output
writer = pd.ExcelWriter(args.output)

if 'stdin' in args.files[0].name:
fp=args.files[0]
args.files=[StringIO(fp.read())]
data = ofx_to_dataframe(args.files)

if 'csv' in args.output:
outstring = ""
for key,df in data.items():
outstring += "##### {}\n".format(key) + df.to_csv(None, index=False, header=True)
if args.output=='output.csv':
print(outstring)
with open(args.output, 'w') as fileobj:
print(outstring, file=fileobj)
elif 'xlsx' in args.output:
writer = pd.ExcelWriter(args.output)
for key,df in data.items():
df.to_excel(writer, sheet_name=key)
writer.save()

__dev_notes__ = '''
for account_number, df in data.iteritems():
# A transaction is identified using all `fields`
# collapse all repeated transactions from the same file into one row
Expand Down Expand Up @@ -88,3 +91,4 @@
df2.to_excel(writer, account_number, index=False)

writer.save()
'''