#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, date

#################################################### Data #####################################################
# Load the EWA and EWC price histories from csv files and keep only the Price column
hist_file = os.path.join('hist/', '%s.csv' % 'EWA US Equity')
ewa_price = pd.read_csv(hist_file, header=0, parse_dates=True, sep=',', index_col=0)
ewa_price = ewa_price['Price']
ewa_price.name = 'EWA US Equity'

hist_file = os.path.join('hist/', '%s.csv' % 'EWC US Equity')
ewc_price = pd.read_csv(hist_file, header=0, parse_dates=True, sep=',', index_col=0)
ewc_price = ewc_price['Price']
ewc_price.name = 'EWC US Equity'

# Align the two series on their common dates and drop rows with missing prices
data = pd.concat([ewa_price, ewc_price], axis=1)
# print(data[data.isnull().any(axis=1)])
data.dropna(axis=0, how='any', inplace=True)

from sklearn.linear_model import LinearRegression
# The next two lines do the regression of EWC on EWA; the slope is the hedge ratio
lm_model = LinearRegression(copy_X=True, fit_intercept=True)  # the 'normalize' argument was removed in newer scikit-learn
lm_model.fit(data['EWA US Equity'].values.reshape(-1, 1), data['EWC US Equity'].values)  # fit() expects a 2D array
print('parameters: %.7f, %.7f' % (lm_model.intercept_, lm_model.coef_[0]))
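
# A hedged cross-check, not part of the original script: the same regression can be run
# with statsmodels OLS, which also reports standard errors and t-statistics. This assumes
# the statsmodels package (imported further down for the ADF test) is available here too.
import statsmodels.api as sm
ols_result = sm.OLS(data['EWC US Equity'], sm.add_constant(data['EWA US Equity'])).fit()
print(ols_result.params)  # 'const' is the intercept, 'EWA US Equity' is the hedge ratio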

# Present the graphs: prices on the left, regression residual (the spread) on the right
fig, ax = plt.subplots(nrows=1, ncols=2)
ax[0].set_title('EWA vs EWC')
ax[0].plot(data)
yfit = lm_model.coef_[0] * data['EWA US Equity'] + lm_model.intercept_
y_residual = data['EWC US Equity'] - yfit  # residual = EWC - hedge_ratio * EWA - intercept
ax[1].set_title('Regression Residual')
ax[1].plot(y_residual)
plt.show()

from scipy.stats import pearsonr  # scipy.stats.stats is deprecated; import from scipy.stats
print('Pearson correlation coefficient: %.7f' % (pearsonr(data['EWA US Equity'], data['EWC US Equity'])[0]))

####################################### CADF #####################################################
import statsmodels.tsa.stattools as ts
print(ts.adfuller(y_residual, 1))  # maxlag = 1
# (-3.667485117146333,             # ADF test statistic
#  0.0045944586170011716,          # p-value
#  1,                              # number of lags used
#  4560,                           # number of observations
#  {'1%': -3.431784865122899,      # critical values
#   '5%': -2.8621740417619224,
#   '10%': -2.5671075035106954},
#  625.5003218990623)              # best information criterion (AIC)
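
# A hedged interpretation sketch, not in the original: adfuller() returns
# (statistic, p-value, lags used, number of observations, critical values, best IC).
# If the statistic is more negative than the 5% critical value, the unit-root null is
# rejected at the 5% level, i.e. the residual looks stationary and EWA/EWC appear cointegrated.
adf_stat, p_value, used_lag, n_obs, crit_values, best_ic = ts.adfuller(y_residual, 1)
print('ADF statistic %.6f vs 5%% critical value %.6f' % (adf_stat, crit_values['5%']))
if adf_stat < crit_values['5%']:
    print('Residual is stationary at the 5% level')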

# Repeat the CADF test with the regression in the other direction: EWA on EWC
lm_model = LinearRegression(copy_X=True, fit_intercept=True)
lm_model.fit(data['EWC US Equity'].values.reshape(-1, 1), data['EWA US Equity'].values)  # fit() expects a 2D array
print('parameters: %.7f, %.7f' % (lm_model.intercept_, lm_model.coef_[0]))
yfit = lm_model.coef_[0] * data['EWC US Equity'] + lm_model.intercept_
y_residual = data['EWA US Equity'] - yfit
print(ts.adfuller(y_residual, 1))  # maxlag = 1
# statistic = -3.797221868633519; this direction gives the more negative (stronger) statistic

####################################### Johansen #####################################################
from statsmodels.tsa.vector_ar.vecm import coint_johansen

jh_results = coint_johansen(data, 0, 1)  # det_order = 0 (constant term), k_ar_diff = 1 (lagged differences)
print(jh_results.lr1)   # dim = (n,)   trace statistics
print(jh_results.cvt)   # dim = (n, 3) critical value table (90%, 95%, 99%)
print(jh_results.evec)  # dim = (n, n) column-wise eigenvectors
v1 = jh_results.evec[:, 0]  # eigenvector for the largest eigenvalue -> cointegrating weights
v2 = jh_results.evec[:, 1]

# [21.44412674  3.64194243]    # trace statistics
# [[13.4294 15.4943 19.9349]   # r = 0  critical values
#  [ 2.7055  3.8415  6.6349]]  # r <= 1 critical values
# [[ 0.53474958  0.02398649]   # eigenvectors (columns)
#  [-0.45169106  0.12036402]]
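
# A hedged follow-up sketch, not in the original: compare the trace statistic against the
# 95% critical value (column 1 of cvt) and build the stationary spread from the first
# Johansen eigenvector; data.dot(v1) is the value of the cointegrating portfolio.
if jh_results.lr1[0] > jh_results.cvt[0, 1]:
    print('Trace test rejects r = 0 at the 95% level: at least one cointegrating relation')
spread = data.dot(v1)  # ~ 0.5347 * EWA - 0.4517 * EWC, using the weights printed above
print('ADF statistic of the Johansen spread: %.6f' % ts.adfuller(spread, 1)[0])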