
Commit 75c06d4

try google colab
add badge

1 parent cefa495 · commit 75c06d4

15 files changed: +1982 −0 lines changed
+58 lines changed

@@ -0,0 +1,58 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt

sample_size = 500
sigma_e = 3.0  # true value of the error sigma
random_num_generator = np.random.RandomState(0)
x = 10.0 * random_num_generator.rand(sample_size)
e = random_num_generator.normal(0, sigma_e, sample_size)
y = 1.0 + 2.0 * x + e  # a = 1.0; b = 2.0; y = a + b*x
plt.scatter(x, y, color='blue')

# initial belief
sigma_e = 3.0  # treat sigma_e as known, to avoid inverse-gamma complexity
a_0 = 0.5
b_0 = 0.5
sigma_a_0 = 0.5
sigma_b_0 = 0.5
beta_0 = np.array([[a_0], [b_0]])
sigma_beta_0 = np.array([[sigma_a_0*sigma_a_0, 0], [0, sigma_b_0*sigma_b_0]])

beta_recorder = []  # record parameter beta after each update
beta_recorder.append(beta_0)
for pair in range(250):  # 500 points means 250 pairs
    x1 = x[pair*2]
    x2 = x[pair*2+1]
    y1 = y[pair*2]
    y2 = y[pair*2+1]
    # solve the two-point system for intercept and slope, with its covariance
    mu_y = np.array([[(x1*y2-x2*y1)/(x1-x2)], [(y1-y2)/(x1-x2)]])
    sigma_y = np.array([[(np.square(x1/(x1-x2))+np.square(x2/(x1-x2)))*np.square(sigma_e), 0],
                        [0, 2*np.square(sigma_e/(x1-x2))]])
    # conjugate normal update: precision-weighted average of prior and observation
    sigma_beta_1 = np.linalg.inv(np.linalg.inv(sigma_beta_0) + np.linalg.inv(sigma_y))
    beta_1 = sigma_beta_1.dot(np.linalg.inv(sigma_beta_0).dot(beta_0) + np.linalg.inv(sigma_y).dot(mu_y))

    # the posterior becomes the prior for the next pair
    beta_0 = beta_1
    sigma_beta_0 = sigma_beta_1
    beta_recorder.append(beta_0)

print('parameters: %.7f, %.7f' % (beta_0[0, 0], beta_0[1, 0]))

# plot the Bayesian dynamics
xfit = np.linspace(0, 10, sample_size)
ytrue = 2.0 * xfit + 1.0  # we know the true slope and intercept
plt.plot(xfit, ytrue, label='true line', linewidth=3)
y0 = beta_recorder[0][1] * xfit + beta_recorder[0][0]
plt.plot(xfit, y0, label='initial belief', linewidth=1)
y1 = beta_recorder[1][1] * xfit + beta_recorder[1][0]
plt.plot(xfit, y1, label='1st update', linewidth=1)
y10 = beta_recorder[10][1] * xfit + beta_recorder[10][0]
plt.plot(xfit, y10, label='10th update', linewidth=1)
y100 = beta_recorder[100][1] * xfit + beta_recorder[100][0]
plt.plot(xfit, y100, label='100th update', linewidth=1)
plt.legend()
plt.show()
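
A quick way to sanity-check the sequential update: after 250 pairwise updates the weak prior has largely washed out, so the posterior mean should sit close to the ordinary least-squares solution. A minimal sketch, assuming x, y, and sample_size from the script above are still in scope:

# hypothetical check, not part of the original commit:
# the Bayesian posterior mean should approach the OLS estimate
X = np.vstack((np.ones(sample_size), x)).T
beta_ols, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
print('OLS parameters: %.7f, %.7f' % (beta_ols[0], beta_ols[1]))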
+42 lines changed

@@ -0,0 +1,42 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt

sample_size = 500
sigma_e = 3.0  # true value of the error sigma
random_num_generator = np.random.RandomState(0)
x = 10.0 * random_num_generator.rand(sample_size)
e = random_num_generator.normal(0, sigma_e, sample_size)
y = 1.0 + 2.0 * x + e  # a = 1.0; b = 2.0; y = a + b*x
plt.scatter(x, y, color='blue')

# normal equation to estimate the model parameters
X = np.vstack((np.ones(sample_size), x)).T
params_closed_form = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
print('parameters: %.7f, %.7f' % (params_closed_form[0], params_closed_form[1]))

from sklearn.linear_model import LinearRegression
# the next two lines do the regression
lm_model = LinearRegression(copy_X=True, fit_intercept=True)  # `normalize` was removed from recent scikit-learn
lm_model.fit(x.reshape(-1, 1), y)  # fit() expects a 2D array
print('parameters: %.7f, %.7f' % (lm_model.intercept_, lm_model.coef_[0]))

# present the graph
xfit = np.linspace(0, 10, sample_size)
yfit = lm_model.predict(xfit.reshape(-1, 1))
ytrue = 2.0 * xfit + 1.0  # we know the true slope and intercept
plt.scatter(x, y, color='blue')
plt.plot(xfit, yfit, color='red', label='fitted line', linewidth=3)
plt.plot(xfit, ytrue, color='green', label='true line', linewidth=3)
plt.legend()

# R-squared
r_square = lm_model.score(x.reshape(-1, 1), y)
print('R-squared: %.7f' % r_square)

from scipy.stats import pearsonr
# in simple regression, the square root of R-squared is the correlation coefficient
print('Its square root is the Pearson correlation coefficient: %.7f == %.7f' % (np.sqrt(r_square), pearsonr(x, y)[0]))
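
scikit-learn reports point estimates only. For standard errors and p-values on the same model, statsmodels' OLS is a common alternative; a minimal sketch, assuming x and y from the script above are in scope (statsmodels is an extra dependency, not used in the original commit):

import statsmodels.api as sm

X_const = sm.add_constant(x)       # prepend an intercept column
ols_results = sm.OLS(y, X_const).fit()
print(ols_results.params)          # intercept and slope, matching the closed form
print(ols_results.bse)             # standard errors of both estimates
print('R-squared: %.7f' % ols_results.rsquared)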
+76 lines changed

@@ -0,0 +1,76 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#################################################### Data #####################################################
hist_file = os.path.join('hist/', '%s.csv' % 'EWA US Equity')
ewa_price = pd.read_csv(hist_file, header=0, parse_dates=True, sep=',', index_col=0)
ewa_price = ewa_price['Price']
ewa_price.name = 'EWA US Equity'

hist_file = os.path.join('hist/', '%s.csv' % 'EWC US Equity')
ewc_price = pd.read_csv(hist_file, header=0, parse_dates=True, sep=',', index_col=0)
ewc_price = ewc_price['Price']
ewc_price.name = 'EWC US Equity'

data = pd.concat([ewa_price, ewc_price], axis=1)
# print(data[data.isnull().any(axis=1)])
data.dropna(axis=0, how='any', inplace=True)

from sklearn.linear_model import LinearRegression
# the next two lines do the regression
lm_model = LinearRegression(copy_X=True, fit_intercept=True)  # `normalize` was removed from recent scikit-learn
lm_model.fit(data['EWA US Equity'].values.reshape(-1, 1), data['EWC US Equity'].values)  # fit() expects a 2D array
print('parameters: %.7f, %.7f' % (lm_model.intercept_, lm_model.coef_[0]))

# present the graph
fig, ax = plt.subplots(nrows=1, ncols=2)
ax[0].set_title('EWA vs EWC')
ax[0].plot(data)
yfit = lm_model.coef_ * data['EWA US Equity'] + lm_model.intercept_
y_residual = data['EWC US Equity'] - yfit
ax[1].set_title('Regression Residual')
ax[1].plot(y_residual)
plt.show()

from scipy.stats import pearsonr
print('Pearson correlation coefficient: %.7f' % pearsonr(data['EWA US Equity'], data['EWC US Equity'])[0])

####################################### CADF #####################################################
import statsmodels.tsa.stattools as ts
ts.adfuller(y_residual, 1)  # maxlag = 1
# (-3.667485117146333,        # ADF test statistic
#  0.0045944586170011716,     # p-value
#  1,                         # lags used
#  4560,                      # number of observations
#  {'1%': -3.431784865122899,
#   '5%': -2.8621740417619224,
#   '10%': -2.5671075035106954},
#  625.5003218990623)

# regress the other way around: EWA on EWC
lm_model = LinearRegression(copy_X=True, fit_intercept=True)
lm_model.fit(data['EWC US Equity'].values.reshape(-1, 1), data['EWA US Equity'].values)  # fit() expects a 2D array
print('parameters: %.7f, %.7f' % (lm_model.intercept_, lm_model.coef_[0]))
yfit = lm_model.coef_ * data['EWC US Equity'] + lm_model.intercept_
y_residual = data['EWA US Equity'] - yfit
ts.adfuller(y_residual, 1)  # maxlag = 1
# statistic = -3.797221868633519

####################################### Johansen #####################################################
from statsmodels.tsa.vector_ar.vecm import coint_johansen

jh_results = coint_johansen(data, 0, 1)  # det_order = 0 (constant term), k_ar_diff = 1 (lag 1)
print(jh_results.lr1)   # dim = (n,)   trace statistics
print(jh_results.cvt)   # dim = (n, 3) critical value table (90%, 95%, 99%)
print(jh_results.evec)  # dim = (n, n) column-wise eigenvectors
v1 = jh_results.evec[:, 0]
v2 = jh_results.evec[:, 1]

# [21.44412674  3.64194243]    # trace statistics
# [[13.4294 15.4943 19.9349]   # r = 0 critical values
#  [ 2.7055  3.8415  6.6349]]  # r <= 1 critical values
# [[ 0.53474958  0.02398649]   # eigenvectors (columns)
#  [-0.45169106  0.12036402]]
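
The first Johansen eigenvector gives the weights of the most mean-reverting linear combination of the two price series, so projecting the prices onto it should produce a stationary spread. A minimal follow-up sketch, assuming data, v1, and ts from the script above are in scope:

# hypothetical check, not part of the original commit:
# form the spread from the first eigenvector and re-run ADF on it
spread = data.dot(v1)  # roughly 0.535*EWA - 0.452*EWC, per the eigenvector above
adf_stat = ts.adfuller(spread, 1)[0]
print('ADF statistic for the Johansen spread: %.7f' % adf_stat)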
