Skip to content

Commit ada9681

Browse files
Add files via upload
1 parent 76c07a9 commit ada9681

File tree

64 files changed

+203670
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+203670
-0
lines changed

Deep Learning using Tensorflow Keras/Anomaly-Detection-LSTM-Autoencoder/Anomaly Detection Keras AELSTM.ipynb

+29,765
Large diffs are not rendered by default.
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Loading
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
# Anomaly-Detection-LSTM-Autoencoder
2+
3+
## Import Library
4+
```
5+
import numpy as np
6+
import tensorflow as tf
7+
import pandas as pd
8+
pd.options.mode.chained_assignment = None
9+
import seaborn as sns
10+
from matplotlib.pylab import rcParams
11+
import matplotlib.pyplot as plt
12+
import plotly.express as px
13+
import plotly.graph_objects as go
14+
15+
%matplotlib inline
16+
17+
sns.set(style='whitegrid', palette='muted')
18+
rcParams['figure.figsize'] = 14, 8
19+
np.random.seed(1)
20+
tf.random.set_seed(1)
21+
22+
print('Tensorflow version:', tf.__version__)
23+
```
24+
25+
## Load Data
26+
```
27+
df = pd.read_csv('S&P_500_Index_Data.csv', parse_dates=['date'])
28+
df.head()
29+
df.shape
30+
```
31+
32+
## Plot Data
33+
34+
```
35+
fig = go.Figure()
36+
fig.add_trace(go.Scatter(x=df.date, y=df.close,
37+
mode='lines',
38+
name='close'))
39+
fig.update_layout(showlegend=True)
40+
fig.show()
41+
```
42+
![fig](Figs/F1.JPG)
43+
44+
## Data Preprocessing
45+
```
46+
from sklearn.preprocessing import StandardScaler

# Chronological split: the first 80% of the rows are used for training and
# the remaining rows for testing.
train_size = int(len(df) * 0.8)
test_size = len(df) - train_size
# Take explicit copies so the scaled values below are written into
# independent frames rather than into slices of `df`.
train = df.iloc[:train_size].copy()
test = df.iloc[train_size:].copy()
print(train.shape, test.shape)

# Fit the scaler on the training rows only, then apply the same
# transformation to both partitions.
scaler = StandardScaler()
scaler = scaler.fit(train[['close']])

train['close'] = scaler.transform(train[['close']])
test['close'] = scaler.transform(test[['close']])
57+
```
58+
59+
## Create Training and Test Sequences
60+
```
61+
def create_dataset(X, y, time_steps=1):
    """Build overlapping windows of ``X`` and the value of ``y`` that follows each.

    Window ``i`` holds rows ``i`` .. ``i + time_steps - 1`` of ``X``; its
    target is ``y[i + time_steps]``.

    Args:
        X: Feature rows; a pandas DataFrame/Series or anything
            ``np.asarray`` accepts.
        y: Target values, indexed by the same positions as ``X``.
        time_steps: Number of consecutive rows in each window.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays. ``Xs`` has shape
        ``(n_windows, time_steps, ...)`` and ``ys`` has ``n_windows``
        entries, where ``n_windows = len(X) - time_steps``. When the input is
        too short to form a window, both arrays are empty but keep their
        trailing dimensions.

    Raises:
        IndexError: If ``y`` is too short to supply a target for every window.
    """
    # Convert once so the loop slices plain arrays instead of going through
    # pandas indexing on every iteration.
    features = np.asarray(X)
    targets = np.asarray(y)

    n_windows = len(features) - time_steps
    if n_windows <= 0:
        # Keep the window/feature dimensions so callers can still read
        # `.shape[1]` and `.shape[2]` on an empty result.
        empty_windows = np.empty((0, time_steps) + features.shape[1:], dtype=features.dtype)
        empty_targets = np.empty((0,) + targets.shape[1:], dtype=targets.dtype)
        return empty_windows, empty_targets

    windows = np.stack([features[i:i + time_steps] for i in range(n_windows)])
    # Explicit index array (not a slice) so a too-short `y` raises instead of
    # silently yielding fewer targets than windows.
    next_values = targets[time_steps + np.arange(n_windows)]
    return windows, next_values
68+
69+
time_steps = 30
70+
71+
X_train, y_train = create_dataset(train[['close']], train.close, time_steps)
72+
X_test, y_test = create_dataset(test[['close']], test.close, time_steps)
73+
74+
print(X_train.shape)
75+
```
76+
77+
## Build an LSTM Autoencoder
78+
```
79+
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, LSTM, RepeatVector, TimeDistributed

# Window length and feature count are read from the training tensor.
timesteps = X_train.shape[1]
num_features = X_train.shape[2]

model = Sequential([
    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first layer.
    Input(shape=(timesteps, num_features)),
    # Encoder: returns only its final output for the whole window.
    LSTM(128),
    Dropout(0.2),
    # Repeat the encoding once per time step to feed the decoder.
    RepeatVector(timesteps),
    # Decoder: emits one output per time step.
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    # Apply the same Dense layer to every time step.
    TimeDistributed(Dense(num_features)),
])

model.compile(loss='mae', optimizer='adam')
model.summary()
95+
```
96+
![fig](Figs/Summary.JPG)
97+
## Training
98+
```
99+
# Stop training once the validation loss has not improved for 3 epochs.
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode='min')
# An autoencoder is trained to reproduce its own input, so the windows are
# used as both input and target. This matches the reconstruction error
# |prediction - input| that is later used as the anomaly score.
history = model.fit(
    X_train, X_train,
    epochs=100,
    batch_size=32,
    validation_split=0.1,
    callbacks=[es],
    shuffle=False,
)
108+
```
109+
110+
## Plot Metrics and Evaluate the Model
111+
```
112+
plt.plot(history.history['loss'], label='Training Loss')
113+
plt.plot(history.history['val_loss'], label='Validation Loss')
114+
plt.legend();
115+
```
116+
![fig](Figs/F2.JPG)
117+
118+
```
119+
X_train_pred = model.predict(X_train)

# Reconstruction error per window: mean absolute difference over axis 1.
train_mae_loss = pd.DataFrame(np.mean(np.abs(X_train_pred - X_train), axis=1), columns=['Error'])
# Evaluate reconstruction of the test windows, i.e. the same quantity that
# is used for anomaly scoring.
model.evaluate(X_test, X_test)
123+
124+
```
125+
126+
127+
```
128+
# `distplot` is deprecated in seaborn; `histplot` with a density-normalised
# histogram and a KDE overlay is its replacement.
sns.histplot(train_mae_loss['Error'], bins=50, kde=True, stat='density');
129+
130+
```
131+
![fig](Figs/F3.JPG)
132+
133+
```
134+
X_test_pred = model.predict(X_test)

# Reconstruction error per test window: mean absolute difference over axis 1.
test_mae_loss = np.mean(np.abs(X_test_pred - X_test), axis=1)
# `distplot` is deprecated in seaborn; flatten the errors to 1-D and plot
# them with `histplot` (density-normalised, with a KDE overlay).
sns.histplot(test_mae_loss.ravel(), bins=50, kde=True, stat='density');
138+
139+
```
140+
![fig](Figs/F4.JPG)
141+
142+
## Detect Anomalies in the S&P 500 Index Data
143+
```
144+
THRESHOLD = 0.65
145+
146+
test_score_df = pd.DataFrame(test[time_steps:])
147+
test_score_df['loss'] = test_mae_loss
148+
test_score_df['threshold'] = THRESHOLD
149+
test_score_df['anomaly'] = test_score_df.loss > test_score_df.threshold
150+
test_score_df['close'] = test[time_steps:].close
151+
fig = go.Figure()
152+
fig.add_trace(go.Scatter(x=test[time_steps:].date, y=test_score_df.loss,
153+
mode='lines',
154+
name='Test Loss'))
155+
fig.add_trace(go.Scatter(x=test[time_steps:].date, y=test_score_df.threshold,
156+
mode='lines',
157+
name='Threshold'))
158+
fig.update_layout(showlegend=True)
159+
fig.show()
160+
161+
```
162+
![fig](Figs/F5.JPG)
163+
164+
165+
```
166+
# `anomaly` is the result of a comparison and therefore already boolean, so
# it can be used directly as the row mask.
anomalies = test_score_df[test_score_df.anomaly]
anomalies.head()
168+
```
169+
![fig](Figs/F6.JPG)
170+
171+
```
172+
# Plot the closing price in its original units and mark the anomalous rows.
# The scaler expects 2-D input, so the column is selected with a list
# (`[['close']]`) and the result is flattened back to 1-D for plotting.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=test[time_steps:].date,
    y=scaler.inverse_transform(test[time_steps:][['close']]).ravel(),
    mode='lines',
    name='Close Price',
))
fig.add_trace(go.Scatter(
    x=anomalies.date,
    y=scaler.inverse_transform(anomalies[['close']]).ravel(),
    mode='markers',
    name='Anomaly',
))
fig.update_layout(showlegend=True)
fig.show()
181+
```
182+
![fig](Figs/F7.JPG)
183+

0 commit comments

Comments
 (0)