-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfashion_mnist_cnn.py
168 lines (135 loc) · 5.63 KB
/
fashion_mnist_cnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# baseline cnn model for fashion mnist
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
device_name = "/gpu:0"
def load_dataset():
    """Load the Fashion-MNIST dataset via Keras.

    Adds a trailing single-channel axis to the 28x28 grayscale images and
    one-hot encodes the 10-class integer labels.

    Returns:
        trainX: Training images, shape (60000, 28, 28, 1)
        trainY: One-hot training labels
        testX: Testing images, shape (10000, 28, 28, 1)
        testY: One-hot testing labels
    """
    # fetch the raw (images, labels) splits
    (trainX, trainY), (testX, testY) = fashion_mnist.load_data()
    # append the single grayscale channel expected by Conv2D
    trainX = trainX.reshape((-1, 28, 28, 1))
    testX = testX.reshape((-1, 28, 28, 1))
    # integer class ids -> one-hot vectors
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)
    return trainX, trainY, testX, testY
def prep_pixels(train, test):
    """Normalize pixel data for the train and test image arrays.

    Casts each array to float32 and rescales intensities from the
    integer range [0, 255] into [0.0, 1.0].

    Returns:
        (train_scaled, test_scaled): the two normalized arrays
    """
    # cast-and-rescale both splits in one pass
    train_scaled, test_scaled = (
        images.astype('float32') / 255.0 for images in (train, test)
    )
    return train_scaled, test_scaled
"""
The target is to maximize the accuracy of the cnn-mnist model:
We start by having a few decisions related to the model.
// Model hyper-parameters
x1 = 32-128 filters in each CNN layer
x2 = 50-100 neurons in first dense layer
x3 = no dropout, 0.1 dropout, 0.2 dropout
x4 = learning rate 0.005 or 0.05
x5 = kernel size cnn 2-4
// Data augmentation hyper-parameters
x6 = rotation_range: 0-30
x7 = width_shift_range: 0.0-0.3,
x8 = height_shift_range: 0.0-0.3
x9 = horizontal_flip: 0 or 1
We will use a genetic algorithm to find the best combinations of these hyperparameters.
The fitness-function is based on model accuracy on test-dataset.
Best chromosome found so far (92.29 % test accuracy):
[
0.6151899027462715,
0.8630441247971613,
0.48568135841046134,
0.0032114426394213025,
0.30784428242021955,
0.1473845685726038,
0.3145443359129966,
0.07676705567952473,
0.4287397570066659
]
"""
def define_model_ga(hyperparameter_arr):
    """Define and compile the CNN model from a chromosome of hyperparameters.

    Each gene in ``hyperparameter_arr`` is expected to lie in [0, 1] and is
    mapped linearly onto its hyperparameter range.  The integer mappings are
    clamped so a gene of exactly 1.0 cannot overshoot the documented maximum
    (previously 1.0 produced 129 filters and kernel size 5).

    Args:
        hyperparameter_arr (numpy ndarray): at least 5 genes in [0, 1];
            indices 0-4 control filter count, first dense width, dropout,
            learning rate and kernel size (indices 5-8 are consumed by the
            data-augmentation setup elsewhere).

    Returns:
        model (keras Sequential): the compiled CNN model.
    """
    # map genes onto their ranges, clamped to the documented maxima
    neurons_cnn = min(32 + int((128 - 32 + 1) * hyperparameter_arr[0]), 128)    # 32..128 filters
    neurons_first = min(50 + int((100 - 50 + 1) * hyperparameter_arr[1]), 100)  # 50..100 dense units
    dropout = 0.3 * hyperparameter_arr[2]                                       # 0.0..0.3
    learning_rate = 0.005 + (0.05 - 0.005) * hyperparameter_arr[3]              # 0.005..0.05
    kernel_size_cnn = min(2 + int((4 - 2 + 1) * hyperparameter_arr[4]), 4)      # 2..4
    # define model: two conv/pool stages, dropout, then the classifier head
    model = Sequential()
    model.add(Conv2D(neurons_cnn, (kernel_size_cnn, kernel_size_cnn),
                     activation='relu', kernel_initializer='he_uniform',
                     input_shape=(28, 28, 1)))
    model.add(MaxPooling2D((2, 2)))
    # input_shape is only meaningful on the first layer; the duplicate here was a no-op
    model.add(Conv2D(neurons_cnn, (kernel_size_cnn, kernel_size_cnn),
                     activation='relu', kernel_initializer='he_uniform'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(dropout))
    model.add(Flatten())
    model.add(Dense(neurons_first, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    # compile model; the `lr` argument was removed from Keras optimizers —
    # `learning_rate` is the supported name
    opt = SGD(learning_rate=learning_rate, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
def evaluate_model_ga(pop):
    """Evaluate one GA population of CNN configurations.

    Loads and normalizes Fashion-MNIST once, then for each individual builds
    an augmented-data pipeline, trains a freshly compiled model with early
    stopping, and records its accuracy on the held-out test set.

    Args:
        pop (numpy ndarray): population array; each row is one individual's
            chromosome.  Genes 0-4 are consumed by define_model_ga(); genes
            5-8 (assumed to lie in [0, 1]) set the augmentation parameters.

    Returns:
        pop_scores (list): test-set accuracy of every individual, in order.
    """
    # load dataset (shared across all individuals)
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data: float32, scaled to [0, 1]
    trainX, testX = prep_pixels(trainX, testX)
    # run model evaluation for each hyperparameter individual
    pop_scores = []
    # pin all work to the module-level device (default "/gpu:0")
    with tf.device(device_name):
        for arr in pop:
            # map augmentation genes onto their ranges
            rotation_range = 30*arr[5]          # 0..30 degrees
            width_shift = 0.3*arr[6]            # 0.0..0.3 fraction of width
            height_shift = 0.3*arr[7]           # 0.0..0.3 fraction of height
            # gene >= 0.5 enables flipping (int(2*x) is 0 below 0.5, else >= 1)
            horizontal_flip = bool(int(2*arr[8]))
            # augmentation generator; validation_split carves 20% of the
            # training data out for the validation subset below
            datagen = ImageDataGenerator(
                rotation_range=rotation_range,
                width_shift_range=width_shift,
                height_shift_range=height_shift,
                horizontal_flip=horizontal_flip,
                validation_split=0.2)
            # fresh model per individual (genes 0-4 decide its architecture)
            model = define_model_ga(arr)
            # stop early once val_loss plateaus for 5 epochs
            callback = tf.keras.callbacks.EarlyStopping(patience=5)
            # fit model on the augmented 'training' subset, monitor the
            # 'validation' subset (NOTE(review): validation batches are also
            # augmented here — confirm that is intended)
            history = model.fit(datagen.flow(trainX, trainY, batch_size=32,subset='training'),
                validation_data=datagen.flow(trainX, trainY,batch_size=8, subset='validation'),
                epochs=50, callbacks=[callback], verbose=1)
            # fitness = accuracy on the untouched test set
            _, acc = model.evaluate(testX, testY, verbose=0)
            pop_scores.append(acc)
            print('Accuracy pop: > %.3f' % (acc * 100.0), arr)
    return pop_scores