
Commit e2a5f23

added up to part 9
Backpropagation still isn't right though; I need to investigate it further.
1 parent da51bc1 commit e2a5f23

9 files changed: +216 / -10343 lines

Diff for: __pycache__/ex1helper.cpython-36.pyc (-1.78 KB, binary file not shown)

Diff for: __pycache__/ex2helper.cpython-36.pyc (0 bytes changed, binary file not shown)

Diff for: __pycache__/ex3helper.cpython-36.pyc (0 bytes changed, binary file not shown)

Diff for: __pycache__/ex4Checker.cpython-36.pyc (1.94 KB, binary file not shown)

Diff for: __pycache__/ex4helper.cpython-36.pyc (833 bytes, binary file not shown)

Diff for: ex4.py (+55 / -8 lines)

@@ -21,6 +21,7 @@
 import scipy.io as io
 import ex3helper as helper3
 import ex4helper as helper
+import ex4Checker as checker
 
 ## Setup the parameters you will use for this exercise
 inputLayerSize = 400; # 20x20 Input Images of Digits
@@ -40,7 +41,6 @@
 y = np.squeeze(mat['y'])
 
 m = X.shape[0]
-
 # Randomly select 100 data points to display
 perm = np.random.permutation(m)
 sel = X[perm[0:100],:]
@@ -61,7 +61,7 @@
 theta1 = mat['Theta1']
 theta2 = mat['Theta2']
 
-nnParams = np.array([theta1.flatten(), theta2.flatten()])
+nnParams = np.append(theta1.flatten(), theta2.flatten())
 
 
 ## ================ Part 3: Compute Cost (Feedforward) ================
@@ -81,7 +81,7 @@
 # Weight regularization parameter (we set this to 0 here).
 lambdaVal = 0
 
-J = helper.nnCostFunction(nnParams, X, y, lambdaVal)
+J = helper.nnCostFunction(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
 
 print('Cost at parameters (loaded from ex4weights): {:.6f}'.format(J))
 print('this value should be approx: 0.287629')
@@ -98,7 +98,7 @@
 # Weight regularization parameter (we set this to 1 here).
 lambdaVal = 1
 
-J = helper.nnCostFunction(nnParams, X, y, lambdaVal)
+J = helper.nnCostFunction(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
 
 print('Cost at parameters (loaded from ex4weights): {:.6f}'.format(J))
 print('this value should be approx: 0.383770')
@@ -129,10 +129,10 @@
 print('\nInitializing Neural Network Parameters ...')
 
 initialTheta1 = np.random.rand(inputLayerSize + 1, hiddenLayerSize)
-initialTheta2 = np.random.rand(hiddenLayerSize + 1, num_labels)
+initialTheta2 = np.random.rand(hiddenLayerSize + 1, numLabels)
 
 # Unroll parameters
-initialNNParams = np.array([initialTheta1.flatten(), initialTheta2.flatten()])
+initialNNParams = np.append(initialTheta1.flatten(), initialTheta2.flatten())
 
 ## =============== Part 7: Implement Backpropagation ===============
 # Once your cost matches up with ours, you should proceed to implement the
@@ -143,6 +143,53 @@
 print('\nChecking Backpropagation... ')
 
 #Check gradients by running checkNNGradients
-#helper.checkNNGradients()
+checker.checkNNGradients(0)
+
+input('\nPart 6 & 7 completed. Program paused. Press enter to continue: ')
+
+## =============== Part 8: Implement Regularization ===============
+# Once your backpropagation implementation is correct, you should now
+# continue to implement the regularization with the cost and gradient.
+#
+
+print('\nChecking Backpropagation (w/ Regularization) ... ')
+
+# After you have completed the assignment, change the MaxIter to a larger
+# value to see how more training helps.
+
+
+# You should also try different values of lambda
+lambdaVal = 3
+checker.checkNNGradients(lambdaVal)
+
+debug_J = helper.nnCostFunction(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
+
+print('Cost at parameters (loaded from ex4weights): {:.6f}'.format(debug_J))
+print('this value should be approx: 0.576051')
+
+input('\nPart 8 completed. Program paused. Press enter to continue: ')
+
+
+## =================== Part 9: Training NN ===================
+# You have now implemented all the code necessary to train a neural
+# network. To train your neural network, we will now use "fmincg", which
+# is a function which works similarly to "fminunc". Recall that these
+# advanced optimizers are able to train our cost functions efficiently as
+# long as we provide them with the gradient computations.
+#
+
+print('\nTraining Neural Network... ')
+
+MaxIter = 50
+lambdaVal = 1
+
+finalParams = helper.optimizeNN(initialNNParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, MaxIter)
+
+print(finalParams)
+
+input('\nPart 9 completed. Program paused. Press enter to continue: ')
 
-#input('\nPart 6 & 7 completed. Program paused. Press enter to continue: ')
+## ================= Part 10: Visualize Weights =================
+# You can now "visualize" what the neural network is learning by
+# displaying the hidden units to see what features they are capturing in
+# the data.
Diff for: ex4Checker.py (+79 lines, new file)

@@ -0,0 +1,79 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.optimize as op
+import ex4helper as helper
+import math
+import matplotlib.image as mpimg
+from numpy import linalg as LA
+
+def backPropMask(nnParams,*args):
+    return helper.BackPropagation(nnParams,*args)
+
+def costMask(nnParams,*args):
+    return helper.nnCostFunction(nnParams,*args)
+
+def checkNNGradients(lambdaVal):
+    # CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
+    # backpropagation gradients, it will output the analytical gradients
+    # produced by your backprop code and the numerical gradients (computed
+    # using computeNumericalGradient). These two gradient computations should
+    # result in very similar values.
+    #
+
+    inputLayerSize = 3
+    hiddenLayerSize = 5
+    numLabels = 3
+    m = 5
+
+    #We generate some 'random' test data
+    theta1 = debugInitializeWeights(hiddenLayerSize, inputLayerSize)
+    theta2 = debugInitializeWeights(numLabels, hiddenLayerSize)
+
+    # Reusing debugInitializeWeights to generate X
+    X = debugInitializeWeights(m, inputLayerSize - 1);
+    y = np.remainder(np.arange(m),numLabels) + 1
+
+    #unroll parameters
+    nnParams = np.append(theta1.flatten(), theta2.flatten())
+
+    grad = helper.BackPropagation(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
+
+    diff = op.check_grad(costMask, backPropMask, nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, epsilon=.0001)
+
+    numGrad = op.approx_fprime(nnParams, costMask, .001 , inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
+    # Visually examine the two gradient computations. The two columns you get should be very similar.
+
+
+    print('\nComparing Gradients: (numGrad, grad, absolute difference)')
+
+    for i in range(0,numGrad.shape[0]):
+        print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i] )))
+
+
+    print('The above two columns you get should be very similar.')
+    print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
+
+    # Evaluate the norm of the difference between two solutions.
+    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
+    # in computeNumericalGradient.m, then diff below should be less than 1e-9
+
+    print('If your backpropagation implementation is correct, then ')
+    print('the relative difference will be small (less than 1e-9).')
+    print('Relative Difference: {}'.format(diff))
+
+
+def debugInitializeWeights(fanOut, fanIn):
+    # Initialize W using "sin", this ensures that vW is always of the same
+    # values and will be useful for debugging
+    # W = zeros(fan_out, 1 + fan_in);
+    # W = reshape(sin(1:numel(W)), size(W)) / 10;
+    # numel ~ number of elements. equivalent to size, w.size
+    # size, equivalent of shape, w.shape
+    W = np.arange(fanOut*(fanIn+1))
+    W = W.reshape(fanOut, fanIn+1)
+    W = np.sin(W)/10
+    return W
+
+if __name__ == '__main__':
+    checkNNGradients(0)

Diff for: ex4helper.py (+82 / -50 lines)

@@ -1,36 +1,23 @@
-import numpy as np
+import numpy as np
 import matplotlib.pyplot as plt
 import scipy.optimize as op
-import ex2helper as helper
 import math
 import matplotlib.image as mpimg
 
-def nnCostFunction(nnParams, X, y, lambdaVal):
+def nnCostFunction(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
+    #get num examples
     m = X.shape[0]
-    print(m)
-    theta1 = nnParams[0]
-    n1 = X.shape[1] + 1
-    n2 = theta1.shape[0]
-    theta1 = theta1.reshape(int(n2/n1),n1)
-    theta2 = nnParams[1]
-    n1 = theta1.shape[0] + 1
-    n2 = theta2.shape[0]
-    theta2 = theta2.reshape(int(n2/n1),n1)
-
+
+    #get Theta Matrices
+    [theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
 
     #prepare Y matrix for cost function
-    numLabels = np.unique(y).shape[0]+1
-    #create boolean array of value or not out of 1s and 0s
-    Y = (y==1).astype(int)
-    for i in range(2,numLabels):
-        Y = np.append(Y,(y==i).astype(int))
-    #reshape so first dimension corresponds with label
-    Y = Y.reshape(10,5000)
+    Y = getYMatrix(y)
 
     X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
-    h1 = helper.sigmoid(np.matmul(X,theta1.transpose()))
+    h1 = sigmoid(np.matmul(X,theta1.transpose()))
     h1 = np.insert(h1,0,np.ones(h1.shape[0]),axis=1) # adding bias unit
-    h2 = helper.sigmoid(np.matmul(h1,theta2.transpose())).transpose()
+    h2 = sigmoid(np.matmul(h1,theta2.transpose()))
 
     #getting regulation parameters
     R1 = theta1[:,1:]
@@ -39,56 +26,101 @@ def nnCostFunction(nnParams, X, y, lambdaVal):
     costRegulation = lambdaVal*(np.sum(np.square(R1.flatten())) + np.sum(np.square(R2.flatten())))/(2*m)
 
     #calculating true cost without regulation
-    cost = np.sum(np.multiply(np.log(h2),Y)) + np.sum(np.multiply(np.log(1-h2),1-Y))
+    cost = np.sum(np.log(h2)*Y) + np.sum(np.log(1-h2)*(1-Y))
     cost = -cost/m
 
     #calculate total cost
     totalCost = cost + costRegulation
 
     return totalCost
 
-
-def nnGradFunction(nnParams, X, y, lambdaVal):
+def BackPropagation(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
+    #get num examples
     m = X.shape[0]
+
+    #get Theta Matrices
+    [theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
+
+    #prepare Y matrix for cost function
+    Y = getYMatrix(y) #5x3
+
+    #forward Pass
+    [a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
+    #a1 = 5x4, z1 = 5x5, a2 = 5x5, a2 = 5x6, z2 = 5x3, h2 = 5x3
+
+
+    #backward
+    theta2Error = h2-Y #5x3
+    theta1Error = np.matmul(theta2Error,theta2[:,1:])*sigmoidGradient(z1)
+
+    D1 = np.matmul(theta1Error.transpose(),a1)
+    D2 = np.matmul(theta2Error.transpose(),a2)
+
+    theta2delta = theta2Error * (h2*(1-h2))
+    theta2other = np.dot(a2.transpose(),theta2delta)
+    print(theta2other.flatten())
+    #average the gradient per example
+    theta1Grad = D1/m
+    theta2Grad = D2/m
+
+    #calculate regulation terms
+    theta1Reg = lambdaVal*theta1/m
+    theta2Reg = lambdaVal*theta2/m
+    theta1Reg[:,0] = 0
+    theta2Reg[:,0] = 0
+
+    #combine gradient and regulation terms
+    theta1Grad = theta1Grad + theta1Reg
+    theta2Grad = theta2Grad + theta2Reg
+
+    return np.append(theta1Grad.flatten(), theta2Grad.flatten())
+
+def forwardPass(nnParams, X):
     theta1 = nnParams[0]
-    n1 = X.shape[1] + 1
-    n2 = theta1.shape[0]
-    theta1 = theta1.reshape(int(n2/n1),n1)
     theta2 = nnParams[1]
-    n1 = theta1.shape[0] + 1
-    n2 = theta2.shape[0]
-    theta2 = theta2.reshape(int(n2/n1),n1)
-
 
+    #layer 1
+    a1 = np.insert(X,0,np.ones(X.shape[0]),axis=1)
+    z1 = np.matmul(a1,theta1.transpose())
+    a2 = sigmoid(z1)
+
+    #layer 2
+    a2 = np.insert(a2,0,np.ones(a1.shape[0]),axis=1) # adding bias unit 5x6
+    z2 = np.matmul(a2,theta2.transpose()) #5x3
+    h2 = sigmoid(z2) #5x3
+
+    return [a1, z1, a2, z2, h2]
+
+def getYMatrix(y):
     #prepare Y matrix for cost function
-    numLabels = np.unique(y).shape[0]+1
+    numLabels = np.unique(y).shape[0]
+
     #create boolean array of value or not out of 1s and 0s
     Y = (y==1).astype(int)
-    for i in range(2,numLabels):
+    for i in range(2, numLabels + 1):
         Y = np.append(Y,(y==i).astype(int))
     #reshape so first dimension corresponds with label
-    Y = Y.reshape(10,5000)
-
-    X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
-    h1 = helper.sigmoid(np.matmul(X,theta1.transpose()))
-    h1 = np.insert(h1,0,np.ones(h1.shape[0]),axis=1) # adding bias unit
-    h2 = helper.sigmoid(np.matmul(h1,theta2.transpose())).transpose()
+    Y = Y.reshape(numLabels,y.shape[0])
+    return Y.transpose()
 
+def getThetas(nnParams,inputSize,hiddenLayerSize,outputSize):
+    theta1Length = (inputSize+1)*hiddenLayerSize
 
-    #calculate gradients
-    theta2Error = h2-Y
-    theta1Error = np.multiply(np.matmul(theta2Error.transpose(),theta2),np.multiply(h1,1-h1))
-    theta1Grad = np.matmul(theta1Error.transpose(),X)
-    theta1Grad = theta1Grad[1:,:]#drop bias unit error from hiddent layer
-    theta2Grad = np.matmul(theta2Error,h1)
-
+    theta1 = nnParams[:theta1Length]
+    theta2 = nnParams[theta1Length:]
 
-    return np.array([theta1Grad.flatten(), theta2Grad.flatten()])
+    theta1 = theta1.reshape(hiddenLayerSize,inputSize+1)
+    theta2 = theta2.reshape(outputSize,hiddenLayerSize+1)
 
+    return[theta1, theta2]
 
 def sigmoidGradient(Z):
-    R = helper.sigmoid(Z)
-    return np.multiply(R,1-R)
+    R = sigmoid(Z)
+    return R*(1-R)
 
+def sigmoid(Z):
 
+    return 1/(1+np.exp(-Z))
 
+def optimizeNN(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal, maxIter):
+    return op.minimize(fun=nnCostFunction, x0=nnParams, args=(inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal), method='TNC', jac = BackPropagation, options={'maxiter': maxIter, 'disp': True})