
Commit e2a5f23

added up to part 9
Backpropagation still isn't right though; I need to investigate it further.
1 parent da51bc1 commit e2a5f23

9 files changed: +216 / -10343 lines

Diff for: __pycache__/ex1helper.cpython-36.pyc (-1.78 KB, binary file not shown)

Diff for: __pycache__/ex2helper.cpython-36.pyc (0 bytes changed, binary file not shown)

Diff for: __pycache__/ex3helper.cpython-36.pyc (0 bytes changed, binary file not shown)

Diff for: __pycache__/ex4Checker.cpython-36.pyc (1.94 KB, binary file not shown)

Diff for: __pycache__/ex4helper.cpython-36.pyc (833 bytes, binary file not shown)

Diff for: ex4.py (+55 / -8 lines)

@@ -21,6 +21,7 @@
 import scipy.io as io
 import ex3helper as helper3
 import ex4helper as helper
+import ex4Checker as checker
 
 ## Setup the parameters you will use for this exercise
 inputLayerSize = 400; # 20x20 Input Images of Digits
@@ -40,7 +41,6 @@
 y = np.squeeze(mat['y'])
 
 m = X.shape[0]
-
 # Randomly select 100 data points to display
 perm = np.random.permutation(m)
 sel = X[perm[0:100],:]
@@ -61,7 +61,7 @@
 theta1 = mat['Theta1']
 theta2 = mat['Theta2']
 
-nnParams = np.array([theta1.flatten(), theta2.flatten()])
+nnParams = np.append(theta1.flatten(), theta2.flatten())
 
 
 ## ================ Part 3: Compute Cost (Feedforward) ================
@@ -81,7 +81,7 @@
 # Weight regularization parameter (we set this to 0 here).
 lambdaVal = 0
 
-J = helper.nnCostFunction(nnParams, X, y, lambdaVal)
+J = helper.nnCostFunction(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
 
 print('Cost at parameters (loaded from ex4weights): {:.6f}'.format(J))
 print('this value should be approx: 0.287629')
@@ -98,7 +98,7 @@
 # Weight regularization parameter (we set this to 1 here).
 lambdaVal = 1
 
-J = helper.nnCostFunction(nnParams, X, y, lambdaVal)
+J = helper.nnCostFunction(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
 
 print('Cost at parameters (loaded from ex4weights): {:.6f}'.format(J))
 print('this value should be approx: 0.383770')
@@ -129,10 +129,10 @@
 print('\nInitializing Neural Network Parameters ...')
 
 initialTheta1 = np.random.rand(inputLayerSize + 1, hiddenLayerSize)
-initialTheta2 = np.random.rand(hiddenLayerSize + 1, num_labels)
+initialTheta2 = np.random.rand(hiddenLayerSize + 1, numLabels)
 
 # Unroll parameters
-initialNNParams = np.array([initialTheta1.flatten(), initialTheta2.flatten()])
+initialNNParams = np.append(initialTheta1.flatten(), initialTheta2.flatten())
 
 ## =============== Part 7: Implement Backpropagation ===============
 # Once your cost matches up with ours, you should proceed to implement the
@@ -143,6 +143,53 @@
 print('\nChecking Backpropagation... ')
 
 #Check gradients by running checkNNGradients
-#helper.checkNNGradients()
+checker.checkNNGradients(0)
+
+input('\nPart 6 & 7 completed. Program paused. Press enter to continue: ')
+
+## =============== Part 8: Implement Regularization ===============
+# Once your backpropagation implementation is correct, you should now
+# continue to implement the regularization with the cost and gradient.
+#
+
+print('\nChecking Backpropagation (w/ Regularization) ... ')
+
+# After you have completed the assignment, change the MaxIter to a larger
+# value to see how more training helps.
+
+
+# You should also try different values of lambda
+lambdaVal = 3
+checker.checkNNGradients(lambdaVal)
+
+debug_J = helper.nnCostFunction(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
+
+print('Cost at parameters (loaded from ex4weights): {:.6f}'.format(debug_J))
+print('this value should be approx: 0.576051')
+
+input('\nPart 8 completed. Program paused. Press enter to continue: ')
+
+
+## =================== Part 9: Training NN ===================
+# You have now implemented all the code necessary to train a neural
+# network. To train your neural network, we will now use "fmincg", which
+# is a function which works similarly to "fminunc". Recall that these
+# advanced optimizers are able to train our cost functions efficiently as
+# long as we provide them with the gradient computations.
+#
+
+print('\nTraining Neural Network... ')
+
+MaxIter = 50
+lambdaVal = 1
+
+finalParams = helper.optimizeNN(initialNNParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, MaxIter)
+
+print(finalParams)
+
+input('\nPart 9 completed. Program paused. Press enter to continue: ')
 
-#input('\nPart 6 & 7 completed. Program paused. Press enter to continue: ')
+## ================= Part 10: Visualize Weights =================
+# You can now "visualize" what the neural network is learning by
+# displaying the hidden units to see what features they are capturing in
+# the data.
Diff for: ex4Checker.py (+79 lines, new file)

@@ -0,0 +1,79 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.optimize as op
+import ex4helper as helper
+import math
+import matplotlib.image as mpimg
+from numpy import linalg as LA
+
+def backPropMask(nnParams,*args):
+    return helper.BackPropagation(nnParams,*args)
+
+def costMask(nnParams,*args):
+    return helper.nnCostFunction(nnParams,*args)
+
+def checkNNGradients(lambdaVal):
+    # CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
+    # backpropagation gradients, it will output the analytical gradients
+    # produced by your backprop code and the numerical gradients (computed
+    # using computeNumericalGradient). These two gradient computations should
+    # result in very similar values.
+    #
+
+    inputLayerSize = 3
+    hiddenLayerSize = 5
+    numLabels = 3
+    m = 5
+
+    #We generate some 'random' test data
+    theta1 = debugInitializeWeights(hiddenLayerSize, inputLayerSize)
+    theta2 = debugInitializeWeights(numLabels, hiddenLayerSize)
+
+    # Reusing debugInitializeWeights to generate X
+    X = debugInitializeWeights(m, inputLayerSize - 1);
+    y = np.remainder(np.arange(m),numLabels) + 1
+
+    #unroll parameters
+    nnParams = np.append(theta1.flatten(), theta2.flatten())
+
+    grad = helper.BackPropagation(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
+
+    diff = op.check_grad(costMask, backPropMask, nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, epsilon=.0001)
+
+    numGrad = op.approx_fprime(nnParams, costMask, .001 , inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
+    # Visually examine the two gradient computations. The two columns you get should be very similar.
+
+
+    print('\nComparing Gradients: (numGrad, grad, absolute difference)')
+
+    for i in range(0,numGrad.shape[0]):
+        print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i] )))
+
+
+    print('The above two columns you get should be very similar.')
+    print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
+
+    # Evaluate the norm of the difference between two solutions.
+    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
+    # in computeNumericalGradient.m, then diff below should be less than 1e-9
+
+    print('If your backpropagation implementation is correct, then ')
+    print('the relative difference will be small (less than 1e-9).')
+    print('Relative Difference: {}'.format(diff))
+
+
+def debugInitializeWeights(fanOut, fanIn):
+    # Initialize W using "sin", this ensures that vW is always of the same
+    # values and will be useful for debugging
+    # W = zeros(fan_out, 1 + fan_in);
+    # W = reshape(sin(1:numel(W)), size(W)) / 10;
+    # numel ~ number of elements. equivalent to size, w.size
+    # size, equivalent of shape, w.shape
+    W = np.arange(fanOut*(fanIn+1))
+    W = W.reshape(fanOut, fanIn+1)
+    W = np.sin(W)/10
+    return W
+
+if __name__ == '__main__':
+    checkNNGradients(0)

Diff for: ex4helper.py (+82 / -50 lines)

@@ -1,36 +1,23 @@
-import numpy as np
+import numpy as np
 import matplotlib.pyplot as plt
 import scipy.optimize as op
-import ex2helper as helper
 import math
 import matplotlib.image as mpimg
 
-def nnCostFunction(nnParams, X, y, lambdaVal):
+def nnCostFunction(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
+    #get num examples
     m = X.shape[0]
-    print(m)
-    theta1 = nnParams[0]
-    n1 = X.shape[1] + 1
-    n2 = theta1.shape[0]
-    theta1 = theta1.reshape(int(n2/n1),n1)
-    theta2 = nnParams[1]
-    n1 = theta1.shape[0] + 1
-    n2 = theta2.shape[0]
-    theta2 = theta2.reshape(int(n2/n1),n1)
-
+
+    #get Theta Matrices
+    [theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
 
     #prepare Y matrix for cost function
-    numLabels = np.unique(y).shape[0]+1
-    #create boolean array of value or not out of 1s and 0s
-    Y = (y==1).astype(int)
-    for i in range(2,numLabels):
-        Y = np.append(Y,(y==i).astype(int))
-    #reshape so first dimension corresponds with label
-    Y = Y.reshape(10,5000)
+    Y = getYMatrix(y)
 
     X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
-    h1 = helper.sigmoid(np.matmul(X,theta1.transpose()))
+    h1 = sigmoid(np.matmul(X,theta1.transpose()))
     h1 = np.insert(h1,0,np.ones(h1.shape[0]),axis=1) # adding bias unit
-    h2 = helper.sigmoid(np.matmul(h1,theta2.transpose())).transpose()
+    h2 = sigmoid(np.matmul(h1,theta2.transpose()))
 
     #getting regulation parameters
     R1 = theta1[:,1:]
@@ -39,56 +26,101 @@ def nnCostFunction(nnParams, X, y, lambdaVal):
     costRegulation = lambdaVal*(np.sum(np.square(R1.flatten())) + np.sum(np.square(R2.flatten())))/(2*m)
 
     #calculating true cost without regulation
-    cost = np.sum(np.multiply(np.log(h2),Y)) + np.sum(np.multiply(np.log(1-h2),1-Y))
+    cost = np.sum(np.log(h2)*Y) + np.sum(np.log(1-h2)*(1-Y))
     cost = -cost/m
 
     #calculate total cost
     totalCost = cost + costRegulation
 
     return totalCost
 
-
-def nnGradFunction(nnParams, X, y, lambdaVal):
+def BackPropagation(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
+    #get num examples
     m = X.shape[0]
+
+    #get Theta Matrices
+    [theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
+
+    #prepare Y matrix for cost function
+    Y = getYMatrix(y) #5x3
+
+    #forward Pass
+    [a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
+    #a1 = 5x4, z1 = 5x5, a2 = 5x5, a2 = 5x6, z2 = 5x3, h2 = 5x3
+
+
+    #backward
+    theta2Error = h2-Y #5x3
+    theta1Error = np.matmul(theta2Error,theta2[:,1:])*sigmoidGradient(z1)
+
+    D1 = np.matmul(theta1Error.transpose(),a1)
+    D2 = np.matmul(theta2Error.transpose(),a2)
+
+    theta2delta = theta2Error * (h2*(1-h2))
+    theta2other = np.dot(a2.transpose(),theta2delta)
+    print(theta2other.flatten())
+    #average the gradient per example
+    theta1Grad = D1/m
+    theta2Grad = D2/m
+
+    #calculate regulation terms
+    theta1Reg = lambdaVal*theta1/m
+    theta2Reg = lambdaVal*theta2/m
+    theta1Reg[:,0] = 0
+    theta2Reg[:,0] = 0
+
+    #combine gradient and regulation terms
+    theta1Grad = theta1Grad + theta1Reg
+    theta2Grad = theta2Grad + theta2Reg
+
+    return np.append(theta1Grad.flatten(), theta2Grad.flatten())
+
+def forwardPass(nnParams, X):
     theta1 = nnParams[0]
-    n1 = X.shape[1] + 1
-    n2 = theta1.shape[0]
-    theta1 = theta1.reshape(int(n2/n1),n1)
     theta2 = nnParams[1]
-    n1 = theta1.shape[0] + 1
-    n2 = theta2.shape[0]
-    theta2 = theta2.reshape(int(n2/n1),n1)
-
 
+    #layer 1
+    a1 = np.insert(X,0,np.ones(X.shape[0]),axis=1)
+    z1 = np.matmul(a1,theta1.transpose())
+    a2 = sigmoid(z1)
+
+    #layer 2
+    a2 = np.insert(a2,0,np.ones(a1.shape[0]),axis=1) # adding bias unit 5x6
+    z2 = np.matmul(a2,theta2.transpose()) #5x3
+    h2 = sigmoid(z2) #5x3
+
+    return [a1, z1, a2, z2, h2]
+
+def getYMatrix(y):
     #prepare Y matrix for cost function
-    numLabels = np.unique(y).shape[0]+1
+    numLabels = np.unique(y).shape[0]
+
     #create boolean array of value or not out of 1s and 0s
     Y = (y==1).astype(int)
-    for i in range(2,numLabels):
+    for i in range(2, numLabels + 1):
         Y = np.append(Y,(y==i).astype(int))
     #reshape so first dimension corresponds with label
-    Y = Y.reshape(10,5000)
-
-    X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
-    h1 = helper.sigmoid(np.matmul(X,theta1.transpose()))
-    h1 = np.insert(h1,0,np.ones(h1.shape[0]),axis=1) # adding bias unit
-    h2 = helper.sigmoid(np.matmul(h1,theta2.transpose())).transpose()
+    Y = Y.reshape(numLabels,y.shape[0])
+    return Y.transpose()
 
+def getThetas(nnParams,inputSize,hiddenLayerSize,outputSize):
+    theta1Length = (inputSize+1)*hiddenLayerSize
 
-    #calculate gradients
-    theta2Error = h2-Y
-    theta1Error = np.multiply(np.matmul(theta2Error.transpose(),theta2),np.multiply(h1,1-h1))
-    theta1Grad = np.matmul(theta1Error.transpose(),X)
-    theta1Grad = theta1Grad[1:,:]#drop bias unit error from hiddent layer
-    theta2Grad = np.matmul(theta2Error,h1)
-
+    theta1 = nnParams[:theta1Length]
+    theta2 = nnParams[theta1Length:]
 
-    return np.array([theta1Grad.flatten(), theta2Grad.flatten()])
+    theta1 = theta1.reshape(hiddenLayerSize,inputSize+1)
+    theta2 = theta2.reshape(outputSize,hiddenLayerSize+1)
 
+    return[theta1, theta2]
 
 def sigmoidGradient(Z):
-    R = helper.sigmoid(Z)
-    return np.multiply(R,1-R)
+    R = sigmoid(Z)
+    return R*(1-R)
 
+def sigmoid(Z):
 
+    return 1/(1+np.exp(-Z))
 
+def optimizeNN(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal, maxIter):
+    return op.minimize(fun=nnCostFunction, x0=nnParams, args=(inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal), method='TNC', jac = BackPropagation, options={'maxiter': maxIter, 'disp': True})