
Commit e5089c0

committed Sep 24, 2017

Created my own numerical gradient functions

Scipy uses a one-sided method, but it tends to be less accurate. In better news, my implementation of back propagation works!

1 parent e2a5f23 · commit e5089c0

8 files changed: +147 -29 lines
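For context on the accuracy claim in the commit message, here is a minimal sketch (not part of the commit) comparing a one-sided (forward) difference with the symmetric (central) difference used in checker.py. The test function sin(x) and the step size 1e-6 are illustrative choices only.

import numpy as np

# Illustrative comparison: forward vs. symmetric (central) differences on
# f(x) = sin(x), whose exact derivative is cos(x). Step size is arbitrary.
f, fprime = np.sin, np.cos
x, h = 1.0, 1e-6

forward = (f(x + h) - f(x)) / h            # truncation error O(h)
central = (f(x + h) - f(x - h)) / (2 * h)  # truncation error O(h**2)

print(abs(forward - fprime(x)))  # on the order of 1e-7 for this choice of h
print(abs(central - fprime(x)))  # on the order of 1e-10 for this choice of h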
 

Diff for: __pycache__/checker.cpython-36.pyc

4.32 KB
Binary file not shown.

Diff for: __pycache__/ex4Checker.cpython-36.pyc

108 Bytes
Binary file not shown.

Diff for: __pycache__/ex4helper.cpython-36.pyc

-171 Bytes
Binary file not shown.

Diff for: __pycache__/test.cpython-36.pyc

4.32 KB
Binary file not shown.

Diff for: checker.py

+118

@@ -0,0 +1,118 @@
+import numpy
+from numpy import (atleast_1d, eye, mgrid, argmin, zeros, shape, squeeze,
+                   vectorize, asarray, sqrt, Inf, asfarray, isinf)
+'''
+This file should be removed, and Scipy's implementation of a symmetric numerical
+gradient should be used, once one is implemented. I had to create my own
+symmetric numerical gradient because Scipy doesn't have one.
+'''
+
+
+_epsilon = sqrt(numpy.finfo(float).eps)
+
+
+def _approx_fprime_helper(xk, f, epsilon, args=()):
+    """
+    See ``approx_fprime``.
+    """
+    grad = numpy.zeros((len(xk),), float)
+    ei = numpy.zeros((len(xk),), float)
+    for k in range(len(xk)):
+        ei[k] = 1.0
+        d = epsilon * ei
+        # symmetric (central) difference: (f(x + d) - f(x - d)) / (2 * d)
+        grad[k] = (f(*((xk + d,) + args)) - f(*((xk - d,) + args))) / (2*d[k])
+        ei[k] = 0.0
+    return grad
+
+
+def approx_fprime(xk, f, epsilon, *args):
+    """Finite-difference approximation of the gradient of a scalar function.
+
+    Parameters
+    ----------
+    xk : array_like
+        The coordinate vector at which to determine the gradient of `f`.
+    f : callable
+        The function of which to determine the gradient (partial derivatives).
+        Should take `xk` as first argument, other arguments to `f` can be
+        supplied in ``*args``. Should return a scalar, the value of the
+        function at `xk`.
+    epsilon : array_like
+        Increment to `xk` to use for determining the function gradient.
+        If a scalar, uses the same finite difference delta for all partial
+        derivatives. If an array, should contain one value per element of
+        `xk`.
+    \\*args : args, optional
+        Any other arguments that are to be passed to `f`.
+
+    Returns
+    -------
+    grad : ndarray
+        The partial derivatives of `f` with respect to `xk`.
+
+    See Also
+    --------
+    check_grad : Check correctness of gradient function against approx_fprime.
+
+    Notes
+    -----
+    The function gradient is determined by the symmetric (central) finite
+    difference formula::
+
+                 f(xk[i] + epsilon[i]) - f(xk[i] - epsilon[i])
+        f'[i] = -----------------------------------------------
+                               2 * epsilon[i]
+
+    The main use of `approx_fprime` is in scalar function optimizers like
+    `fmin_bfgs`, to determine numerically the Jacobian of a function.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy import optimize
+    >>> def func(x, c0, c1):
+    ...     "Coordinate vector `x` should be an array of size two."
+    ...     return c0 * x[0]**2 + c1*x[1]**2
+    >>> x = np.ones(2)
+    >>> c0, c1 = (1, 200)
+    >>> eps = np.sqrt(np.finfo(float).eps)
+    >>> optimize.approx_fprime(x, func, [eps, np.sqrt(200) * eps], c0, c1)
+    array([   2.        ,  400.00004198])
+    """
+    return _approx_fprime_helper(xk, f, epsilon, args=args)
+
+
+def check_grad(func, grad, x0, *args, **kwargs):
+    """Check the correctness of a gradient function by comparing it against a
+    symmetric (central) finite-difference approximation of the gradient.
+
+    Parameters
+    ----------
+    func : callable ``func(x0, *args)``
+        Function whose derivative is to be checked.
+    grad : callable ``grad(x0, *args)``
+        Gradient of `func`.
+    x0 : ndarray
+        Points to check `grad` against the central difference approximation of
+        grad using `func`.
+    args : \\*args, optional
+        Extra arguments passed to `func` and `grad`.
+    epsilon : float, optional
+        Step size used for the finite difference approximation. It defaults to
+        ``sqrt(numpy.finfo(float).eps)``, which is approximately 1.49e-08.
+
+    Returns
+    -------
+    err : float
+        The square root of the sum of squares (i.e. the 2-norm) of the
+        difference between ``grad(x0, *args)`` and the finite difference
+        approximation of `grad` using `func` at the points `x0`.
+
+    See Also
+    --------
+    approx_fprime
+
+    Examples
+    --------
+    >>> def func(x):
+    ...     return x[0]**2 - 0.5 * x[1]**3
+    >>> def grad(x):
+    ...     return [2 * x[0], -1.5 * x[1]**2]
+    >>> from scipy.optimize import check_grad
+    >>> check_grad(func, grad, [1.5, -1.5])
+    2.9802322387695312e-08
+    """
+    step = kwargs.pop('epsilon', _epsilon)
+    if kwargs:
+        raise ValueError("Unknown keyword arguments: %r" %
+                         (list(kwargs.keys()),))
+    return sqrt(sum((grad(x0, *args) -
+                     approx_fprime(x0, func, step, *args))**2))
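A brief usage sketch of the new module (not part of the commit): checker.approx_fprime and checker.check_grad come from the file above, assuming checker.py sits on the import path; the test function, its analytic gradient, and the evaluation point are illustrative assumptions.

import numpy as np
import checker

# Illustrative test function and its analytic gradient (not from the repo).
def func(x):
    return x[0]**2 - 0.5 * x[1]**3

def grad(x):
    return np.array([2 * x[0], -1.5 * x[1]**2])

x0 = np.array([1.5, -1.5])
eps = np.sqrt(np.finfo(float).eps)

print(checker.approx_fprime(x0, func, eps))  # symmetric-difference gradient estimate
print(checker.check_grad(func, grad, x0))    # 2-norm of (analytic - numerical), expected to be tiny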

Diff for: ex4.py

+5 -5

@@ -180,14 +180,14 @@
 
 print('\nTraining Neural Network... ')
 
-MaxIter = 50
-lambdaVal = 1
+#MaxIter = 50
+#lambdaVal = 1
 
-finalParams = helper.optimizeNN(initialNNParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, MaxIter)
+#finalParams = helper.optimizeNN(initialNNParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, MaxIter)
 
-print(finalParams)
+#print(finalParams)
 
-input('\nPart 9 completed. Program paused. Press enter to continue: ')
+#input('\nPart 9 completed. Program paused. Press enter to continue: ')
 
 ## ================= Part 10: Visualize Weights =================
 # You can now "visualize" what the neural network is learning by

Diff for: ex4Checker.py

+12 -9

@@ -1,16 +1,14 @@
 import numpy as np
 import matplotlib.pyplot as plt
-import scipy.optimize as op
+import checker as op
 import ex4helper as helper
 import math
 import matplotlib.image as mpimg
 from numpy import linalg as LA
 
-def backPropMask(nnParams,*args):
-    return helper.BackPropagation(nnParams,*args)
-
-def costMask(nnParams,*args):
-    return helper.nnCostFunction(nnParams,*args)
+
+def main():
+    checkNNGradients(0)
 
 def checkNNGradients(lambdaVal):
     # CHECKNNGRADIENTS(lambda) Creates a small neural network to check the

@@ -47,7 +45,7 @@ def checkNNGradients(lambdaVal):
     print('\nComparing Gradients: (numGrad, grad, absolute difference)')
 
     for i in range(0,numGrad.shape[0]):
-        print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i] )))
+        print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i])))
 
 
     print('The above two columns you get should be very similar.')

@@ -61,7 +59,6 @@ def checkNNGradients(lambdaVal):
     print('the relative difference will be small (less than 1e-9).')
     print('Relative Difference: {}'.format(diff))
 
-
 def debugInitializeWeights(fanOut, fanIn):
     # Initialize W using "sin", this ensures that vW is always of the same
     # values and will be useful for debugging

@@ -74,6 +71,12 @@ def debugInitializeWeights(fanOut, fanIn):
     W = np.sin(W)/10
     return W
 
+def backPropMask(nnParams,*args):
+    return helper.BackPropagation(nnParams,*args)
+
+def costMask(nnParams,*args):
+    return helper.nnCostFunction(nnParams,*args)
+
 if __name__ == '__main__':
-    checkNNGradients(0)
+    main()
 
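Since the body of checkNNGradients is not shown in this diff, here is a self-contained sketch of the kind of comparison it prints, using the new checker module with a tiny stand-in cost function instead of costMask/backPropMask. The quadratic cost, the test parameters, and the relative-difference formula (the usual norm-ratio convention from the exercise) are assumptions for illustration.

import numpy as np
import checker as op

def cost(p):
    return np.sum(p**2)   # stand-in for costMask (the real cost is the NN cost)

def analyticGrad(p):
    return 2 * p          # stand-in for backPropMask (the real gradient comes from backprop)

params = np.array([0.3, -1.2, 0.7])
eps = np.sqrt(np.finfo(float).eps)

numGrad = op.approx_fprime(params, cost, eps)   # symmetric numerical gradient
grad = analyticGrad(params)

print('\nComparing Gradients: (numGrad, grad, absolute difference)')
for i in range(0, numGrad.shape[0]):
    print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i])))

# Assumed relative-difference convention; a correct gradient gives a very small value.
diff = np.linalg.norm(numGrad - grad) / np.linalg.norm(numGrad + grad)
print('Relative Difference: {}'.format(diff))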

Diff for: ex4helper.py

+12 -15

@@ -14,14 +14,14 @@ def nnCostFunction(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambd
     #prepare Y matrix for cost function
     Y = getYMatrix(y)
 
-    X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
-    h1 = sigmoid(np.matmul(X,theta1.transpose()))
-    h1 = np.insert(h1,0,np.ones(h1.shape[0]),axis=1) # adding bias unit
-    h2 = sigmoid(np.matmul(h1,theta2.transpose()))
+    #forward Pass
+    [a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
+
 
     #getting regulation parameters
     R1 = theta1[:,1:]
     R2 = theta2[:,1:]
+
     # calculating the cost of regulation
     costRegulation = lambdaVal*(np.sum(np.square(R1.flatten())) + np.sum(np.square(R2.flatten())))/(2*m)
 

@@ -37,28 +37,24 @@
 def BackPropagation(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
     #get num examples
     m = X.shape[0]
-
     #get Theta Matrices
     [theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
 
+
     #prepare Y matrix for cost function
     Y = getYMatrix(y) #5x3
 
     #forward Pass
     [a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
     #a1 = 5x4, z1 = 5x5, a2 = 5x5, a2 = 5x6, z2 = 5x3, h2 = 5x3
 
-
     #backward
     theta2Error = h2-Y #5x3
     theta1Error = np.matmul(theta2Error,theta2[:,1:])*sigmoidGradient(z1)
 
     D1 = np.matmul(theta1Error.transpose(),a1)
     D2 = np.matmul(theta2Error.transpose(),a2)
 
-    theta2delta = theta2Error * (h2*(1-h2))
-    theta2other = np.dot(a2.transpose(),theta2delta)
-    print(theta2other.flatten())
     #average the gradient per example
     theta1Grad = D1/m
     theta2Grad = D2/m

@@ -79,17 +75,19 @@ def forwardPass(nnParams, X):
     theta1 = nnParams[0]
     theta2 = nnParams[1]
 
+    #left side is the example count
     #layer 1
-    a1 = np.insert(X,0,np.ones(X.shape[0]),axis=1)
-    z1 = np.matmul(a1,theta1.transpose())
-    a2 = sigmoid(z1)
+    a1 = np.insert(X,0,np.ones(X.shape[0]),axis=1)#5x4
+    z1 = np.matmul(a1,theta1.transpose())#5x5
+    a2 = sigmoid(z1)#5x5
+
 
     #layer 2
     a2 = np.insert(a2,0,np.ones(a1.shape[0]),axis=1) # adding bias unit 5x6
     z2 = np.matmul(a2,theta2.transpose()) #5x3
-    h2 = sigmoid(z2) #5x3
+    a3 = sigmoid(z2) #5x3
 
-    return [a1, z1, a2, z2, h2]
+    return [a1, z1, a2, z2, a3]
 
 def getYMatrix(y):
     #prepare Y matrix for cost function

@@ -119,7 +117,6 @@ def sigmoidGradient(Z):
     return R*(1-R)
 
 def sigmoid(Z):
-
     return 1/(1+np.exp(-Z))
 
 def optimizeNN(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal, maxIter):
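To make the shape comments in the diff above concrete, here is a self-contained sketch of the same forward and backward pass on the 5-example debug network those comments suggest (3 inputs, 5 hidden units, 3 outputs). The random data, the omission of regularization, and the sigmoid/sigmoidGradient definitions are assumptions for illustration; they mirror, but are not copied from, ex4helper.

import numpy as np

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def sigmoidGradient(Z):
    R = sigmoid(Z)
    return R * (1 - R)

rng = np.random.RandomState(0)
X = rng.rand(5, 3)                      # 5 examples, 3 features
Y = np.eye(3)[rng.randint(0, 3, 5)]     # 5x3 one-hot labels
theta1 = rng.rand(5, 4) / 10            # hidden x (input + bias)
theta2 = rng.rand(3, 6) / 10            # output x (hidden + bias)

# forward pass (mirrors forwardPass)
a1 = np.insert(X, 0, np.ones(X.shape[0]), axis=1)    # 5x4, bias column added
z1 = np.matmul(a1, theta1.transpose())               # 5x5
a2 = sigmoid(z1)                                     # 5x5
a2 = np.insert(a2, 0, np.ones(a2.shape[0]), axis=1)  # 5x6, bias column added
z2 = np.matmul(a2, theta2.transpose())               # 5x3
a3 = sigmoid(z2)                                     # 5x3

# backward pass (mirrors BackPropagation, regularization omitted)
m = X.shape[0]
theta2Error = a3 - Y                                                       # 5x3
theta1Error = np.matmul(theta2Error, theta2[:, 1:]) * sigmoidGradient(z1)  # 5x5
theta1Grad = np.matmul(theta1Error.transpose(), a1) / m                    # 5x4, same shape as theta1
theta2Grad = np.matmul(theta2Error.transpose(), a2) / m                    # 3x6, same shape as theta2

print(theta1Grad.shape, theta2Grad.shape)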
