
Commit e5089c0

committed Sep 24, 2017

Created my own numerical gradient functions

Scipy uses a one-sided method, but it tends to be less accurate. In better news, my implementation of back propagation works!

1 parent e2a5f23 · commit e5089c0

8 files changed: +147 -29 lines
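For context on the accuracy claim in the commit message, here is a minimal sketch (not part of the commit) comparing a one-sided (forward) difference with the symmetric (central) difference used in checker.py. The test function sin(x) and the step size 1e-6 are illustrative choices only.

import numpy as np

# Illustrative comparison: forward vs. symmetric (central) differences on
# f(x) = sin(x), whose exact derivative is cos(x). Step size is arbitrary.
f, fprime = np.sin, np.cos
x, h = 1.0, 1e-6

forward = (f(x + h) - f(x)) / h            # truncation error O(h)
central = (f(x + h) - f(x - h)) / (2 * h)  # truncation error O(h**2)

print(abs(forward - fprime(x)))  # on the order of 1e-7 for this choice of h
print(abs(central - fprime(x)))  # on the order of 1e-10 for this choice of h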
 

Diff for: __pycache__/checker.cpython-36.pyc

4.32 KB
Binary file not shown.

Diff for: __pycache__/ex4Checker.cpython-36.pyc

108 Bytes
Binary file not shown.

Diff for: __pycache__/ex4helper.cpython-36.pyc

-171 Bytes
Binary file not shown.

Diff for: __pycache__/test.cpython-36.pyc

4.32 KB
Binary file not shown.

Diff for: checker.py

+118

@@ -0,0 +1,118 @@
+import numpy
+from numpy import (atleast_1d, eye, mgrid, argmin, zeros, shape, squeeze,
+                   vectorize, asarray, sqrt, Inf, asfarray, isinf)
+'''
+This file should be removed, and Scipy's implementation of a symmetric numerical
+gradient should be used, once one is implemented. I had to create my own
+symmetric numerical gradient because Scipy doesn't have one.
+'''
+
+
+_epsilon = sqrt(numpy.finfo(float).eps)
+
+
+def _approx_fprime_helper(xk, f, epsilon, args=()):
+    """
+    See ``approx_fprime``.
+    """
+    grad = numpy.zeros((len(xk),), float)
+    ei = numpy.zeros((len(xk),), float)
+    for k in range(len(xk)):
+        ei[k] = 1.0
+        d = epsilon * ei
+        # symmetric (central) difference: (f(x + d) - f(x - d)) / (2 * d)
+        grad[k] = (f(*((xk + d,) + args)) - f(*((xk - d,) + args))) / (2*d[k])
+        ei[k] = 0.0
+    return grad
+
+
+def approx_fprime(xk, f, epsilon, *args):
+    """Finite-difference approximation of the gradient of a scalar function.
+
+    Parameters
+    ----------
+    xk : array_like
+        The coordinate vector at which to determine the gradient of `f`.
+    f : callable
+        The function of which to determine the gradient (partial derivatives).
+        Should take `xk` as first argument, other arguments to `f` can be
+        supplied in ``*args``. Should return a scalar, the value of the
+        function at `xk`.
+    epsilon : array_like
+        Increment to `xk` to use for determining the function gradient.
+        If a scalar, uses the same finite difference delta for all partial
+        derivatives. If an array, should contain one value per element of
+        `xk`.
+    \\*args : args, optional
+        Any other arguments that are to be passed to `f`.
+
+    Returns
+    -------
+    grad : ndarray
+        The partial derivatives of `f` with respect to `xk`.
+
+    See Also
+    --------
+    check_grad : Check correctness of gradient function against approx_fprime.
+
+    Notes
+    -----
+    The function gradient is determined by the symmetric (central) finite
+    difference formula::
+
+                 f(xk[i] + epsilon[i]) - f(xk[i] - epsilon[i])
+        f'[i] = -----------------------------------------------
+                               2 * epsilon[i]
+
+    The main use of `approx_fprime` is in scalar function optimizers like
+    `fmin_bfgs`, to determine numerically the Jacobian of a function.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from scipy import optimize
+    >>> def func(x, c0, c1):
+    ...     "Coordinate vector `x` should be an array of size two."
+    ...     return c0 * x[0]**2 + c1*x[1]**2
+    >>> x = np.ones(2)
+    >>> c0, c1 = (1, 200)
+    >>> eps = np.sqrt(np.finfo(float).eps)
+    >>> optimize.approx_fprime(x, func, [eps, np.sqrt(200) * eps], c0, c1)
+    array([   2.        ,  400.00004198])
+    """
+    return _approx_fprime_helper(xk, f, epsilon, args=args)
+
+
+def check_grad(func, grad, x0, *args, **kwargs):
+    """Check the correctness of a gradient function by comparing it against a
+    symmetric (central) finite-difference approximation of the gradient.
+
+    Parameters
+    ----------
+    func : callable ``func(x0, *args)``
+        Function whose derivative is to be checked.
+    grad : callable ``grad(x0, *args)``
+        Gradient of `func`.
+    x0 : ndarray
+        Points to check `grad` against the central difference approximation of
+        grad using `func`.
+    args : \\*args, optional
+        Extra arguments passed to `func` and `grad`.
+    epsilon : float, optional
+        Step size used for the finite difference approximation. It defaults to
+        ``sqrt(numpy.finfo(float).eps)``, which is approximately 1.49e-08.
+
+    Returns
+    -------
+    err : float
+        The square root of the sum of squares (i.e. the 2-norm) of the
+        difference between ``grad(x0, *args)`` and the finite difference
+        approximation of `grad` using `func` at the points `x0`.
+
+    See Also
+    --------
+    approx_fprime
+
+    Examples
+    --------
+    >>> def func(x):
+    ...     return x[0]**2 - 0.5 * x[1]**3
+    >>> def grad(x):
+    ...     return [2 * x[0], -1.5 * x[1]**2]
+    >>> from scipy.optimize import check_grad
+    >>> check_grad(func, grad, [1.5, -1.5])
+    2.9802322387695312e-08
+    """
+    step = kwargs.pop('epsilon', _epsilon)
+    if kwargs:
+        raise ValueError("Unknown keyword arguments: %r" %
+                         (list(kwargs.keys()),))
+    return sqrt(sum((grad(x0, *args) -
+                     approx_fprime(x0, func, step, *args))**2))
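A brief usage sketch of the new module (not part of the commit): checker.approx_fprime and checker.check_grad come from the file above, assuming checker.py sits on the import path; the test function, its analytic gradient, and the evaluation point are illustrative assumptions.

import numpy as np
import checker

# Illustrative test function and its analytic gradient (not from the repo).
def func(x):
    return x[0]**2 - 0.5 * x[1]**3

def grad(x):
    return np.array([2 * x[0], -1.5 * x[1]**2])

x0 = np.array([1.5, -1.5])
eps = np.sqrt(np.finfo(float).eps)

print(checker.approx_fprime(x0, func, eps))  # symmetric-difference gradient estimate
print(checker.check_grad(func, grad, x0))    # 2-norm of (analytic - numerical), expected to be tiny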

Diff for: ex4.py

+5 -5

@@ -180,14 +180,14 @@
 
 print('\nTraining Neural Network... ')
 
-MaxIter = 50
-lambdaVal = 1
+#MaxIter = 50
+#lambdaVal = 1
 
-finalParams = helper.optimizeNN(initialNNParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, MaxIter)
+#finalParams = helper.optimizeNN(initialNNParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, MaxIter)
 
-print(finalParams)
+#print(finalParams)
 
-input('\nPart 9 completed. Program paused. Press enter to continue: ')
+#input('\nPart 9 completed. Program paused. Press enter to continue: ')
 
 ## ================= Part 10: Visualize Weights =================
 # You can now "visualize" what the neural network is learning by

Diff for: ex4Checker.py

+12 -9

@@ -1,16 +1,14 @@
 import numpy as np
 import matplotlib.pyplot as plt
-import scipy.optimize as op
+import checker as op
 import ex4helper as helper
 import math
 import matplotlib.image as mpimg
 from numpy import linalg as LA
 
-def backPropMask(nnParams,*args):
-    return helper.BackPropagation(nnParams,*args)
-
-def costMask(nnParams,*args):
-    return helper.nnCostFunction(nnParams,*args)
+
+def main():
+    checkNNGradients(0)
 
 def checkNNGradients(lambdaVal):
     # CHECKNNGRADIENTS(lambda) Creates a small neural network to check the

@@ -47,7 +45,7 @@ def checkNNGradients(lambdaVal):
     print('\nComparing Gradients: (numGrad, grad, absolute difference)')
 
     for i in range(0,numGrad.shape[0]):
-        print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i] )))
+        print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i])))
 
 
     print('The above two columns you get should be very similar.')

@@ -61,7 +59,6 @@ def checkNNGradients(lambdaVal):
     print('the relative difference will be small (less than 1e-9).')
     print('Relative Difference: {}'.format(diff))
 
-
 def debugInitializeWeights(fanOut, fanIn):
     # Initialize W using "sin", this ensures that vW is always of the same
     # values and will be useful for debugging

@@ -74,6 +71,12 @@ def debugInitializeWeights(fanOut, fanIn):
     W = np.sin(W)/10
     return W
 
+def backPropMask(nnParams,*args):
+    return helper.BackPropagation(nnParams,*args)
+
+def costMask(nnParams,*args):
+    return helper.nnCostFunction(nnParams,*args)
+
 if __name__ == '__main__':
-    checkNNGradients(0)
+    main()
 
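Since the body of checkNNGradients is not shown in this diff, here is a self-contained sketch of the kind of comparison it prints, using the new checker module with a tiny stand-in cost function instead of costMask/backPropMask. The quadratic cost, the test parameters, and the relative-difference formula (the usual norm-ratio convention from the exercise) are assumptions for illustration.

import numpy as np
import checker as op

def cost(p):
    return np.sum(p**2)   # stand-in for costMask (the real cost is the NN cost)

def analyticGrad(p):
    return 2 * p          # stand-in for backPropMask (the real gradient comes from backprop)

params = np.array([0.3, -1.2, 0.7])
eps = np.sqrt(np.finfo(float).eps)

numGrad = op.approx_fprime(params, cost, eps)   # symmetric numerical gradient
grad = analyticGrad(params)

print('\nComparing Gradients: (numGrad, grad, absolute difference)')
for i in range(0, numGrad.shape[0]):
    print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i])))

# Assumed relative-difference convention; a correct gradient gives a very small value.
diff = np.linalg.norm(numGrad - grad) / np.linalg.norm(numGrad + grad)
print('Relative Difference: {}'.format(diff))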

Diff for: ex4helper.py

+12 -15

@@ -14,14 +14,14 @@ def nnCostFunction(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambd
     #prepare Y matrix for cost function
     Y = getYMatrix(y)
 
-    X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
-    h1 = sigmoid(np.matmul(X,theta1.transpose()))
-    h1 = np.insert(h1,0,np.ones(h1.shape[0]),axis=1) # adding bias unit
-    h2 = sigmoid(np.matmul(h1,theta2.transpose()))
+    #forward Pass
+    [a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
+
 
     #getting regulation parameters
     R1 = theta1[:,1:]
     R2 = theta2[:,1:]
+
     # calculating the cost of regulation
     costRegulation = lambdaVal*(np.sum(np.square(R1.flatten())) + np.sum(np.square(R2.flatten())))/(2*m)
 

@@ -37,28 +37,24 @@
 def BackPropagation(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
     #get num examples
     m = X.shape[0]
-
     #get Theta Matrices
     [theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
 
+
     #prepare Y matrix for cost function
     Y = getYMatrix(y) #5x3
 
     #forward Pass
     [a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
     #a1 = 5x4, z1 = 5x5, a2 = 5x5, a2 = 5x6, z2 = 5x3, h2 = 5x3
 
-
     #backward
     theta2Error = h2-Y #5x3
     theta1Error = np.matmul(theta2Error,theta2[:,1:])*sigmoidGradient(z1)
 
     D1 = np.matmul(theta1Error.transpose(),a1)
     D2 = np.matmul(theta2Error.transpose(),a2)
 
-    theta2delta = theta2Error * (h2*(1-h2))
-    theta2other = np.dot(a2.transpose(),theta2delta)
-    print(theta2other.flatten())
     #average the gradient per example
     theta1Grad = D1/m
     theta2Grad = D2/m

@@ -79,17 +75,19 @@ def forwardPass(nnParams, X):
     theta1 = nnParams[0]
     theta2 = nnParams[1]
 
+    #left side is the example count
     #layer 1
-    a1 = np.insert(X,0,np.ones(X.shape[0]),axis=1)
-    z1 = np.matmul(a1,theta1.transpose())
-    a2 = sigmoid(z1)
+    a1 = np.insert(X,0,np.ones(X.shape[0]),axis=1)#5x4
+    z1 = np.matmul(a1,theta1.transpose())#5x5
+    a2 = sigmoid(z1)#5x5
+
 
     #layer 2
     a2 = np.insert(a2,0,np.ones(a1.shape[0]),axis=1) # adding bias unit 5x6
     z2 = np.matmul(a2,theta2.transpose()) #5x3
-    h2 = sigmoid(z2) #5x3
+    a3 = sigmoid(z2) #5x3
 
-    return [a1, z1, a2, z2, h2]
+    return [a1, z1, a2, z2, a3]
 
 def getYMatrix(y):
     #prepare Y matrix for cost function

@@ -119,7 +117,6 @@ def sigmoidGradient(Z):
     return R*(1-R)
 
 def sigmoid(Z):
-
     return 1/(1+np.exp(-Z))
 
 def optimizeNN(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal, maxIter):
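To make the shape comments in the diff above concrete, here is a self-contained sketch of the same forward and backward pass on the 5-example debug network those comments suggest (3 inputs, 5 hidden units, 3 outputs). The random data, the omission of regularization, and the sigmoid/sigmoidGradient definitions are assumptions for illustration; they mirror, but are not copied from, ex4helper.

import numpy as np

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def sigmoidGradient(Z):
    R = sigmoid(Z)
    return R * (1 - R)

rng = np.random.RandomState(0)
X = rng.rand(5, 3)                      # 5 examples, 3 features
Y = np.eye(3)[rng.randint(0, 3, 5)]     # 5x3 one-hot labels
theta1 = rng.rand(5, 4) / 10            # hidden x (input + bias)
theta2 = rng.rand(3, 6) / 10            # output x (hidden + bias)

# forward pass (mirrors forwardPass)
a1 = np.insert(X, 0, np.ones(X.shape[0]), axis=1)    # 5x4, bias column added
z1 = np.matmul(a1, theta1.transpose())               # 5x5
a2 = sigmoid(z1)                                     # 5x5
a2 = np.insert(a2, 0, np.ones(a2.shape[0]), axis=1)  # 5x6, bias column added
z2 = np.matmul(a2, theta2.transpose())               # 5x3
a3 = sigmoid(z2)                                     # 5x3

# backward pass (mirrors BackPropagation, regularization omitted)
m = X.shape[0]
theta2Error = a3 - Y                                                       # 5x3
theta1Error = np.matmul(theta2Error, theta2[:, 1:]) * sigmoidGradient(z1)  # 5x5
theta1Grad = np.matmul(theta1Error.transpose(), a1) / m                    # 5x4, same shape as theta1
theta2Grad = np.matmul(theta2Error.transpose(), a2) / m                    # 3x6, same shape as theta2

print(theta1Grad.shape, theta2Grad.shape)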
