1
- import numpy as np
1
+ import numpy as np
2
2
import matplotlib .pyplot as plt
3
3
import scipy .optimize as op
4
- import ex2helper as helper
5
4
import math
6
5
import matplotlib .image as mpimg
7
6
8
def nnCostFunction(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
    """Regularized cross-entropy cost of a 3-layer (one hidden layer) network.

    Args:
        nnParams: flat 1-D vector of all weights (theta1 then theta2, row-major).
        inputSize: number of input features (without bias).
        hiddenLayerSize: number of hidden units (without bias).
        outputSize: number of output labels.
        X: (m, inputSize) example matrix, no bias column.
        y: label vector of length m (labels 1..outputSize).
        lambdaVal: regularization strength.

    Returns:
        Scalar regularized cost.
    """
    # number of training examples
    m = X.shape[0]

    # unroll the flat parameter vector into the two weight matrices
    [theta1, theta2] = getThetas(nnParams, inputSize, hiddenLayerSize, outputSize)

    # one-hot label matrix, shape (m, outputSize)
    Y = getYMatrix(y)

    # Reuse the shared forward-pass helper instead of duplicating the math here.
    # NOTE: pass the thetas as a plain list — np.array([theta1, theta2]) builds a
    # ragged array, which modern numpy rejects.
    h2 = forwardPass([theta1, theta2], X)[4]

    # regularization excludes the bias column of each weight matrix
    R1 = theta1[:, 1:]
    R2 = theta2[:, 1:]
    costRegulation = lambdaVal * (np.sum(np.square(R1)) + np.sum(np.square(R2))) / (2 * m)

    # unregularized cross-entropy cost
    cost = np.sum(np.log(h2) * Y) + np.sum(np.log(1 - h2) * (1 - Y))
    cost = -cost / m

    # total cost = data term + regularization term
    return cost + costRegulation
50
-
51
def BackPropagation(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
    """Gradient of nnCostFunction via backpropagation.

    Args:
        nnParams: flat 1-D vector of all weights (theta1 then theta2, row-major).
        inputSize: number of input features (without bias).
        hiddenLayerSize: number of hidden units (without bias).
        outputSize: number of output labels.
        X: (m, inputSize) example matrix, no bias column.
        y: label vector of length m.
        lambdaVal: regularization strength.

    Returns:
        Flat gradient vector with the same layout as nnParams.
    """
    # number of training examples
    m = X.shape[0]

    # unroll the flat parameter vector into the two weight matrices
    [theta1, theta2] = getThetas(nnParams, inputSize, hiddenLayerSize, outputSize)

    # one-hot label matrix, shape (m, outputSize)
    Y = getYMatrix(y)

    # Forward pass. FIX: pass the thetas as a plain list — np.array([theta1, theta2])
    # builds a ragged array, which raises an error in modern numpy.
    [a1, z1, a2, z2, h2] = forwardPass([theta1, theta2], X)

    # backward pass: output error, then hidden-layer error (bias column of
    # theta2 is excluded because the bias unit has no incoming error)
    theta2Error = h2 - Y
    theta1Error = np.matmul(theta2Error, theta2[:, 1:]) * sigmoidGradient(z1)

    # accumulate gradients over all examples
    D1 = np.matmul(theta1Error.transpose(), a1)
    D2 = np.matmul(theta2Error.transpose(), a2)

    # average the gradient per example
    # (leftover debug computations theta2delta/theta2other and the stray
    #  print(...) were removed — they contributed nothing to the result)
    theta1Grad = D1 / m
    theta2Grad = D2 / m

    # regularization terms; bias column is never regularized
    theta1Reg = lambdaVal * theta1 / m
    theta2Reg = lambdaVal * theta2 / m
    theta1Reg[:, 0] = 0
    theta2Reg[:, 0] = 0

    # combine gradient and regularization terms
    theta1Grad = theta1Grad + theta1Reg
    theta2Grad = theta2Grad + theta2Reg

    return np.append(theta1Grad.flatten(), theta2Grad.flatten())
78
def forwardPass(nnParams, X):
    """Run one forward pass through the 2-weight-layer network.

    Args:
        nnParams: sequence [theta1, theta2] of weight matrices.
        X: (m, n) example matrix without a bias column.

    Returns:
        [a1, z1, a2, z2, h2] where a1/a2 are the (bias-augmented)
        activations, z1/z2 the pre-activation sums, and h2 the output.
    """
    theta1, theta2 = nnParams[0], nnParams[1]
    numExamples = X.shape[0]

    # input layer: prepend a column of ones as the bias unit
    a1 = np.insert(X, 0, np.ones(numExamples), axis=1)

    # hidden layer
    z1 = np.matmul(a1, theta1.transpose())
    a2 = sigmoid(z1)
    a2 = np.insert(a2, 0, np.ones(numExamples), axis=1)  # bias unit

    # output layer
    z2 = np.matmul(a2, theta2.transpose())
    h2 = sigmoid(z2)

    return [a1, z1, a2, z2, h2]
94
def getYMatrix(y):
    """One-hot encode the label vector for the cost function.

    Generalized: builds one indicator row per *distinct* label value found in y
    (the original assumed labels were exactly the consecutive integers 1..K).
    For 1..K labels the output is identical to before.

    Args:
        y: 1-D label vector of length m.

    Returns:
        (m, numLabels) 0/1 matrix; column order follows np.unique(y) (sorted).
    """
    labels = np.unique(y)

    # one indicator row per label value, shape (numLabels, m)
    Y = np.stack([(y == label).astype(int) for label in labels])

    # transpose so the first dimension corresponds to the example
    return Y.transpose()
106
def getThetas(nnParams, inputSize, hiddenLayerSize, outputSize):
    """Unroll the flat parameter vector into the two weight matrices.

    Args:
        nnParams: 1-D vector holding theta1 followed by theta2, row-major.
        inputSize: number of input features (without bias).
        hiddenLayerSize: number of hidden units (without bias).
        outputSize: number of output labels.

    Returns:
        [theta1, theta2] with shapes (hiddenLayerSize, inputSize + 1) and
        (outputSize, hiddenLayerSize + 1); the +1 columns hold bias weights.
    """
    # index where theta1's weights end and theta2's begin
    split = (inputSize + 1) * hiddenLayerSize

    theta1 = nnParams[:split].reshape(hiddenLayerSize, inputSize + 1)
    theta2 = nnParams[split:].reshape(outputSize, hiddenLayerSize + 1)

    return [theta1, theta2]
89
117
def sigmoidGradient(Z):
    """Element-wise derivative of the sigmoid: g(Z) * (1 - g(Z))."""
    activation = sigmoid(Z)
    complement = 1 - activation
    return activation * complement
120
121
def sigmoid(Z):
    """Element-wise logistic function 1 / (1 + e^(-Z))."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator
125
def optimizeNN(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal, maxIter):
    """Train the network by minimizing nnCostFunction with scipy's TNC solver.

    Args:
        nnParams: initial flat weight vector.
        inputSize, hiddenLayerSize, outputSize: network architecture.
        X, y: training examples and labels.
        lambdaVal: regularization strength.
        maxIter: maximum number of optimizer iterations.

    Returns:
        The scipy OptimizeResult; the trained weights are in its .x field.
    """
    extraArgs = (inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal)
    solverOptions = {'maxiter': maxIter, 'disp': True}
    return op.minimize(
        fun=nnCostFunction,
        x0=nnParams,
        args=extraArgs,
        method='TNC',
        jac=BackPropagation,
        options=solverOptions)
0 commit comments