Skip to content

Commit 74ce083

Browse files
authoredNov 10, 2017
Merge pull request manvillej#21 from manvillej/PEP8-update
Pep8 update
2 parents 7150664 + d966fd3 commit 74ce083

28 files changed

+3598
-3014
lines changed
 

‎__pycache__/ex1helper.cpython-36.pyc

1.77 KB
Binary file not shown.

‎__pycache__/ex2helper.cpython-36.pyc

3.9 KB
Binary file not shown.

‎__pycache__/ex3helper.cpython-36.pyc

2.44 KB
Binary file not shown.

‎data/createVocabList.py

+55-59
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,80 @@
1+
'''creatVocabList is used to translate vocabList.txt into a python
2+
dictionary object. It will associate each word with a representational
3+
number in a python dictionary and use the PrettyPrint module to create
4+
a python file from the resulting dictionary to be used in other files.
15
'''
2-
creatVocabList is used to translate vocabList.txt into a python dictionary object.
3-
It will associate each word with a representational number in a python dictionary
4-
and use the PrettyPrint module to create a python file from the resulting dictionary
5-
to be used in other files.
6-
'''
6+
7+
# Imports
78
import os
89
import pprint
910

10-
def main():
11-
pyDict = pyDictCreator('./vocab.txt')
12-
pyDict.creatDict()
13-
pyDict.createPyDict()
1411

12+
def main():
13+
pyDict = pyDictCreator('./vocab.txt')
14+
pyDict.creatDict()
15+
pyDict.createPyDict()
1516

1617

1718
class pyDictCreator():
18-
def __init__(self,filePath):
19-
20-
#if the file path is absolute, make it an absolute path
21-
if(not os.path.isabs(filePath)):
22-
filepath = os.path.abspath(filePath)
23-
24-
25-
#set variables
26-
self.directory = os.path.dirname(filepath)
27-
self.basename = os.path.basename(filepath)
28-
self.filepath = filepath
29-
30-
#sets up empty dictionary
31-
self.dictionary = {}
32-
self.inverseDictionary = {}
33-
34-
def setFilePath(self, filepath):
35-
#if the file path is absolute, make it an absolute path
36-
if(not os.path.isabs(filePath)):
37-
filepath = os.path.abspath(filePath)
38-
19+
def __init__(self, filePath):
3920

40-
#set variables
41-
self.directory = os.path.dirname(filepath)
42-
self.basename = os.path.basename(filepath)
43-
self.filepath = filepath
21+
# if the file path is absolute, make it an absolute path
22+
if(not os.path.isabs(filePath)):
23+
filepath = os.path.abspath(filePath)
4424

45-
#sets up empty dictionary
46-
self.dictionary = {}
25+
# set variables
26+
self.directory = os.path.dirname(filepath)
27+
self.basename = os.path.basename(filepath)
28+
self.filepath = filepath
4729

48-
def creatDict(self):
49-
dictionary = {}
30+
# sets up empty dictionary
31+
self.dictionary = {}
32+
self.inverseDictionary = {}
5033

51-
txtFile = open(self.filepath, 'r')
34+
def setFilePath(self, filepath):
35+
# if the file path is absolute, make it an absolute path
36+
if(not os.path.isabs(filePath)):
37+
filepath = os.path.abspath(filePath)
5238

53-
#get the dictionary words
54-
words = txtFile.readlines()
55-
txtFile.close()
39+
# set variables
40+
self.directory = os.path.dirname(filepath)
41+
self.basename = os.path.basename(filepath)
42+
self.filepath = filepath
5643

57-
#value to start representing numbers
58-
#iterate through words adding them to the dictionary
59-
for word in words:
60-
value = word.split()
61-
dictionary[value[1]] = int(value[0])
44+
# sets up empty dictionary
45+
self.dictionary = {}
6246

63-
self.dictionary = dictionary
47+
def creatDict(self):
48+
dictionary = {}
6449

65-
return dictionary
50+
txtFile = open(self.filepath, 'r')
6651

52+
# get the dictionary words
53+
words = txtFile.readlines()
54+
txtFile.close()
6755

68-
def createPyDict(self):
69-
os.chdir(self.directory)
56+
# value to start representing numbers
57+
# iterate through words adding them to the dictionary
58+
for word in words:
59+
value = word.split()
60+
dictionary[value[1]] = int(value[0])
7061

71-
#open new file with the same name as the original, but with extension .py
72-
name = self.basename.split('.')
73-
name = name[0] + '.py'
74-
pyDict = open(name, 'w')
62+
self.dictionary = dictionary
7563

64+
return dictionary
7665

77-
pyDict.write('dictionary = ' + pprint.pformat(self.dictionary) + '\n')
78-
pyDict.write('dictionary = ' + pprint.pformat(self.dictionary) + '\n')
79-
pyDict.close()
66+
def createPyDict(self):
67+
os.chdir(self.directory)
8068

69+
# open new file with the same name as the original,
70+
# but with extension .py
71+
name = self.basename.split('.')
72+
name = name[0] + '.py'
73+
pyDict = open(name, 'w')
8174

75+
pyDict.write('dictionary = ' + pprint.pformat(self.dictionary) + '\n')
76+
pyDict.write('dictionary = ' + pprint.pformat(self.dictionary) + '\n')
77+
pyDict.close()
8278

8379
if __name__ == '__main__':
84-
main()
80+
main()

‎ex1.py

+167-130
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,24 @@
1-
## Machine Learning Online Class - Exercise 1: Linear Regression
2-
3-
# Instructions
4-
# ------------
5-
#
6-
# This is the initialization file for exercise 1 of Andrew Ng's Machine learning course
7-
# All of the file have been converted into a python implementation instead of the original
8-
# Matlab implementation. This
9-
#
10-
# warmUpExercise.py - complete
11-
# plotData.py - complete
12-
# gradientDescent.py - complete
13-
# computeCost.py - complete
14-
# gradientDescentMulti.py - complete
15-
# computeCostMulti.py - complete
16-
# featureNormalize.py - complete
17-
# normalEqn.py - complete
18-
#
19-
# x refers to the population size in 10,000s
20-
# y refers to the profit in $10,000s
21-
#
22-
#
23-
## ==================== Part 1: Basic Function ====================
24-
# Complete warmUpExercise.py
1+
""" Machine Learning Online Class - Exercise 1: Linear Regression
2+
Instructions
3+
------------
4+
This is the initialization file for exercise 1
5+
of Andrew Ng's Machine learning course. All of
6+
the file have been converted into a python
7+
implementation instead of the original.
8+
Matlab implementation.
9+
warmUpExercise - Complete
10+
plotData - Complete
11+
gradientDescent - Complete
12+
computeCost - Complete
13+
gradientDescentMulti - Complete
14+
computeCostMulti - Complete
15+
featureNormalize - Complete
16+
normalEqn - Complete
17+
refers to the population size in 10,000s
18+
refers to the profit in $10,000s
19+
"""
20+
21+
# Imports
2522
import numpy as np
2623
import ex1helper as helper
2724
from mpl_toolkits.mplot3d import Axes3D
@@ -30,109 +27,149 @@
3027
import matplotlib
3128

3229

33-
print("running warmUpExercise...")
34-
print('5x5 Identity Matrix:')
35-
36-
eye = np.identity(5)
37-
print(eye)
38-
39-
input('\nPart 1 completed. Program paused. Press enter to continue: ')
40-
41-
## ======================= Part 2: Plotting =======================
42-
print('Plotting Data ...')
43-
data = np.genfromtxt('./data/ex1data1.txt', delimiter=',')
44-
45-
x=np.array(data[:,0])
46-
x=np.expand_dims(x,axis=0)
47-
x=np.append(np.ones_like(x),x,axis=0)
48-
y=np.array(data[:,1])
49-
50-
plt.scatter(x[1], y, label = "scatter", marker='x', color='r', s=10)
51-
plt.xlabel('Population of City in 10,000s')
52-
plt.ylabel('Profit in $10,000s')
53-
plt.title('Raw Data')
54-
plt.show()
55-
input('\nPart 2 completed. Program paused. Press enter to continue: ')
56-
57-
## =================== Part 3: Cost and Gradient descent ===================
58-
theta = np.zeros(x.shape[0])
59-
60-
# Some gradient descent settings
61-
iterations = 1500;
62-
alpha = 0.01;
63-
64-
print('Testing the cost function ...')
65-
# compute and display initial cost
66-
J = helper.computeCost(x,y,theta)
67-
print("With theta = [0 0], \nCost computed = ", J)
68-
print("Expected cost value (approx) 32.07")
69-
70-
71-
# further testing of the cost function
72-
J = helper.computeCost(x, y, [-1, 2]);
73-
print("With theta = [-1 2], \nCost computed = ", J)
74-
print('Expected cost value (approx) 54.24');
75-
76-
input('\n Program paused. Press enter to continue: ')
77-
78-
print('Running Gradient Descent...')
79-
#run gradient descent
80-
theta, cost = helper.gradientDescent(x, y, theta, alpha, iterations)
81-
82-
#print theta to screen
83-
print('Theta found by gradient descent:');
84-
print(theta)
85-
print('\nExpected theta values (approx):');
86-
print('[-3.6303 1.1664]');
87-
88-
# Plot the linear fit
89-
plt.scatter(x[1], y, label = "scatter", marker='x', color='r', s=10)
90-
plt.plot(x[1],np.matmul(x.transpose(),theta), color='blue', linestyle='solid')
91-
plt.xlabel('Population of City in 10,000s')
92-
plt.ylabel('Profit in $10,000s')
93-
plt.title('Raw Data + Linear Fit')
94-
plt.show()
95-
96-
# Predict values for population sizes of 35,000 and 70,000
97-
predict1 = np.matmul([1, 3.5],theta)
98-
print('For population = 35,000, we predict a profit of ', predict1*10000);
99-
predict2 = np.matmul([1, 7],theta)
100-
print('For population = 70,000, we predict a profit of ', predict2*10000);
101-
102-
input('\nPart 3 completed. Program paused. Press enter to continue: ')
103-
104-
# ============= Part 4: Visualizing J(theta_0, theta_1) =============
105-
print('Visualizing J(theta_0, theta_1) ...')
106-
107-
108-
#Grid over which we will calculate J
109-
110-
theta0 = np.linspace(-10, 10, 100)
111-
theta1 = np.linspace(-1, 4, 100)
112-
theta0Vals, theta1Vals = np.meshgrid(theta0,theta1)
113-
zs = np.array([helper.computeCost(x,y,[i,j]) for i,j in zip(np.ravel(theta0Vals), np.ravel(theta1Vals))])
114-
ZCosts = zs.reshape(theta0Vals.shape)
115-
116-
min = np.amin(ZCosts)
117-
max = np.amax(ZCosts)
118-
norm = matplotlib.colors.Normalize(vmin=min, vmax=max, clip=True)
119-
120-
121-
122-
fig = plt.figure(1)
123-
ax = fig.add_subplot(111, projection='3d')
124-
125-
126-
ax.plot_surface(theta0Vals,theta1Vals,ZCosts,cmap=cm.coolwarm, norm=norm)
127-
128-
129-
ax.set_xlabel('theta0')
130-
ax.set_ylabel('theta1')
131-
ax.set_zlabel('Cost')
132-
133-
plt.figure(2)
134-
CS = plt.contour(theta0Vals, theta1Vals, ZCosts, np.logspace(-2,3,20))
135-
plt.scatter(theta[0], theta[1], label = "scatter", marker='x', color='r', s=10)
136-
plt.clabel(CS, inline=1, fontsize=10)
137-
plt.title('Simplest default with labels')
138-
plt.show()
30+
def main():
31+
32+
# ==================== Part 1: Basic Function ====================
33+
# Complete warmUpExercise.py
34+
print("running warmUpExercise...")
35+
print('5x5 Identity Matrix:')
36+
37+
eye = np.identity(5)
38+
print(eye)
39+
40+
input('\nPart 1 completed. Program paused. Press enter to continue: ')
41+
42+
# ======================= Part 2: Plotting =======================
43+
print('Plotting Data ...')
44+
data = np.genfromtxt('./data/ex1data1.txt', delimiter=',')
45+
46+
x = np.array(data[:, 0])
47+
x = np.expand_dims(x, axis=0)
48+
x = np.append(np.ones_like(x), x, axis=0)
49+
y = np.array(data[:, 1])
50+
51+
plt.scatter(x[1], y, label="scatter", marker='x', color='r', s=10)
52+
plt.xlabel('Population of City in 10,000s')
53+
plt.ylabel('Profit in $10,000s')
54+
plt.title('Raw Data')
55+
plt.show()
56+
input('\nPart 2 completed. Program paused. Press enter to continue: ')
57+
58+
# =================== Part 3: Cost and Gradient descent ===================
59+
theta = np.zeros(x.shape[0])
60+
61+
# Some gradient descent settings
62+
iterations = 1500
63+
alpha = 0.01
64+
65+
print('Testing the cost function ...')
66+
# compute and display initial cost
67+
J = helper.computeCost(x, y, theta)
68+
print("With theta = [0 0], \nCost computed = ", J)
69+
print("Expected cost value (approx) 32.07")
70+
71+
# further testing of the cost function
72+
J = helper.computeCost(x, y, [-1, 2])
73+
print("With theta = [-1 2], \nCost computed = ", J)
74+
print('Expected cost value (approx) 54.24')
75+
76+
input('\n Program paused. Press enter to continue: ')
77+
78+
print('Running Gradient Descent...')
79+
# run gradient descent
80+
theta, cost = helper.gradientDescent(x, y, theta, alpha, iterations)
81+
82+
# print theta to screen
83+
print('Theta found by gradient descent:')
84+
print(theta)
85+
print('\nExpected theta values (approx):')
86+
print('[-3.6303 1.1664]')
87+
88+
# Plot the linear fit
89+
plt.scatter(
90+
x[1],
91+
y,
92+
label="scatter",
93+
marker='x',
94+
color='r',
95+
s=10)
96+
97+
plt.plot(
98+
x[1],
99+
np.matmul(
100+
x.transpose(),
101+
theta),
102+
color='blue',
103+
linestyle='solid')
104+
105+
plt.xlabel('Population of City in 10,000s')
106+
plt.ylabel('Profit in $10,000s')
107+
plt.title('Raw Data + Linear Fit')
108+
plt.show()
109+
110+
# Predict values for population sizes of 35,000 and 70,000
111+
predict1 = np.matmul([1, 3.5], theta)
112+
print('For population = 35,000, we predict a profit of ', predict1*10000)
113+
predict2 = np.matmul([1, 7], theta)
114+
print('For population = 70,000, we predict a profit of ', predict2*10000)
115+
116+
input('\nPart 3 completed. Program paused. Press enter to continue: ')
117+
118+
# ============= Part 4: Visualizing J(theta_0, theta_1) =============
119+
print('Visualizing J(theta_0, theta_1) ...')
120+
121+
# Grid over which we will calculate J
122+
123+
theta0 = np.linspace(-10, 10, 100)
124+
theta1 = np.linspace(-1, 4, 100)
125+
theta0Vals, theta1Vals = np.meshgrid(theta0, theta1)
126+
zs = np.array(
127+
[helper.computeCost(x, y, [i, j]) for i, j in zip(
128+
np.ravel(theta0Vals),
129+
np.ravel(theta1Vals))])
130+
131+
ZCosts = zs.reshape(theta0Vals.shape)
132+
133+
min = np.amin(ZCosts)
134+
max = np.amax(ZCosts)
135+
norm = matplotlib.colors.Normalize(vmin=min, vmax=max, clip=True)
136+
137+
fig = plt.figure(1)
138+
ax = fig.add_subplot(111, projection='3d')
139+
140+
ax.plot_surface(
141+
theta0Vals,
142+
theta1Vals,
143+
ZCosts,
144+
cmap=cm.coolwarm,
145+
norm=norm)
146+
147+
ax.set_xlabel('theta0')
148+
ax.set_ylabel('theta1')
149+
ax.set_zlabel('Cost')
150+
151+
plt.figure(2)
152+
CS = plt.contour(
153+
theta0Vals,
154+
theta1Vals,
155+
ZCosts,
156+
np.logspace(-2, 3, 20))
157+
158+
plt.scatter(
159+
theta[0],
160+
theta[1],
161+
label="scatter",
162+
marker='x',
163+
color='r',
164+
s=10)
165+
166+
plt.clabel(
167+
CS,
168+
inline=1,
169+
fontsize=10)
170+
171+
plt.title('Simplest default with labels')
172+
plt.show
173+
174+
if __name__ == '__main__':
175+
main()

‎ex1_multi.py

+158-154
Original file line numberDiff line numberDiff line change
@@ -1,159 +1,163 @@
1-
## Machine Learning Online Class
2-
# Exercise 1: Linear regression with multiple variables
3-
#
4-
# Instructions
5-
# ------------
6-
#
7-
# This file contains code that helps you get started on the
8-
# linear regression exercise.
9-
#
10-
# You will need to complete the following functions in this
11-
# exericse:
12-
#
13-
# warmUpExercise.py - complete
14-
# plotData.py - complete
15-
# gradientDescent.py - complete
16-
# computeCost.py - complete
17-
# gradientDescentMulti.py - complete
18-
# computeCostMulti.py - complete
19-
# featureNormalize.py - complete
20-
# normalEqn.py - complete
21-
#
22-
# For this part of the exercise, you will need to change some
23-
# parts of the code below for various experiments (e.g., changing
24-
# learning rates).
25-
#
26-
#
27-
## Initialization
28-
#
29-
## ================ Part 1: Feature Normalization ================
30-
#
31-
## Clear and Close Figures
1+
"""Machine Learning Online Class Exercise 1: Linear regression with multiple variables
2+
Instructions
3+
------------
4+
This file contains code that helps you get started on the
5+
linear regression exercise.
6+
You will need to complete the following functions in this
7+
exericse:
8+
warmUpExercise - Complete
9+
plotData - Complete
10+
gradientDescent - Complete
11+
computeCost - Complete
12+
gradientDescentMulti - Complete
13+
computeCostMulti - Complete
14+
featureNormalize - Complete
15+
normalEqn - Complete
16+
For this part of the exercise, you will need to change some
17+
parts of the code below for various experiments (e.g., changing
18+
learning rates).
19+
"""
20+
21+
# imports
3222
import numpy as np
3323
import ex1helper as helper
3424
import matplotlib.pyplot as plt
3525

3626

37-
38-
print('Loading data...')
39-
data = np.genfromtxt('./data/ex1data2.txt', delimiter=',')
40-
x = np.array(data[:,:2])
41-
y = np.array(data[:,2])
42-
m = y.shape[0]
43-
44-
45-
# Print out some data points
46-
print('First 10 examples from the dataset: ')
47-
for i in range(0,10):
48-
print("x = [%.0f %.0f], y = %.0f" % (x[i,0], x[i,1], y[i]))
49-
50-
input('Program paused. Press enter to continue: ')
51-
52-
print('\nNormalize Features...')
53-
54-
x,mu,sigma = helper.featureNormalize(x)
55-
56-
57-
#add bias unit
58-
r = x
59-
x = np.ones((x.shape[0], x.shape[1]+1))
60-
x[:,1:] = r
61-
62-
63-
## ================ Part 2: Gradient Descent ================
64-
65-
# ====================== YOUR CODE HERE ======================
66-
# Instructions: We have provided you with the following starter
67-
# code that runs gradient descent with a particular
68-
# learning rate (alpha).
69-
#
70-
# Your task is to first make sure that your functions -
71-
# computeCost and gradientDescent already work with
72-
# this starter code and support multiple variables.
73-
#
74-
# After that, try running gradient descent with
75-
# different values of alpha and see which one gives
76-
# you the best result.
77-
#
78-
# Finally, you should complete the code at the end
79-
# to predict the price of a 1650 sq-ft, 3 br house.
80-
#
81-
# Hint: By using the 'hold on' command, you can plot multiple
82-
# graphs on the same figure.
83-
#
84-
# Hint: At prediction, make sure you do the same feature normalization.
85-
#
86-
print('\n\nPart 1 complete.')
87-
print('\nRunning gradient descent ...');
88-
89-
# Choose some alpha value
90-
alpha = 0.01
91-
num_iters = 400
92-
93-
# Init Theta and Run Gradient Descent
94-
theta = np.zeros(3);
95-
[theta, J_history] = helper.gradientDescentMulti(x, y, theta, alpha, num_iters);
96-
plt.plot(range(0,num_iters), J_history, color='blue', linestyle='solid')
97-
plt.xlabel('iterations')
98-
plt.ylabel('Cost J')
99-
plt.show()
100-
print('Theta computed from gradient descent:', theta)
101-
102-
# Estimate the price of a 1650 sq-ft, 3 br house
103-
# ====================== YOUR CODE HERE ======================
104-
# Recall that the first column of X is all-ones. Thus, it does
105-
# not need to be normalized.
106-
107-
108-
r = np.array([1650, 3])
109-
r = (r - mu)/sigma
110-
r2 = np.ones(r.shape[0]+1)
111-
r2[1:] = r
112-
r = r2
113-
price = np.matmul(r,theta)
114-
115-
# ============================================================
116-
print('\nPredicted price of a 1650 sq-ft, 3 br house (using gradient descent): ${0:.2f}'.format(price))
117-
118-
input('\nPart 2 complete. Program paused. Press enter to continue: ')
119-
120-
## ================ Part 3: Normal Equations ================
121-
122-
print('Solving with normal equations...');
123-
124-
# ====================== YOUR CODE HERE ======================
125-
# Instructions: The following code computes the closed form
126-
# solution for linear regression using the normal
127-
# equations. You should complete the code in
128-
# normalEqn.m
129-
#
130-
# After doing so, you should complete this code
131-
# to predict the price of a 1650 sq-ft, 3 br house.
132-
#
133-
134-
## Load Data
135-
data = np.genfromtxt('./data/ex1data2.txt', delimiter=',')
136-
x = np.array(data[:,:2])
137-
y = np.array(data[:,2])
138-
m = y.shape[0]
139-
140-
# Add intercept term to X
141-
r = x
142-
x = np.ones((x.shape[0], x.shape[1]+1))
143-
x[:,1:] = r
144-
145-
# Calculate the parameters from the normal equation
146-
theta = helper.normalEqn(x, y)
147-
148-
# Display normal equation's result
149-
print('Theta computed from the normal equations: ', theta)
150-
151-
152-
# Estimate the price of a 1650 sq-ft, 3 br house
153-
# ====================== YOUR CODE HERE ======================
154-
155-
r = np.array([1, 1650, 3])
156-
price = np.matmul(r,theta)
157-
158-
# ============================================================
159-
print('Predicted price of a 1650 sq-ft, 3 br house (using normal equations): ${0:.2f}'.format(price))
27+
def main():
28+
# ================ Part 1: Feature Normalization ================
29+
#
30+
# Clear and Close Figures
31+
32+
print('Loading data...')
33+
data = np.genfromtxt('./data/ex1data2.txt', delimiter=',')
34+
x = np.array(data[:, :2])
35+
y = np.array(data[:, 2])
36+
m = y.shape[0]
37+
38+
# Print out some data points
39+
print('First 10 examples from the dataset: ')
40+
for i in range(0, 10):
41+
print("x = [%.0f %.0f], y = %.0f" % (x[i, 0], x[i, 1], y[i]))
42+
43+
input('Program paused. Press enter to continue: ')
44+
45+
print('\nNormalize Features...')
46+
47+
x, mu, sigma = helper.featureNormalize(x)
48+
49+
# add bias unit
50+
r = x
51+
x = np.ones((x.shape[0], x.shape[1]+1))
52+
x[:, 1:] = r
53+
54+
# ================ Part 2: Gradient Descent ================
55+
56+
# ====================== YOUR CODE HERE ======================
57+
# Instructions: We have provided you with the following starter
58+
# code that runs gradient descent with a particular
59+
# learning rate (alpha).
60+
#
61+
# Your task is to first make sure that your functions -
62+
# computeCost and gradientDescent already work with
63+
# this starter code and support multiple variables.
64+
#
65+
# After that, try running gradient descent with
66+
# different values of alpha and see which one gives
67+
# you the best result.
68+
#
69+
# Finally, you should complete the code at the end
70+
# to predict the price of a 1650 sq-ft, 3 br house.
71+
#
72+
# Hint: By using the 'hold on' command, you can plot multiple
73+
# graphs on the same figure.
74+
#
75+
# Hint: At prediction, make sure you do the same feature normalization.
76+
#
77+
print('\n\nPart 1 complete.')
78+
print('\nRunning gradient descent ...')
79+
80+
# Choose some alpha value
81+
alpha = 0.01
82+
num_iters = 400
83+
84+
# Init Theta and Run Gradient Descent
85+
theta = np.zeros(3)
86+
87+
[theta, J_history] = helper.gradientDescentMulti(
88+
x,
89+
y,
90+
theta,
91+
alpha,
92+
num_iters)
93+
94+
plt.plot(
95+
range(0, num_iters),
96+
J_history,
97+
color='blue',
98+
linestyle='solid')
99+
100+
plt.xlabel('iterations')
101+
plt.ylabel('Cost J')
102+
plt.show()
103+
print('Theta computed from gradient descent:', theta)
104+
105+
# Estimate the price of a 1650 sq-ft, 3 br house
106+
# ====================== YOUR CODE HERE ======================
107+
# Recall that the first column of X is all-ones. Thus, it does
108+
# not need to be normalized.
109+
110+
r = np.array([1650, 3])
111+
r = (r - mu)/sigma
112+
r2 = np.ones(r.shape[0]+1)
113+
r2[1:] = r
114+
r = r2
115+
price = np.matmul(r, theta)
116+
117+
# ============================================================
118+
print('\nPredicted price of a 1650 sq-ft, 3 br house (using gradient descent): ${0:.2f}'.format(price))
119+
120+
input('\nPart 2 complete. Program paused. Press enter to continue: ')
121+
122+
# ================ Part 3: Normal Equations ================
123+
124+
print('Solving with normal equations...')
125+
126+
# ====================== YOUR CODE HERE ======================
127+
# Instructions: The following code computes the closed form
128+
# solution for linear regression using the normal
129+
# equations. You should complete the code in
130+
# normalEqn.m
131+
#
132+
# After doing so, you should complete this code
133+
# to predict the price of a 1650 sq-ft, 3 br house.
134+
#
135+
136+
# Load Data
137+
data = np.genfromtxt('./data/ex1data2.txt', delimiter=',')
138+
x = np.array(data[:, :2])
139+
y = np.array(data[:, 2])
140+
m = y.shape[0]
141+
142+
# Add intercept term to X
143+
r = x
144+
x = np.ones((x.shape[0], x.shape[1]+1))
145+
x[:, 1:] = r
146+
147+
# Calculate the parameters from the normal equation
148+
theta = helper.normalEqn(x, y)
149+
150+
# Display normal equation's result
151+
print('Theta computed from the normal equations: ', theta)
152+
153+
# Estimate the price of a 1650 sq-ft, 3 br house
154+
# ====================== YOUR CODE HERE ======================
155+
156+
r = np.array([1, 1650, 3])
157+
price = np.matmul(r, theta)
158+
159+
# ============================================================
160+
print('Predicted price of a 1650 sq-ft, 3 br house (using normal equations): ${0:.2f}'.format(price))
161+
162+
if __name__ == '__main__':
163+
main()

‎ex1helper.py

+41-37
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,59 @@
11
import numpy as np
22
from numpy.linalg import inv
33

4-
def computeCost(x,y,theta):
5-
6-
j = 1/2*np.mean(np.power(np.matmul(x.transpose(),theta)-y,2))
74

8-
return j
5+
def computeCost(x, y, theta):
6+
7+
j = 1/2*np.mean(np.power(np.matmul(x.transpose(), theta)-y, 2))
8+
9+
return j
910

1011

1112
def gradientDescent(x, y, theta, alpha, iterations):
12-
m = x.shape[1]
13-
costHistory = np.zeros(iterations)
14-
for i in range(iterations):
13+
m = x.shape[1]
14+
costHistory = np.zeros(iterations)
15+
for i in range(iterations):
16+
17+
error = np.matmul(x.transpose(), theta)-y
18+
gradient = np.dot(x, error)
1519

16-
error = np.matmul(x.transpose(),theta)-y
17-
gradient = np.dot(x,error)
18-
19-
theta = theta - alpha*gradient/m
20-
costHistory[i]= computeCost(x,y,theta)
20+
theta = theta - alpha*gradient/m
21+
costHistory[i] = computeCost(x, y, theta)
2122

22-
return [theta, costHistory]
23+
return [theta, costHistory]
2324

24-
def normalize(x,mu,std):
25-
return (x - mu)/std
25+
26+
def normalize(x, mu, std):
27+
return (x - mu)/std
2628

2729

2830
def featureNormalize(x):
29-
vfunc = np.vectorize(normalize)
30-
mu = np.mean(x,axis=0)
31-
sigma = np.std(x,axis=0)
32-
x_norm = vfunc(x,mu,sigma)
31+
vfunc = np.vectorize(normalize)
32+
mu = np.mean(x, axis=0)
33+
sigma = np.std(x, axis=0)
34+
x_norm = vfunc(x, mu, sigma)
35+
36+
return[x_norm, mu, sigma]
3337

34-
return[x_norm, mu, sigma]
3538

3639
def computeCostMulti(x, y, theta):
37-
m=y.shape[0]
38-
j = np.sum(np.power(np.matmul(x,theta)-y,2))/(2*m)
39-
return j
40+
m = y.shape[0]
41+
j = np.sum(np.power(np.matmul(x, theta)-y, 2))/(2*m)
42+
return j
43+
4044

4145
def gradientDescentMulti(x, y, theta, alpha, num_iters):
42-
m = y.shape[0]
43-
j_history = np.zeros(num_iters)
44-
for i in range(0,num_iters):
45-
error = np.matmul(x,theta)-y
46-
theta = theta - alpha*np.dot(error,x)/m
47-
j_history[i] = computeCostMulti(x,y,theta)
48-
49-
return [theta, j_history]
50-
51-
52-
def normalEqn(X,Y):
53-
X= np.matmul(inv(np.matmul(X.transpose(),X)),X.transpose())
54-
theta = np.matmul(X,Y)
55-
return theta
46+
m = y.shape[0]
47+
j_history = np.zeros(num_iters)
48+
for i in range(0, num_iters):
49+
error = np.matmul(x, theta)-y
50+
theta = theta - alpha*np.dot(error, x)/m
51+
j_history[i] = computeCostMulti(x, y, theta)
52+
53+
return [theta, j_history]
54+
55+
56+
def normalEqn(X, Y):
57+
X = np.matmul(inv(np.matmul(X.transpose(), X)), X.transpose())
58+
theta = np.matmul(X, Y)
59+
return theta

‎ex2.py

+122-127
Original file line numberDiff line numberDiff line change
@@ -1,132 +1,127 @@
1-
## Machine Learning Online Class - Exercise 2: Logistic Regression
2-
#
3-
# Instructions
4-
# ------------
5-
#
6-
# This file contains code that helps you get started on the logistic
7-
# regression exercise. You will need to complete the following functions
8-
# in this exericse:
9-
#
10-
# sigmoid - complete
11-
# costFunction - complete
12-
# predict - complete
13-
# costFunctionReg - complete
14-
#
15-
# For this exercise, you will not need to change any code in this file,
16-
# or any other files other than those mentioned above.
17-
#
18-
19-
## Initialization
20-
21-
## Load Data
22-
# The first two columns contains the exam scores and the third column
23-
# contains the label.
1+
""" Machine Learning Online Class - Exercise 2: Logistic Regression
2+
Instructions
3+
-----------
4+
This file contains code that helps you get started on the logistic
5+
regression exercise. You will need to complete the following functions
6+
in this exericse:
7+
sigmoid - complete
8+
costFunction - complete
9+
predict - complete
10+
costFunctionReg - complete
11+
For this exercise, you will not need to change any code in this file,
12+
or any other files other than those mentioned above.
13+
"""
14+
15+
# Imports
2416
import numpy as np
2517
import ex2helper as helper
2618
import matplotlib.pyplot as plt
27-
data = np.genfromtxt('./data/ex2data1.txt', delimiter=',')
28-
y = np.array(data[:,2])
29-
x = np.array(data[:,0:2])
3019

3120

32-
## ==================== Part 1: Plotting ====================
33-
# We start the exercise by first plotting the data to understand the
34-
# the problem we are working with.
35-
[m,n] = x.shape
36-
37-
r = x
38-
x = np.ones((m, n+1))
39-
x[:,1:] = r
40-
41-
print('\nPlotting data with \'o\' indicating (y = 1) examples and \'x\' indicating (y = 0) examples.')
42-
43-
helper.plotData(x,y)
44-
plt.xlabel('Exam Score 1')
45-
plt.ylabel('Exam Score 2')
46-
plt.show()
47-
48-
input('\nPart 1 completed. Program paused. Press enter to continue: ')
49-
## ============ Part 2: Compute Cost and Gradient ============
50-
# In this part of the exercise, you will implement the cost and gradient
51-
# for logistic regression. You neeed to complete the code in
52-
# costFunction.m
53-
#
54-
# Setup the data matrix appropriately, and add ones for the intercept term
55-
56-
57-
theta = np.zeros(n+1)
58-
59-
cost = helper.costFunction(theta,x,y)
60-
grad = helper.gradient(theta, x, y)
61-
62-
63-
print('Cost at initial theta (zeros): {0:.3f}'.format(cost))
64-
print('Expected cost (approx): 0.693')
65-
print('Gradient at initial theta (zeros): ')
66-
print(grad)
67-
print('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628')
68-
69-
70-
# Compute and display cost and gradient with non-zero theta
71-
test_theta = np.array([-24, 0.2, 0.2])
72-
cost = helper.costFunction(test_theta, x, y)
73-
grad = helper.gradient(test_theta, x, y)
74-
75-
print('Cost at initial theta (zeros): {0:.3f}'.format(cost))
76-
print('Expected cost (approx): 0.218')
77-
print('Gradient at initial theta (zeros): ')
78-
print(grad)
79-
print('Expected gradients (approx):\n 0.043\n 2.566\n 2.647')
80-
81-
input('\nPart 2 completed. Program paused. Press enter to continue: ')
82-
83-
## ============= Part 3: Optimizing using fminunc =============
84-
# In this exercise, you will use a built-in function (fminunc) to find the
85-
# optimal parameters theta.
86-
87-
# Set options for fminunc
88-
89-
results = helper.optimize(theta,x,y)
90-
theta = results.x
91-
cost = results.fun
92-
93-
# Print theta to screen
94-
print('Cost at theta found by scipy.optimize.minimize with TNC: {0:.3f}'.format(cost))
95-
print('Expected cost (approx): 0.203')
96-
print('theta:')
97-
print(theta)
98-
print('Expected theta (approx):')
99-
print('[ -25.161 0.206 0.201]')
100-
helper.plotDecisionBoundary(theta,x,y)
101-
plt.xlabel('Exam Score 1')
102-
plt.ylabel('Exam Score 2')
103-
plt.show()
104-
105-
input('\nPart 3 completed. Program paused. Press enter to continue: ')
106-
107-
108-
## ============== Part 4: Predict and Accuracies ==============
109-
# After learning the parameters, you'll like to use it to predict the outcomes
110-
# on unseen data. In this part, you will use the logistic regression model
111-
# to predict the probability that a student with score 45 on exam 1 and
112-
# score 85 on exam 2 will be admitted.
113-
#
114-
# Furthermore, you will compute the training and test set accuracies of
115-
# our model.
116-
#
117-
# Your task is to complete the code in predict.m
118-
# Predict probability for a student with score 45 on exam 1
119-
# and score 85 on exam 2
120-
121-
prob = helper.sigmoid(np.matmul(np.array([1, 45, 85]), theta))
122-
print('For a student with scores 45 and 85, we predict an admission probability of ', prob)
123-
print('Expected value: 0.775 +/- 0.002');
124-
125-
# Compute accuracy on our training set
126-
p = helper.predict(theta, x)
127-
predictions = np.zeros(p.shape)
128-
predictions[np.where(p==y)] = 1
129-
130-
131-
print('Train Accuracy: ', np.mean(predictions) * 100)
132-
print('Expected accuracy (approx): 89.0\n')
21+
def main():
22+
# Load Data
23+
# The first two columns contains the exam scores and the third column
24+
# contains the label.
25+
data = np.genfromtxt('./data/ex2data1.txt', delimiter=',')
26+
y = np.array(data[:, 2])
27+
x = np.array(data[:, 0:2])
28+
29+
# ==================== Part 1: Plotting ====================
30+
# We start the exercise by first plotting the data to understand the
31+
# the problem we are working with.
32+
[m, n] = x.shape
33+
34+
r = x
35+
x = np.ones((m, n+1))
36+
x[:, 1:] = r
37+
38+
print('\nPlotting data with \'o\' indicating (y = 1) examples and \'x\' indicating (y = 0) examples.')
39+
40+
helper.plotData(x, y)
41+
plt.xlabel('Exam Score 1')
42+
plt.ylabel('Exam Score 2')
43+
plt.show()
44+
45+
input('\nPart 1 completed. Program paused. Press enter to continue: ')
46+
# ============ Part 2: Compute Cost and Gradient ============
47+
# In this part of the exercise, you will implement the cost and gradient
48+
# for logistic regression. You neeed to complete the code in
49+
# costFunction.m
50+
#
51+
# Setup the data matrix appropriately, and add ones for the intercept term
52+
53+
theta = np.zeros(n+1)
54+
55+
cost = helper.costFunction(theta, x, y)
56+
grad = helper.gradient(theta, x, y)
57+
58+
print('Cost at initial theta (zeros): {0:.3f}'.format(cost))
59+
print('Expected cost (approx): 0.693')
60+
print('Gradient at initial theta (zeros): ')
61+
print(grad)
62+
print('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628')
63+
64+
# Compute and display cost and gradient with non-zero theta
65+
test_theta = np.array([-24, 0.2, 0.2])
66+
cost = helper.costFunction(test_theta, x, y)
67+
grad = helper.gradient(test_theta, x, y)
68+
69+
print('Cost at initial theta (zeros): {0:.3f}'.format(cost))
70+
print('Expected cost (approx): 0.218')
71+
print('Gradient at initial theta (zeros): ')
72+
print(grad)
73+
print('Expected gradients (approx):\n 0.043\n 2.566\n 2.647')
74+
75+
input('\nPart 2 completed. Program paused. Press enter to continue: ')
76+
77+
# ============= Part 3: Optimizing using fminunc =============
78+
# In this exercise, you will use a built-in function (fminunc) to find the
79+
# optimal parameters theta.
80+
81+
# Set options for fminunc
82+
83+
results = helper.optimize(theta, x, y)
84+
theta = results.x
85+
cost = results.fun
86+
87+
# Print theta to screen
88+
print('Cost at theta found by scipy.optimize.minimize with TNC: {0:.3f}'.format(cost))
89+
print('Expected cost (approx): 0.203')
90+
print('theta:')
91+
print(theta)
92+
print('Expected theta (approx):')
93+
print('[ -25.161 0.206 0.201]')
94+
helper.plotDecisionBoundary(theta, x, y)
95+
plt.xlabel('Exam Score 1')
96+
plt.ylabel('Exam Score 2')
97+
plt.show()
98+
99+
input('\nPart 3 completed. Program paused. Press enter to continue: ')
100+
101+
# ============== Part 4: Predict and Accuracies ==============
102+
# After learning the parameters, you'll like to use it to
103+
# predict the outcomes on unseen data. In this part, you will
104+
# use the logistic regression model to predict the probability
105+
# that a student with score 45 on exam 1 and score 85 on exam 2
106+
# will be admitted
107+
# Furthermore, you will compute the training and test set accuracies
108+
# of our model
109+
# Your task is to complete the code in predict.m
110+
# Predict probability for a student with score 45 on exam 1
111+
# and score 85 on exam 2
112+
113+
prob = helper.sigmoid(np.matmul(np.array([1, 45, 85]), theta))
114+
print('For a student with scores 45 and 85,')
115+
print('We predict an admission probability of ', prob)
116+
print('Expected value: 0.775 +/- 0.002')
117+
118+
# Compute accuracy on our training set
119+
p = helper.predict(theta, x)
120+
predictions = np.zeros(p.shape)
121+
predictions[np.where(p == y)] = 1
122+
123+
print('Train Accuracy: ', np.mean(predictions) * 100)
124+
print('Expected accuracy (approx): 89.0\n')
125+
126+
if __name__ == '__main__':
127+
main()

‎ex2_reg.py

+125-121
Original file line numberDiff line numberDiff line change
@@ -1,125 +1,129 @@
1-
## Machine Learning Online Class - Exercise 2: Logistic Regression
2-
#
3-
# Instructions
4-
# ------------
5-
#
6-
# This file contains code that helps you get started on the second part
7-
# of the exercise which covers regularization with logistic regression.
8-
#
9-
# You will need to complete the following functions in this exericse:
10-
#
11-
# sigmoid.m - complete
12-
# costFunction.m - complete
13-
# predict.m - complete
14-
# costFunctionReg.m - complete
15-
#
16-
# For this exercise, you will not need to change any code in this file,
17-
# or any other files other than those mentioned above.
18-
#
19-
20-
## Initialization
21-
22-
## Load Data
23-
# The first two columns contains the X values and the third column
24-
# contains the label (y).
1+
""" Machine Learning Online Class - Exercise 2: Logistic Regression
2+
Instructions
3+
------------
4+
This file contains code that helps you get started on the second part
5+
of the exercise which covers regularization with logistic regression.
6+
You will need to complete the following functions in this exericse:
7+
sigmoid - complete
8+
costFunction - complete
9+
predict - complete
10+
costFunctionReg - complete
11+
For this exercise, you will not need to change any code in this file,
12+
or any other files other than those mentioned above.
13+
"""
14+
15+
# Imports
2516
import numpy as np
2617
import ex2helper as helper
2718
import matplotlib.pyplot as plt
2819

29-
data = np.genfromtxt('./data/ex2data2.txt', delimiter=',')
30-
y = np.array(data[:,2])
31-
x = np.array(data[:,0:2])
32-
33-
## =========== Part 1: Regularized Logistic Regression ============
34-
# In this part, you are given a dataset with data points that are not
35-
# linearly separable. However, you would still like to use logistic
36-
# regression to classify the data points.
37-
#
38-
# To do so, you introduce more features to use -- in particular, you add
39-
# polynomial features to our data matrix (similar to polynomial
40-
# regression).
41-
#
42-
43-
# Add Polynomial Features
44-
45-
x = helper.mapFeatures(x)
46-
47-
# Initialize fitting parameters
48-
initial_theta = np.zeros(x.shape[1])
49-
50-
# Set regularization parameter lambda to 1
51-
lambdaVal = 1
52-
53-
# Compute and display initial cost and gradient for regularized logistic
54-
# regression
55-
cost = helper.costFunctionReg(initial_theta, x, y, lambdaVal)
56-
grad = helper.gradientReg(initial_theta, x, y, lambdaVal)
57-
58-
print('Cost at initial theta (zeros): {:.3f}'.format(cost))
59-
print('Expected cost (approx): 0.693\n')
60-
print('\nGradient at initial theta (zeros) - first five values only:')
61-
print(" {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(grad[0], grad[1],grad[2],grad[3],grad[4]))
62-
print('Expected gradients (approx) - first five values only:')
63-
print(' 0.0085 0.0188 0.0001 0.0503 0.0115\n')
64-
65-
66-
# Compute and display cost and gradient
67-
# with all-ones theta and lambda = 10
68-
test_theta = np.ones(x.shape[1])
69-
cost = helper.costFunctionReg(test_theta, x, y, 10)
70-
grad = helper.gradientReg(test_theta, x, y, 10)
71-
72-
73-
print('Cost at test theta (with lambda = 10): {:.2f}'.format(cost))
74-
print('Expected cost (approx): 3.16')
75-
print('\nGradient at initial theta (zeros) - first five values only:')
76-
print(" {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(grad[0], grad[1],grad[2],grad[3],grad[4]))
77-
print('Expected gradients (approx) - first five values only:')
78-
print(' 0.3460 0.1614 0.1948 0.2269 0.0922');
79-
80-
input('\nPart 1 completed. Program paused. Press enter to continue: ')
81-
82-
83-
## ============= Part 2: Regularization and Accuracies =============
84-
# Optional Exercise:
85-
# In this part, you will get to try different values of lambda and
86-
# see how regularization affects the decision coundart
87-
#
88-
# Try the following values of lambda (0, 1, 10, 100).
89-
#
90-
# How does the decision boundary change when you vary lambda? How does
91-
# the training set accuracy vary?
92-
#
93-
94-
# Initialize fitting parameters
95-
theta = np.zeros(x.shape[1])
96-
97-
# Set regularization parameter lambda to 1 (you should vary this)
98-
lambdaVal = 1
99-
100-
results = helper.optimizeReg(theta,x,y,lambdaVal)
101-
print(x.shape)
102-
print(theta.shape)
103-
print(y.shape)
104-
theta = results.x
105-
cost = results.fun
106-
107-
helper.plotData(x,y)
108-
plt.xlabel('Microchip Test 1')
109-
plt.ylabel('Microchip Test 2')
110-
plt.title('Raw Data')
111-
plt.show()
112-
113-
helper.plotDecisionBoundary(theta,x,y)
114-
plt.xlabel('Microchip Test 1')
115-
plt.ylabel('Microchip Test 2')
116-
plt.title('Lambda = {:}'.format(lambdaVal))
117-
plt.show()
118-
119-
# Compute accuracy on our training set
120-
p = helper.predict(theta, x)
121-
predictions = np.zeros(p.shape)
122-
predictions[np.where(p==y)] = 1
123-
124-
print('Train Accuracy: {:.1f}'.format(np.mean(predictions) * 100))
125-
print('Expected accuracy (with lambda = 1): 83.1 (approx)')
20+
21+
def main():
22+
# Load Data
23+
# The first two columns contains the X values and the third column
24+
# contains the label (y).
25+
data = np.genfromtxt('./data/ex2data2.txt', delimiter=',')
26+
y = np.array(data[:, 2])
27+
x = np.array(data[:, 0:2])
28+
29+
# =========== Part 1: Regularized Logistic Regression ============
30+
# In this part, you are given a dataset with data points that are not
31+
# linearly separable. However, you would still like to use logistic
32+
# regression to classify the data points.
33+
#
34+
# To do so, you introduce more features to use -- in particular, you add
35+
# polynomial features to our data matrix (similar to polynomial
36+
# regression).
37+
#
38+
39+
# Add Polynomial Features
40+
x = helper.mapFeatures(x)
41+
42+
# Initialize fitting parameters
43+
initial_theta = np.zeros(x.shape[1])
44+
45+
# Set regularization parameter lambda to 1
46+
lambdaVal = 1
47+
48+
# Compute and display initial cost and gradient for regularized logistic
49+
# regression
50+
cost = helper.costFunctionReg(initial_theta, x, y, lambdaVal)
51+
grad = helper.gradientReg(initial_theta, x, y, lambdaVal)
52+
53+
print('Cost at initial theta (zeros): {:.3f}'.format(cost))
54+
print('Expected cost (approx): 0.693\n')
55+
print('\nGradient at initial theta (zeros) - first five values only:')
56+
print(" {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(
57+
grad[0],
58+
grad[1],
59+
grad[2],
60+
grad[3],
61+
grad[4]))
62+
print('Expected gradients (approx) - first five values only:')
63+
print(' 0.0085 0.0188 0.0001 0.0503 0.0115\n')
64+
65+
# Compute and display cost and gradient
66+
# with all-ones theta and lambda = 10
67+
test_theta = np.ones(x.shape[1])
68+
cost = helper.costFunctionReg(test_theta, x, y, 10)
69+
grad = helper.gradientReg(test_theta, x, y, 10)
70+
71+
print('Cost at test theta (with lambda = 10): {:.2f}'.format(cost))
72+
print('Expected cost (approx): 3.16')
73+
print('\nGradient at initial theta (zeros) - first five values only:')
74+
print(" {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}".format(
75+
grad[0],
76+
grad[1],
77+
grad[2],
78+
grad[3],
79+
grad[4]))
80+
print('Expected gradients (approx) - first five values only:')
81+
print(' 0.3460 0.1614 0.1948 0.2269 0.0922')
82+
83+
input('\nPart 1 completed. Program paused. Press enter to continue: ')
84+
85+
# ============= Part 2: Regularization and Accuracies =============
86+
# Optional Exercise:
87+
# In this part, you will get to try different values of lambda and
88+
# see how regularization affects the decision coundart
89+
#
90+
# Try the following values of lambda (0, 1, 10, 100).
91+
#
92+
# How does the decision boundary change when you vary lambda? How does
93+
# the training set accuracy vary?
94+
95+
# Initialize fitting parameters
96+
theta = np.zeros(x.shape[1])
97+
98+
# Set regularization parameter lambda to 1 (you should vary this)
99+
lambdaVal = 1
100+
101+
results = helper.optimizeReg(theta, x, y, lambdaVal)
102+
print(x.shape)
103+
print(theta.shape)
104+
print(y.shape)
105+
theta = results.x
106+
cost = results.fun
107+
108+
helper.plotData(x, y)
109+
plt.xlabel('Microchip Test 1')
110+
plt.ylabel('Microchip Test 2')
111+
plt.title('Raw Data')
112+
plt.show()
113+
114+
helper.plotDecisionBoundary(theta, x, y)
115+
plt.xlabel('Microchip Test 1')
116+
plt.ylabel('Microchip Test 2')
117+
plt.title('Lambda = {:}'.format(lambdaVal))
118+
plt.show()
119+
120+
# Compute accuracy on our training set
121+
p = helper.predict(theta, x)
122+
predictions = np.zeros(p.shape)
123+
predictions[np.where(p == y)] = 1
124+
125+
print('Train Accuracy: {:.1f}'.format(np.mean(predictions) * 100))
126+
print('Expected accuracy (with lambda = 1): 83.1 (approx)')
127+
128+
if __name__ == '__main__':
129+
main()

‎ex2helper.py

+34-9
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,22 @@
44

55

66
def plotData(X, Y):
7-
# np.extract(Y==1,X[0]) returns an array of values in X where the value is 1 in the same location in Y
8-
positiveExamples = plt.scatter(np.extract(Y == 1, X[:, 1]), np.extract(Y == 1, X[:, 2]), label="y=1", marker='o',
9-
color='b', s=10)
10-
negativeExamples = plt.scatter(np.extract(Y == 0, X[:, 1]), np.extract(Y == 0, X[:, 2]), label="y=0", marker='x',
11-
color='r', s=10)
7+
positiveExamples = plt.scatter(
8+
np.extract(Y == 1, X[:, 1]),
9+
np.extract(Y == 1, X[:, 2]),
10+
label="y=1",
11+
marker='o',
12+
color='b',
13+
s=10)
14+
15+
negativeExamples = plt.scatter(
16+
np.extract(Y == 0, X[:, 1]),
17+
np.extract(Y == 0, X[:, 2]),
18+
label="y=0",
19+
marker='x',
20+
color='r',
21+
s=10)
22+
1223
plt.legend(handles=[positiveExamples, negativeExamples], loc='lower left')
1324

1425

@@ -31,7 +42,12 @@ def gradient(theta, x, y):
3142

3243

3344
def optimize(theta, x, y):
34-
return op.minimize(fun=costFunction, x0=theta, args=(x, y), method='TNC', jac=gradient)
45+
return op.minimize(
46+
fun=costFunction,
47+
x0=theta,
48+
args=(x, y),
49+
method='TNC',
50+
jac=gradient)
3551

3652

3753
def predict(theta, x):
@@ -83,7 +99,9 @@ def costFunctionReg(theta, x, y, lambdaVal):
8399
y = np.squeeze(y)
84100
m = x.shape[0]
85101
if (y.shape[0] != m):
86-
raise ValueError('Y & X are not compatible: X.shape = {} & y.shape = {}'.format(X.shape, y.shape))
102+
raise ValueError('Y & X are not compatible: X.shape = {} & y.shape = {}'.format(
103+
X.shape,
104+
y.shape))
87105

88106
z = sigmoid(np.matmul(x, theta))
89107

@@ -104,7 +122,9 @@ def gradientReg(theta, x, y, lambdaVal):
104122
y = np.squeeze(y)
105123
m = x.shape[0]
106124
if (y.shape[0] != m):
107-
raise ValueError('Y & X are not compatible: X.shape = {} & y.shape = {}'.format(X.shape, y.shape))
125+
raise ValueError('Y & X are not compatible: X.shape = {} & y.shape = {}'.format(
126+
X.shape,
127+
y.shape))
108128

109129
z = sigmoid(np.matmul(x, theta))
110130

@@ -118,4 +138,9 @@ def gradientReg(theta, x, y, lambdaVal):
118138

119139

120140
def optimizeReg(theta, x, y, lambdaVal):
121-
return op.minimize(fun=costFunctionReg, x0=theta, args=(x, y, lambdaVal), method='TNC', jac=gradientReg)
141+
return op.minimize(
142+
fun=costFunctionReg,
143+
x0=theta,
144+
args=(x, y, lambdaVal),
145+
method='TNC',
146+
jac=gradientReg)

‎ex3.py

+74-73
Original file line numberDiff line numberDiff line change
@@ -1,99 +1,100 @@
1-
## Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all
2-
3-
# Instructions
4-
# ------------
5-
#
6-
# This file contains code that helps you get started on the
7-
# linear exercise. You will need to complete the following functions
8-
# in this exericse:
9-
#
10-
# lrCostFunction (logistic regression cost function) - complete
11-
# oneVsAll - complete
12-
# predictOneVsAll - complete
13-
# predict - complated
14-
#
15-
# For this exercise, you will not need to change any code in this file,
16-
# or any other files other than those mentioned above.
17-
#
18-
19-
## Initialization
1+
""" Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all
2+
Instructions
3+
------------
4+
This file contains code that helps you get started on the
5+
linear exercise. You will need to complete the following functions
6+
in this exericse:
7+
lrCostFunction (logistic regression cost function) - complete
8+
oneVsAll - complete
9+
predictOneVsAll - complete
10+
predict - complated
11+
For this exercise, you will not need to change any code in this file,
12+
or any other files other than those mentioned above.
13+
"""
14+
15+
# Imports
2016
import numpy as np
2117
import matplotlib.pyplot as plt
2218
import scipy.io as io
2319
import ex2helper as helper2
2420
import ex3helper as helper
2521

26-
## Setup the parameters you will use for this part of the exercise
27-
input_layer_size = 400; # 20x20 Input Images of Digits
28-
num_labels = 10; # 10 labels, from 1 to 10
29-
# (note that we have mapped "0" to label 10)
3022

31-
## =========== Part 1: Loading and Visualizing Data =============
32-
# We start the exercise by first loading and visualizing the dataset.
33-
# You will be working with a dataset that contains handwritten digits.
34-
#
23+
def main():
24+
# Setup the parameters you will use for this part of the exercise
25+
input_layer_size = 400 # 20x20 Input Images of Digits
26+
num_labels = 10 # 10 labels, from 1 to 10
27+
# (note that we have mapped "0" to label 10)
3528

36-
# Load Training Data
37-
print('Loading and Visualizing Data ...')
38-
mat = io.loadmat('./data/ex3data1.mat')
39-
X = mat['X']
40-
y = np.squeeze(mat['y'])
29+
# =========== Part 1: Loading and Visualizing Data =============
30+
# We start the exercise by first loading and visualizing the dataset.
31+
# You will be working with a dataset that contains handwritten digits.
4132

33+
# Load Training Data
34+
print('Loading and Visualizing Data ...')
35+
mat = io.loadmat('./data/ex3data1.mat')
36+
X = mat['X']
37+
y = np.squeeze(mat['y'])
4238

43-
m = X.shape[0]
39+
m = X.shape[0]
4440

45-
# Randomly select 100 data points to display
46-
perm = np.random.permutation(m)
47-
sel = X[perm[0:100],:]
41+
# Randomly select 100 data points to display
42+
perm = np.random.permutation(m)
43+
sel = X[perm[0:100], :]
4844

49-
#display data as image
50-
helper.displayData(sel)
51-
plt.show()
52-
53-
input('\nPart 1 completed. Program paused. Press enter to continue: ')
45+
# display data as image
46+
helper.displayData(sel)
47+
plt.show()
5448

55-
## ============ Part 2a: Vectorize Logistic Regression ============
56-
# In this part of the exercise, you will reuse your logistic regression
57-
# code from the last exercise. You task here is to make sure that your
58-
# regularized logistic regression implementation is vectorized. After
59-
# that, you will implement one-vs-all classification for the handwritten
60-
# digit dataset.
49+
input('\nPart 1 completed. Program paused. Press enter to continue: ')
6150

51+
# ============ Part 2a: Vectorize Logistic Regression ============
52+
# In this part of the exercise, you will reuse your logistic regression
53+
# code from the last exercise. You task here is to make sure that your
54+
# regularized logistic regression implementation is vectorized. After
55+
# that, you will implement one-vs-all classification for the handwritten
56+
# digit dataset.
6257

63-
# Test case for lrCostFunction
64-
print('\nTesting lrCostFunction() with regularization')
58+
# Test case for lrCostFunction
59+
print('\nTesting lrCostFunction() with regularization')
6560

66-
theta_t = np.array([-2,-1,1,2])
67-
X_t = np.concatenate((np.array([np.ones(5)]),np.divide(np.arange(1,16,1),10).reshape(3,5)),axis=0).transpose()
68-
Y_t = np.array([1,0,1,0,1])
69-
lambda_t = 3
61+
theta_t = np.array([-2, -1, 1, 2])
62+
X_t = np.concatenate(
63+
(np.array([np.ones(5)]),
64+
np.divide(np.arange(15) + 1, 10)
65+
.reshape(3, 5)),
66+
axis=0).transpose()
7067

71-
J = helper2.costFunctionReg(theta_t,X_t,Y_t,lambda_t)
72-
grad = helper2.gradientReg(theta_t,X_t,Y_t,lambda_t)
68+
Y_t = np.array([1, 0, 1, 0, 1])
69+
lambda_t = 3
7370

74-
print('Cost: {:.6f}'.format(J))
75-
print('Expected cost: 2.534819')
76-
print('Gradients:')
77-
print(grad)
78-
print('Expected gradients:')
79-
print('[0.146561 -0.548558 0.724722 1.398003]')
71+
J = helper2.costFunctionReg(theta_t, X_t, Y_t, lambda_t)
72+
grad = helper2.gradientReg(theta_t, X_t, Y_t, lambda_t)
8073

74+
print('Cost: {:.6f}'.format(J))
75+
print('Expected cost: 2.534819')
76+
print('Gradients:')
77+
print(grad)
78+
print('Expected gradients:')
79+
print('[0.146561 -0.548558 0.724722 1.398003]')
8180

82-
input('\nPart 2a completed. Program paused. Press enter to continue: ')
81+
input('\nPart 2a completed. Program paused. Press enter to continue: ')
8382

84-
## ============ Part 2b: One-vs-All Training ============
85-
print('\nTraining One-vs-All Logistic Regression...')
83+
# ============ Part 2b: One-vs-All Training ============
84+
print('\nTraining One-vs-All Logistic Regression...')
8685

87-
lambdaVal = .1
88-
allTheta = helper.OneVsAll(X, y, np.unique(y), lambdaVal)
86+
lambdaVal = .1
87+
allTheta = helper.OneVsAll(X, y, np.unique(y), lambdaVal)
8988

89+
input('\nPart 2b completed. Program paused. Press enter to continue: ')
90+
# ================ Part 3: Predict for One-Vs-All ================
9091

91-
input('\nPart 2b completed. Program paused. Press enter to continue: ')
92-
## ================ Part 3: Predict for One-Vs-All ================
92+
p = helper.predictOneVsAll(allTheta, X)
93+
predictions = np.zeros(p.shape)
94+
predictions[np.where(p == y)] = 1
9395

94-
p = helper.predictOneVsAll(allTheta,X)
95-
predictions = np.zeros(p.shape)
96-
predictions[np.where(p==y)] = 1
96+
print('Train Accuracy: {:.1f}%'.format(np.mean(predictions) * 100))
97+
print('Expected Accuracy: 96.5%')
9798

98-
print('Train Accuracy: {:.1f}%'.format(np.mean(predictions) * 100))
99-
print('Expected Accuracy: 96.5%')
99+
if __name__ == '__main__':
100+
main()

‎ex3_nn.py

+71-76
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,96 @@
1-
## Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks
2-
3-
# Instructions
4-
# ------------
5-
#
6-
# This file contains code that helps you get started on the
7-
# linear exercise. You will need to complete the following functions
8-
# in this exericse:
9-
#
10-
# lrCostFunction (logistic regression cost function) - completed
11-
# oneVsAll - completed
12-
# predictOneVsAll - completed
13-
# predict - completed
14-
#
15-
# For this exercise, you will not need to change any code in this file,
16-
# or any other files other than those mention d above.
17-
#
18-
19-
## Initialization
1+
""" Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks
2+
Instructions
3+
------------
4+
This file contains code that helps you get started on the
5+
linear exercise. You will need to complete the following functions
6+
in this exericse:
7+
lrCostFunction (logistic regression cost function) - completed
8+
oneVsAll - completed
9+
predictOneVsAll - completed
10+
predict - completed
11+
For this exercise, you will not need to change any code in this file,
12+
or any other files other than those mentioned above.
13+
"""
14+
15+
# Imports
2016
import numpy as np
2117
import matplotlib.pyplot as plt
2218
import scipy.io as io
2319
import ex2helper as helper2
2420
import ex3helper as helper
2521

26-
## Setup the parameters you will use for this exercise
27-
input_layer_size = 400; # 20x20 Input Images of Digits
28-
hidden_layer_size = 25; # 25 hidden units
29-
num_labels = 10; # 10 labels, from 1 to 10
30-
# (note that we have mapped "0" to label 10)
3122

32-
## =========== Part 1: Loading and Visualizing Data =============
33-
# We start the exercise by first loading and visualizing the dataset.
34-
# You will be working with a dataset that contains handwritten digits.
35-
#
23+
def main():
24+
# Setup the parameters you will use for this exercise
25+
input_layer_size = 400 # 20x20 Input Images of Digits
26+
hidden_layer_size = 25 # 25 hidden units
27+
num_labels = 10 # 10 labels, from 1 to 10
28+
# (note that we have mapped "0" to label 10)
3629

37-
# Load Training Data
38-
print('Loading and Visualizing Data ...')
30+
# =========== Part 1: Loading and Visualizing Data =============
31+
# We start the exercise by first loading and visualizing the dataset.
32+
# You will be working with a dataset that contains handwritten digits.
3933

40-
mat = io.loadmat('./data/ex3data1.mat')
41-
X = mat['X']
42-
y = np.squeeze(mat['y'])
34+
# Load Training Data
35+
print('Loading and Visualizing Data ...')
4336

37+
mat = io.loadmat('./data/ex3data1.mat')
38+
X = mat['X']
39+
y = np.squeeze(mat['y'])
4440

45-
m = y.shape[0]
41+
m = y.shape[0]
4642

47-
# Randomly select 100 data points to display
48-
perm = np.random.permutation(m)
49-
sel = X[perm[0:100],:]
43+
# Randomly select 100 data points to display
44+
perm = np.random.permutation(m)
45+
sel = X[perm[0:100], :]
5046

51-
#display data as image
52-
helper.displayData(sel)
53-
plt.show()
47+
# display data as image
48+
helper.displayData(sel)
49+
plt.show()
5450

51+
input('\nPart 1 completed. Program paused. Press enter to continue: ')
5552

56-
input('\nPart 1 completed. Program paused. Press enter to continue: ')
53+
# ================ Part 2: Loading Parameters ================
54+
# In this part of the exercise, we load some pre-initialized
55+
# neural network parameters.
5756

57+
print('\nLoading Saved Neural Network Parameters ...')
5858

59-
## ================ Part 2: Loading Pameters ================
60-
# In this part of the exercise, we load some pre-initialized
61-
# neural network parameters.
59+
# Load the weights into variables Theta1 and Theta2
60+
mat = io.loadmat('./data/ex3weights.mat')
61+
theta1 = mat['Theta1']
62+
theta2 = mat['Theta2']
6263

63-
print('\nLoading Saved Neural Network Parameters ...')
64+
# ================= Part 3: Implement Predict =================
65+
# After training the neural network, we would like to use it to predict
66+
# the labels. You will now implement the "predict" function to use the
67+
# neural network to predict the labels of the training set. This lets
68+
# you compute the training set accuracy.
6469

65-
# Load the weights into variables Theta1 and Theta2
66-
mat = io.loadmat('./data/ex3weights.mat')
67-
theta1 = mat['Theta1']
68-
theta2 = mat['Theta2']
70+
p = helper.predict(theta1, theta2, X)
71+
predictions = np.zeros(p.shape)
72+
predictions[np.where(p == y)] = 1
6973

70-
## ================= Part 3: Implement Predict =================
71-
# After training the neural network, we would like to use it to predict
72-
# the labels. You will now implement the "predict" function to use the
73-
# neural network to predict the labels of the training set. This lets
74-
# you compute the training set accuracy.
74+
print('Train Set Accuracy: {:.1f}%'.format(np.mean(predictions) * 100))
7575

76-
p = helper.predict(theta1, theta2, X)
77-
predictions = np.zeros(p.shape)
78-
predictions[np.where(p==y)] = 1
76+
input('\nPart 3 completed. Program paused. Press enter to continue: ')
7977

80-
print('Train Set Accuracy: {:.1f}%'.format(np.mean(predictions) * 100))
78+
# Randomly permute the examples and display them one at a time
79+
perm = np.random.permutation(m)
80+
for i in range(0, m):
81+
print('\n Displaying Example Image...\n')
82+
example = X[perm[i], :]
83+
example = example[np.newaxis, :]
8184

82-
input('\nPart 3 completed. Program paused. Press enter to continue: ')
85+
helper.displayData(example)
86+
plt.show()
87+
p = helper.predict(theta1, theta2, example)
88+
print(' Neural Network Prediction: {}'.format(p[0] % 10))
89+
print(' Correct Answer: {}\n'.format(y[perm[i]] % 10))
8390

84-
# Randomly select 100 data points to display
85-
perm = np.random.permutation(m)
86-
for i in range(0,m):
87-
print('\n Displaying Example Image...\n')
88-
example = X[perm[i],:]
89-
example = example[np.newaxis,:]
91+
answer = input('Paused - press enter to continue, q to exit:')
92+
if(answer == 'q'):
93+
break
9094

91-
helper.displayData(example)
92-
plt.show()
93-
p = helper.predict(theta1, theta2, example)
94-
print(' Neural Network Prediction: {}'.format(p[0]%10))
95-
print(' Correct Answer: {}\n'.format(y[perm[i]]%10))
96-
97-
98-
99-
answer = input('Paused - press enter to continue, q to exit:')
100-
if(answer=='q'):
101-
break
95+
if __name__ == '__main__':
96+
main()
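The `% 10` in the prediction printout above exists because the dataset stores the digit "0" as label 10; a tiny illustrative check of that mapping:

# labels 1..9 stay as digits 1..9, label 10 wraps around to digit 0
for label in [1, 5, 9, 10]:
    print('label {} -> digit {}'.format(label, label % 10))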

‎ex3helper.py

+111-96
@@ -3,115 +3,130 @@
33
import ex2helper as helper
44
import math
55

6+
67
def OneVsAll(X, y, numlabels, lambdaVal):
7-
m = X.shape[0] #number of examples
8-
n = X.shape[1] #number of data points
9-
10-
X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
11-
theta = np.array([])#initialize theta
8+
m = X.shape[0] # number of examples
9+
n = X.shape[1] # number of features
10+
11+
X = np.insert(X, 0, np.ones(X.shape[0]), axis=1) # adding bias unit
12+
theta = np.array([]) # initialize theta
1213

14+
for i in numlabels:
15+
yTemp = np.zeros(y.shape[0])
16+
yTemp[np.where(y == i)] = 1
17+
thetaTemp = np.zeros(n + 1)
1318

14-
for i in numlabels:
15-
yTemp = np.zeros(y.shape[0])
16-
yTemp[np.where(y==i)] = 1
17-
thetaTemp = np.zeros(n + 1)
19+
# run regularized optimization
20+
results = helper.optimizeReg(thetaTemp, X, yTemp, lambdaVal)
21+
thetaTemp = results.x
1822

19-
#run regularized optimization
20-
results = helper.optimizeReg(thetaTemp, X, yTemp, lambdaVal)
21-
thetaTemp = results.x
23+
# get prediction accuracy
24+
p = helper.predict(thetaTemp, X)
25+
predictions = np.zeros(p.shape)
26+
predictions[np.where(p == yTemp)] = 1
27+
p = helper.sigmoid(np.matmul(X, thetaTemp))
2228

23-
#get prediction accuracy
24-
p = helper.predict(thetaTemp, X)
25-
predictions = np.zeros(p.shape)
26-
predictions[np.where(p==yTemp)] = 1
27-
p = helper.sigmoid(np.matmul(X,thetaTemp))
29+
# Validating that the function is working
30+
print('Train Accuracy: {:.1f}%'.format(np.mean(predictions) * 100))
31+
print('cost for {} = {:.3f}, max = {:.3f}'.format(
32+
i % 10,
33+
results.fun,
34+
np.max(p)))
2835

29-
#calculating cost and accuracy to validate that the function is working correctly
30-
print('Train Accuracy: {:.1f}%'.format(np.mean(predictions) * 100))
31-
print('cost for {} = {:.3f}, max = {:.3f}'.format(i%10,results.fun,np.max(p)))
36+
# appending discovered theta to theta
37+
theta = np.append(theta, thetaTemp)
3238

33-
theta = np.append(theta, thetaTemp)#appending discovered theta to theta
39+
# Note: this tripped me up for a while.
40+
# Reshape fills from left to right, top to bottom,
41+
# so data that belongs in columns gets scrambled even though
42+
# the call still "works"; hence the reshape-then-transpose below.
43+
theta = np.reshape(theta, (numlabels.shape[0], n + 1))
44+
return theta.transpose()
3445

35-
#struggled on this for awhile. Reshape works from left to right, top to bottom.
36-
#so if your data needs to be in columns instead of rows. It messes it all up, but it still works
37-
theta = np.reshape(theta, (numlabels.shape[0],n + 1))
38-
return theta.transpose()
3946

4047
def predictOneVsAll(allTheta, X):
41-
X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
48+
X = np.insert(X, 0, np.ones(X.shape[0]), axis=1)
49+
# adding bias unit
50+
51+
pred = helper.sigmoid(np.matmul(X, allTheta))
52+
# calculate predictions for all thetas
53+
54+
# return vector of position of maximum for each
55+
# row +1 to adjust for arrays initializing at 0
56+
return(np.argmax(pred, axis=1)+1)
4257

43-
pred = helper.sigmoid(np.matmul(X,allTheta))#calculate predictions for all thetas
44-
45-
#return vector of position of maximum for each row +1 to adjust for arrays initializing at 0
46-
return(np.argmax(pred,axis=1)+1)
4758
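A toy example (made-up probabilities, not repository data) of the argmax-plus-one step used by predictOneVsAll: each row holds one sigmoid score per class, and adding 1 turns the 0-based column index into a 1-based class label:

import numpy as np

pred = np.array([[0.10, 0.80, 0.30],
                 [0.60, 0.20, 0.05]])
print(np.argmax(pred, axis=1) + 1)  # [2 1]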

4859
def displayData(X, **keywordParameters):
49-
#set example width automatically if not given
50-
if('exampleWidth' in keywordParameters):
51-
exampleWidth = keywordParameters['exampleWidth']
52-
else:
53-
exampleWidth = round(math.sqrt(X.shape[1]))
54-
55-
#calculate size of rows and columns
56-
[m, n] = X.shape
57-
exampleHeight = n//exampleWidth #eliminating float with // divide
58-
59-
#calculate number of items to display
60-
displayRows = math.floor(math.sqrt(m))
61-
displayColumns = math.ceil(m/displayRows)
62-
63-
#set padding between images
64-
padding = 1
65-
66-
#set up blank display
67-
displayHeight = padding + displayRows * (exampleHeight + padding)
68-
displayWidth = padding + displayColumns * (exampleWidth + padding)
69-
70-
displayArray = - np.ones([displayHeight, displayWidth])
71-
72-
#Copy each example into a path on the display array
73-
currentExample = 0
74-
for j in range(0,displayRows):
75-
for i in range(0, displayColumns):
76-
if(currentExample > m):
77-
break
78-
79-
#Copy the Patch
80-
81-
#1. get the max value of the patch
82-
maxValue = np.amax(np.absolute(X[currentExample,:]))
83-
84-
#2. get current example in the correct shape
85-
example = np.reshape(X[currentExample,:], [exampleHeight, exampleWidth])/maxValue
86-
example = example.transpose()
87-
88-
#3. calculate current position height and width
89-
currentPositionHeight = padding + j * (exampleHeight + padding)
90-
currentPositionWidth = padding + i * (exampleWidth + padding)
91-
92-
#4. assign current example to correct position in the display array
93-
displayArray[currentPositionHeight:currentPositionHeight + exampleHeight, currentPositionWidth:currentPositionWidth + exampleWidth] = example
94-
95-
#5. iterate current example
96-
currentExample = currentExample + 1
97-
98-
if(currentExample>m):
99-
break
100-
101-
#show image
102-
imgplot = plt.imshow(displayArray, cmap='gray')
103-
plt.axis('off')
60+
# set example width automatically if not given
61+
if('exampleWidth' in keywordParameters):
62+
exampleWidth = keywordParameters['exampleWidth']
63+
else:
64+
exampleWidth = round(math.sqrt(X.shape[1]))
65+
66+
# calculate size of rows and columns
67+
[m, n] = X.shape
68+
exampleHeight = n//exampleWidth # eliminating float with // divide
69+
70+
# calculate number of items to display
71+
displayRows = math.floor(math.sqrt(m))
72+
displayColumns = math.ceil(m/displayRows)
73+
74+
# set padding between images
75+
padding = 1
76+
77+
# set up blank display
78+
displayHeight = padding + displayRows * (exampleHeight + padding)
79+
displayWidth = padding + displayColumns * (exampleWidth + padding)
80+
81+
displayArray = - np.ones([displayHeight, displayWidth])
82+
83+
# Copy each example into a path on the display array
84+
currentExample = 0
85+
for j in range(0, displayRows):
86+
for i in range(0, displayColumns):
87+
if(currentExample >= m):
88+
break
89+
90+
# Copy the Patch
91+
92+
# 1. get the max value of the patch
93+
maxValue = np.amax(np.absolute(X[currentExample, :]))
94+
95+
# 2. get current example in the correct shape
96+
example = np.reshape(
97+
X[currentExample, :],
98+
[exampleHeight, exampleWidth])/maxValue
99+
example = example.transpose()
100+
101+
# 3. calculate current position height and width
102+
positionHeight = padding + j * (exampleHeight + padding)
103+
positionWidth = padding + i * (exampleWidth + padding)
104+
105+
# 4. assign current example to its position in the display array
106+
displayArray[
107+
positionHeight:positionHeight + exampleHeight,
108+
positionWidth:positionWidth + exampleWidth] = example
109+
110+
# 5. iterate current example
111+
currentExample = currentExample + 1
112+
113+
if(currentExample >= m):
114+
break
115+
116+
# show image
117+
imgplot = plt.imshow(displayArray, cmap='gray')
118+
plt.axis('off')
119+
104120
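For reference, a worked example of the grid arithmetic in displayData, assuming the exercise's usual selection of 100 images of 20x20 pixels (m = 100, n = 400):

import math

m, n = 100, 400
exampleWidth = round(math.sqrt(n))            # 20
exampleHeight = n // exampleWidth             # 20
displayRows = math.floor(math.sqrt(m))        # 10
displayColumns = math.ceil(m / displayRows)   # 10
padding = 1
print(padding + displayRows * (exampleHeight + padding))    # 211 pixels tall
print(padding + displayColumns * (exampleWidth + padding))  # 211 pixels wide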

105121
def predict(theta1, theta2, X):
106-
m = X.shape[0]
107-
num_labels = theta2.shape[0]
108-
109-
X = np.insert(X,0,np.ones(X.shape[0]),axis=1) # adding bias unit
110-
a1 = np.matmul(X,theta1.transpose())
111-
a1 = helper.sigmoid(a1)
112-
a1 = np.insert(a1,0,np.ones(a1.shape[0]),axis=1) # adding bias unit
113-
a2 = np.matmul(a1,theta2.transpose())
114-
a2 = helper.sigmoid(a2)
115-
116-
return(np.argmax(a2,axis=1)+1)
122+
m = X.shape[0]
123+
num_labels = theta2.shape[0]
124+
125+
X = np.insert(X, 0, np.ones(X.shape[0]), axis=1) # adding bias unit
126+
a1 = np.matmul(X, theta1.transpose())
127+
a1 = helper.sigmoid(a1)
128+
a1 = np.insert(a1, 0, np.ones(a1.shape[0]), axis=1) # adding bias unit
129+
a2 = np.matmul(a1, theta2.transpose())
130+
a2 = helper.sigmoid(a2)
117131

132+
return(np.argmax(a2, axis=1)+1)
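To make the matrix shapes in predict easier to follow, here is a hedged sketch using the exercise's usual layer sizes (400 inputs, 25 hidden units, 10 labels); the weights below are random placeholders rather than the trained parameters from ex3weights.mat:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

m = 3
X = np.random.rand(m, 400)                        # m x 400 input images
theta1 = np.random.rand(25, 401)                  # hidden-layer weights
theta2 = np.random.rand(10, 26)                   # output-layer weights

a1 = np.insert(X, 0, np.ones(m), axis=1)          # m x 401 (bias added)
a2 = sigmoid(np.matmul(a1, theta1.transpose()))   # m x 25
a2 = np.insert(a2, 0, np.ones(m), axis=1)         # m x 26 (bias added)
a3 = sigmoid(np.matmul(a2, theta2.transpose()))   # m x 10
print(np.argmax(a3, axis=1) + 1)                  # predicted labels in 1..10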

‎ex4.py

+206-173
Large diffs are not rendered by default.

‎ex4Checker.py

+96-65
@@ -8,79 +8,110 @@
88

99

1010
def main():
11-
checkNNGradients(0)
12-
#checkNNGradients(1)
11+
checkNNGradients(0)
1312

14-
def checkNNGradients(lambdaVal):
15-
# CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
16-
# backpropagation gradients, it will output the analytical gradients
17-
# produced by your backprop code and the numerical gradients (computed
18-
# using computeNumericalGradient). These two gradient computations should
19-
# result in very similar values.
20-
#
21-
22-
inputLayerSize = 3
23-
hiddenLayerSize = 5
24-
numLabels = 3
25-
m = 5
26-
27-
#We generate some 'random' test data
28-
theta1 = debugInitializeWeights(hiddenLayerSize, inputLayerSize)
29-
theta2 = debugInitializeWeights(numLabels, hiddenLayerSize)
30-
31-
# Reusing debugInitializeWeights to generate X
32-
X = debugInitializeWeights(m, inputLayerSize - 1);
33-
y = np.remainder(np.arange(m),numLabels) + 1
34-
35-
#unroll parameters
36-
nnParams = np.append(theta1.flatten(), theta2.flatten())
37-
38-
#calculate gradient with backprop
39-
grad = helper.BackPropagation(nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
4013

41-
#calculate difference between backprop and numerical gradient
42-
diff = op.check_grad(costMask, backPropMask, nnParams, inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal, epsilon=.0001)
43-
44-
numGrad = op.approx_fprime(nnParams, costMask, .001 , inputLayerSize, hiddenLayerSize, numLabels, X, y, lambdaVal)
45-
# Visually examine the two gradient computations. The two columns you get should be very similar.
14+
def checkNNGradients(lambdaVal):
15+
# CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
16+
# backpropagation gradients, it will output the analytical gradients
17+
# produced by your backprop code and the numerical gradients (computed
18+
# using computeNumericalGradient). These two gradient computations should
19+
# result in very similar values.
20+
#
21+
22+
inputLayerSize = 3
23+
hiddenLayerSize = 5
24+
numLabels = 3
25+
m = 5
26+
27+
# We generate some 'random' test data
28+
theta1 = debugInitializeWeights(hiddenLayerSize, inputLayerSize)
29+
theta2 = debugInitializeWeights(numLabels, hiddenLayerSize)
30+
31+
# Reusing debugInitializeWeights to generate X
32+
X = debugInitializeWeights(m, inputLayerSize - 1)
33+
y = np.remainder(np.arange(m), numLabels) + 1
34+
35+
# unroll parameters
36+
nnParams = np.append(theta1.flatten(), theta2.flatten())
37+
38+
# calculate gradient with backprop
39+
grad = helper.BackPropagation(
40+
nnParams,
41+
inputLayerSize,
42+
hiddenLayerSize,
43+
numLabels,
44+
X,
45+
y,
46+
lambdaVal)
47+
48+
# calculate difference between backprop and numerical gradient
49+
diff = op.check_grad(
50+
costMask,
51+
backPropMask,
52+
nnParams,
53+
inputLayerSize,
54+
hiddenLayerSize,
55+
numLabels,
56+
X,
57+
y,
58+
lambdaVal,
59+
epsilon=.0001)
60+
61+
numGrad = op.approx_fprime(
62+
nnParams,
63+
costMask,
64+
.001,
65+
inputLayerSize,
66+
hiddenLayerSize,
67+
numLabels,
68+
X,
69+
y,
70+
lambdaVal)
71+
72+
# Visually examine the two gradient computations.
73+
# The two columns you get should be very similar.
74+
print('\nComparing Gradients: (numGrad, grad, absolute difference)')
75+
76+
for i in range(numGrad.shape[0]):
77+
print("{}: {:.9f}, {:.9f} {:.9f}".format(
78+
i+1,
79+
numGrad[i],
80+
grad[i],
81+
abs(numGrad[i] - grad[i])))
82+
83+
print('The left two columns above should be very similar.')
84+
print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
85+
86+
# Evaluate the norm of the difference between two solutions.
87+
# If you have a correct implementation
88+
# and you used EPSILON = 0.0001
89+
# in computeNumericalGradient,
90+
# then diff below should be less than 1e-9
91+
92+
print('If your backpropagation implementation is correct, then ')
93+
print('the relative difference will be small (less than 1e-9).')
94+
print('Relative Difference: {}'.format(diff))
4695

4796

48-
print('\nComparing Gradients: (numGrad, grad, absolute difference)')
97+
def debugInitializeWeights(fanOut, fanIn):
98+
# Initialize W using "sin"; this ensures W always contains the same
99+
# values, which is useful for debugging
100+
# numel (MATLAB) ~ number of elements, equivalent to w.size
101+
# size (MATLAB) ~ equivalent to w.shape
49102

50-
for i in range(0,numGrad.shape[0]):
51-
print("{}: {:.9f}, {:.9f} {:.9f}".format(i+1, numGrad[i], grad[i], abs(numGrad[i] - grad[i])))
52-
103+
W = np.arange(fanOut*(fanIn+1))
104+
W = W.reshape(fanOut, fanIn+1)
105+
W = np.sin(W)/10
106+
return W
53107

54-
print('The above left two columns you get should be very similar.')
55-
print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')
56108

57-
# Evaluate the norm of the difference between two solutions.
58-
# If you have a correct implementation, and assuming you used EPSILON = 0.0001
59-
# in computeNumericalGradient.m, then diff below should be less than 1e-9
109+
def backPropMask(nnParams, *args):
110+
return helper.BackPropagation(nnParams, *args)
60111

61-
print('If your backpropagation implementation is correct, then ')
62-
print('the relative difference will be small (less than 1e-9).')
63-
print('Relative Difference: {}'.format(diff))
64112

65-
66-
def debugInitializeWeights(fanOut, fanIn):
67-
# Initialize W using "sin", this ensures that vW is always of the same
68-
# values and will be useful for debugging
69-
# W = zeros(fan_out, 1 + fan_in);
70-
# W = reshape(sin(1:numel(W)), size(W)) / 10;
71-
# numel ~ number of elements. equivalent to size, w.size
72-
# size, equivalent of shape, w.shape
73-
W = np.arange(fanOut*(fanIn+1))
74-
W = W.reshape(fanOut, fanIn+1)
75-
W = np.sin(W)/10
76-
return W
77-
78-
def backPropMask(nnParams,*args):
79-
return helper.BackPropagation(nnParams,*args)
80-
81-
def costMask(nnParams,*args):
82-
return helper.nnCostFunction(nnParams,*args)
113+
def costMask(nnParams, *args):
114+
return helper.nnCostFunction(nnParams, *args)
83115

84116
if __name__ == '__main__':
85-
main()
86-
117+
main()
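The check above relies on scipy's finite-difference utilities; as a minimal, self-contained illustration of the same idea (a simple quadratic rather than the network's cost function):

import numpy as np
import scipy.optimize as op

def cost(x):
    return np.sum(x ** 2)

def grad(x):
    return 2 * x

x0 = np.array([1.0, -2.0, 3.0])
# 2-norm of (numerical gradient - analytical gradient); should be near zero
print(op.check_grad(cost, grad, x0))
# the numerical gradient itself, via finite differences
print(op.approx_fprime(x0, cost, 1e-6))  # roughly [2, -4, 6]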

‎ex4helper.py

+168-106
@@ -1,140 +1,202 @@
1-
import numpy as np
1+
import numpy as np
22
import matplotlib.pyplot as plt
33
import scipy.optimize as op
44
import math
55
import matplotlib.image as mpimg
66

7-
def nnCostFunction(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
8-
#get num examples
9-
m = X.shape[0]
107

11-
#get Theta Matrices
12-
[theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
8+
def nnCostFunction(
9+
nnParams,
10+
inputSize,
11+
hiddenLayerSize,
12+
outputSize,
13+
X,
14+
y,
15+
lambdaVal):
16+
# get num examples
17+
m = X.shape[0]
18+
19+
# get Theta Matrices
20+
[theta1, theta2] = getThetas(
21+
nnParams,
22+
inputSize,
23+
hiddenLayerSize,
24+
outputSize)
25+
26+
# prepare Y matrix for cost function
27+
Y = getYMatrix(y)
28+
29+
# forward Pass
30+
[a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
31+
32+
# getting regulation parameters
33+
R1 = theta1[:, 1:]
34+
R2 = theta2[:, 1:]
35+
36+
# calculating the cost of regulation
37+
costRegulation = lambdaVal*(np.sum(
38+
np.square(R1.flatten())) + np.sum(
39+
np.square(R2.flatten())))/(2*m)
40+
41+
# calculating the unregularized cost
42+
cost = np.sum(
43+
np.log(np.extract(Y == 1, h2))) + np.sum(
44+
np.log(1-np.extract(Y == 0, h2)))
45+
46+
cost = -cost/m
47+
48+
# calculate total cost
49+
totalCost = cost + costRegulation
50+
51+
return totalCost
52+
53+
54+
def BackPropagation(
55+
nnParams,
56+
inputSize,
57+
hiddenLayerSize,
58+
outputSize,
59+
X,
60+
y,
61+
lambdaVal):
62+
# get num examples
63+
m = X.shape[0]
64+
# get Theta Matrices
65+
[theta1, theta2] = getThetas(
66+
nnParams,
67+
inputSize,
68+
hiddenLayerSize,
69+
outputSize)
70+
71+
# prepare Y matrix for cost function
72+
Y = getYMatrix(y) # 5x3
73+
74+
# forward Pass
75+
[a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
76+
# a1 = 5x4, z1 = 5x5, a2 = 5x5, a2 = 5x6, z2 = 5x3, h2 = 5x3
77+
78+
# backward
79+
theta2Error = h2-Y # 5x3
80+
theta1Error = np.matmul(theta2Error, theta2[:, 1:])*sigmoidGradient(z1)
81+
82+
D1 = np.matmul(theta1Error.transpose(), a1)
83+
D2 = np.matmul(theta2Error.transpose(), a2)
84+
85+
# average the gradient per example
86+
theta1Grad = D1/m
87+
theta2Grad = D2/m
88+
89+
# calculate regularization terms
90+
theta1Reg = lambdaVal*theta1/m
91+
theta2Reg = lambdaVal*theta2/m
92+
theta1Reg[:, 0] = 0
93+
theta2Reg[:, 0] = 0
94+
95+
# combine gradient and regularization terms
96+
theta1Grad = theta1Grad + theta1Reg
97+
theta2Grad = theta2Grad + theta2Reg
98+
99+
return np.append(theta1Grad.flatten(), theta2Grad.flatten())
13100

14-
#prepare Y matrix for cost function
15-
Y = getYMatrix(y)
16101

17-
#forward Pass
18-
[a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
19-
20-
21-
#getting regulation parameters
22-
R1 = theta1[:,1:]
23-
R2 = theta2[:,1:]
24-
25-
# calculating the cost of regulation
26-
costRegulation = lambdaVal*(np.sum(np.square(R1.flatten())) + np.sum(np.square(R2.flatten())))/(2*m)
27-
28-
#calculating true cost without regulation
29-
cost = np.sum(np.log(np.extract(Y==1,h2))) + np.sum(np.log(1-np.extract(Y==0,h2)))
30-
31-
cost = -cost/m
32-
33-
#calculate total cost
34-
totalCost = cost + costRegulation
35-
36-
return totalCost
37-
38-
def BackPropagation(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal):
39-
#get num examples
40-
m = X.shape[0]
41-
#get Theta Matrices
42-
[theta1, theta2] = getThetas(nnParams,inputSize,hiddenLayerSize,outputSize)
43-
44-
45-
#prepare Y matrix for cost function
46-
Y = getYMatrix(y) #5x3
102+
def forwardPass(nnParams, X):
103+
theta1 = nnParams[0]
104+
theta2 = nnParams[1]
47105

48-
#forward Pass
49-
[a1, z1, a2, z2, h2] = forwardPass(np.array([theta1, theta2]), X)
50-
#a1 = 5x4, z1 = 5x5, a2 = 5x5, a2 = 5x6, z2 = 5x3, h2 = 5x3
106+
# left side is the example count
107+
# layer 1
108+
a1 = np.insert(X, 0, np.ones(X.shape[0]), axis=1) # 5x4
109+
z1 = np.matmul(a1, theta1.transpose()) # 5x5
110+
a2 = sigmoid(z1) # 5x5
51111

52-
#backward
53-
theta2Error = h2-Y #5x3
54-
theta1Error = np.matmul(theta2Error,theta2[:,1:])*sigmoidGradient(z1)
55-
56-
D1 = np.matmul(theta1Error.transpose(),a1)
57-
D2 = np.matmul(theta2Error.transpose(),a2)
112+
# layer 2
113+
a2 = np.insert(
114+
a2,
115+
0,
116+
np.ones(a1.shape[0]), axis=1) # adding bias unit 5x6
58117

59-
#average the gradient per example
60-
theta1Grad = D1/m
61-
theta2Grad = D2/m
118+
z2 = np.matmul(a2, theta2.transpose()) # 5x3
119+
a3 = sigmoid(z2) # 5x3
62120

63-
#calculate regulation terms
64-
theta1Reg = lambdaVal*theta1/m
65-
theta2Reg = lambdaVal*theta2/m
66-
theta1Reg[:,0] = 0
67-
theta2Reg[:,0] = 0
121+
return [a1, z1, a2, z2, a3]
68122

69-
#combine gradient and regulation terms
70-
theta1Grad = theta1Grad + theta1Reg
71-
theta2Grad = theta2Grad + theta2Reg
72123

73-
return np.append(theta1Grad.flatten(), theta2Grad.flatten())
124+
def predictNN(nnParams, X):
125+
results = forwardPass(nnParams, X)
126+
pred = results[4]
127+
return (np.argmax(pred, axis=1)+1)
74128

75-
def forwardPass(nnParams, X):
76-
theta1 = nnParams[0]
77-
theta2 = nnParams[1]
78129

79-
#left side is the example count
80-
#layer 1
81-
a1 = np.insert(X,0,np.ones(X.shape[0]),axis=1)#5x4
82-
z1 = np.matmul(a1,theta1.transpose())#5x5
83-
a2 = sigmoid(z1)#5x5
130+
def nnAccuracy(nnParams, X, inputLayerSize, hiddenLayerSize, numLabels, y):
84131

132+
thetas = getThetas(
133+
nnParams,
134+
inputLayerSize,
135+
hiddenLayerSize,
136+
numLabels)
85137

86-
#layer 2
87-
a2 = np.insert(a2,0,np.ones(a1.shape[0]),axis=1) # adding bias unit 5x6
88-
z2 = np.matmul(a2,theta2.transpose()) #5x3
89-
a3 = sigmoid(z2) #5x3
138+
p = predictNN(thetas, X)
90139

91-
return [a1, z1, a2, z2, a3]
140+
predictions = np.zeros(p.shape)
141+
predictions[np.where(p == y)] = 1
92142

93-
def predictNN(nnParams, X):
94-
results = forwardPass(nnParams, X)
95-
pred = results[4]
96-
return(np.argmax(pred,axis=1)+1)
143+
return np.mean(predictions) * 100
97144

98-
def nnAccuracy(nnParams, X, inputLayerSize, hiddenLayerSize, numLabels, y):
99145

100-
thetas = getThetas(nnParams, inputLayerSize, hiddenLayerSize, numLabels)
101-
102-
p = predictNN(thetas, X)
146+
def getYMatrix(y):
147+
# prepare Y matrix for cost function
148+
numLabels = np.unique(y).shape[0]
103149

104-
predictions = np.zeros(p.shape)
105-
predictions[np.where(p==y)] = 1
150+
# create boolean array of value or not out of 1s and 0s
151+
Y = (y == 1).astype(int)
152+
for i in range(2, numLabels + 1):
153+
Y = np.append(Y, (y == i).astype(int))
106154

107-
return np.mean(predictions) * 100
155+
# reshape so first dimension corresponds with label
156+
Y = Y.reshape(numLabels, y.shape[0])
157+
return Y.transpose()
108158

109-
def getYMatrix(y):
110-
#prepare Y matrix for cost function
111-
numLabels = np.unique(y).shape[0]
112159

113-
#create boolean array of value or not out of 1s and 0s
114-
Y = (y==1).astype(int)
115-
for i in range(2, numLabels + 1):
116-
Y = np.append(Y,(y==i).astype(int))
117-
#reshape so first dimension corresponds with label
118-
Y = Y.reshape(numLabels,y.shape[0])
119-
return Y.transpose()
160+
def getThetas(nnParams, inputSize, hiddenLayerSize, outputSize):
161+
theta1Length = (inputSize+1)*hiddenLayerSize
120162

121-
def getThetas(nnParams,inputSize,hiddenLayerSize,outputSize):
122-
theta1Length = (inputSize+1)*hiddenLayerSize
163+
theta1 = nnParams[:theta1Length]
164+
theta2 = nnParams[theta1Length:]
123165

124-
theta1 = nnParams[:theta1Length]
125-
theta2 = nnParams[theta1Length:]
166+
theta1 = theta1.reshape(hiddenLayerSize, inputSize+1)
167+
theta2 = theta2.reshape(outputSize, hiddenLayerSize+1)
126168

127-
theta1 = theta1.reshape(hiddenLayerSize,inputSize+1)
128-
theta2 = theta2.reshape(outputSize,hiddenLayerSize+1)
169+
return[theta1, theta2]
129170

130-
return[theta1, theta2]
131171

132172
def sigmoidGradient(Z):
133-
R = sigmoid(Z)
134-
return R*(1-R)
173+
R = sigmoid(Z)
174+
return R*(1-R)
135175

136-
def sigmoid(Z):
137-
return 1/(1+np.exp(-Z))
138176

139-
def optimizeNN(nnParams, inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal, maxIter):
140-
return op.minimize(fun=nnCostFunction, x0=nnParams, args=(inputSize, hiddenLayerSize, outputSize, X, y, lambdaVal), method='TNC', jac=BackPropagation, options={'maxiter': maxIter, 'disp': True})
177+
def sigmoid(Z):
178+
return 1/(1+np.exp(-Z))
179+
180+
181+
def optimizeNN(
182+
nnParams,
183+
inputSize,
184+
hiddenLayerSize,
185+
outputSize,
186+
X,
187+
y,
188+
lambdaVal,
189+
maxIter):
190+
return op.minimize(
191+
fun=nnCostFunction,
192+
x0=nnParams,
193+
args=(
194+
inputSize,
195+
hiddenLayerSize,
196+
outputSize,
197+
X,
198+
y,
199+
lambdaVal),
200+
method='TNC',
201+
jac=BackPropagation,
202+
options={'maxiter': maxIter, 'disp': True})
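As a small worked example of the one-hot matrix that getYMatrix builds (toy labels, chosen only for illustration): for y = [1, 2, 3, 1] each row of the result flags the example's class:

import numpy as np

y = np.array([1, 2, 3, 1])
numLabels = np.unique(y).shape[0]

Y = (y == 1).astype(int)
for i in range(2, numLabels + 1):
    Y = np.append(Y, (y == i).astype(int))
Y = Y.reshape(numLabels, y.shape[0]).transpose()
print(Y)
# [[1 0 0]
#  [0 1 0]
#  [0 0 1]
#  [1 0 0]]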

‎ex5.py

+280-211
Large diffs are not rendered by default.

‎ex5helper.py

+187-112
@@ -2,136 +2,211 @@
22
import scipy.optimize as op
33
import matplotlib.pyplot as plt
44

5+
56
def linearRegressionCost(theta, X, y, lambdaVal):
6-
'''
7-
Calculate the cost for the linear regression model
8-
'''
9-
m = y.shape[0]
10-
X = np.insert(X,0,np.ones(X.shape[1]),axis=0)
7+
'''
8+
Calculate the cost for the linear regression model
9+
'''
10+
m = y.shape[0]
11+
X = np.insert(
12+
X,
13+
0,
14+
np.ones(X.shape[1]),
15+
axis=0)
1116

12-
pred = linearRegressionPredict(X, theta)
17+
pred = linearRegressionPredict(X, theta)
1318

14-
j = np.sum(np.power(pred-y,2))
15-
16-
reg = np.power(theta,2)
17-
reg[0] = 0
18-
reg = lambdaVal*np.sum(reg)
19+
j = np.sum(np.power(pred-y, 2))
1920

20-
return (j+reg)/(2*m)
21+
reg = np.power(theta, 2)
22+
reg[0] = 0
23+
reg = lambdaVal*np.sum(reg)
2124

22-
def linearRegressionGradient(theta, X, y, lambdaVal):
23-
'''
24-
Calculate the gradient for the linear regression model
25-
'''
26-
m = y.shape[0]
27-
X = np.insert(X,0,np.ones(X.shape[1]),axis=0)
28-
29-
prediction = linearRegressionPredict(X, theta)
30-
31-
error = prediction - y
32-
33-
grad = np.matmul(X,error)/m
34-
35-
reg = lambdaVal/m*theta
36-
reg[0] = 0
37-
38-
return (grad + reg)
25+
return (j+reg)/(2*m)
3926

40-
def linearRegressionPredict(X, theta, **kwargs):
41-
'''
42-
predict the value for the provided linear regression model
43-
'''
44-
addBias = kwargs.pop('addBias', False)
4527

46-
if(addBias):
47-
X = np.insert(X,0,np.ones(X.shape[1]),axis=0)
28+
def linearRegressionGradient(theta, X, y, lambdaVal):
29+
'''
30+
Calculate the gradient for the linear regression model
31+
'''
32+
m = y.shape[0]
33+
X = np.insert(
34+
X,
35+
0,
36+
np.ones(X.shape[1]),
37+
axis=0)
4838

49-
return np.matmul(X.transpose(),theta)
39+
prediction = linearRegressionPredict(X, theta)
5040

51-
def trainLinearRegressionModel(theta, X, y, lambdaVal):
52-
'''
53-
train the Linear Regression model
54-
'''
55-
return op.minimize(fun=linearRegressionCost, x0=theta, args=(X, y, lambdaVal), method='CG', jac=linearRegressionGradient)
41+
error = prediction - y
5642

57-
def learningCurve(X,y,Xval,yval,lambdaVal):
58-
'''
59-
Iterate throught each number of possible learning sample sizes and
60-
calculate the error for the training and validation sets
61-
'''
43+
grad = np.matmul(X, error)/m
6244

63-
m = X.shape[1]
64-
theta = np.ones(X.shape[0]+1)
45+
reg = lambdaVal/m*theta
46+
reg[0] = 0
6547

66-
errorTrain = np.array([])
67-
errorValidation = np.array([])
68-
for i in range(m):
69-
results = trainLinearRegressionModel(theta, X[:,:i+1], y[:i+1], lambdaVal)
70-
theta = results.x
71-
errorTrain = np.append(errorTrain,linearRegressionCost(theta, X[:,:i+1], y[:i+1], lambdaVal))
72-
errorValidation = np.append(errorValidation,linearRegressionCost(theta, Xval, yval, lambdaVal))
48+
return (grad + reg)
7349

74-
return [errorTrain, errorValidation]
7550

76-
def polyFeatures(X,p):
77-
'''
78-
map the features of X to polynomial features for nonlinear solutions
79-
'''
80-
results = X
51+
def linearRegressionPredict(X, theta, **kwargs):
52+
'''
53+
predict the value for the provided linear regression model
54+
'''
55+
addBias = kwargs.pop('addBias', False)
8156

82-
for i in range(2,p+1):
83-
results = np.append(results,np.power(X,i), axis=0)
57+
if(addBias):
58+
X = np.insert(
59+
X,
60+
0,
61+
np.ones(X.shape[1]),
62+
axis=0)
8463

85-
return results
64+
return np.matmul(X.transpose(), theta)
8665

87-
def featureNormalize(X,**kwargs):
88-
'''
89-
normalize X by subtracting the mean and dividing the results by the standard deviation
90-
'''
9166

92-
mean = kwargs.pop('mean', X.mean(axis=1))
93-
sigma = kwargs.pop('sigma', np.std(X,axis=1))
67+
def trainLinearRegressionModel(theta, X, y, lambdaVal):
68+
'''
69+
train the Linear Regression model
70+
'''
71+
return op.minimize(
72+
fun=linearRegressionCost,
73+
x0=theta,
74+
args=(
75+
X,
76+
y,
77+
lambdaVal),
78+
method='CG',
79+
jac=linearRegressionGradient)
80+
81+
82+
def learningCurve(X, y, Xval, yval, lambdaVal):
83+
'''
84+
Iterate throught each number of possible learning sample sizes and
85+
calculate the error for the training and validation sets
86+
'''
87+
88+
m = X.shape[1]
89+
theta = np.ones(X.shape[0]+1)
90+
91+
errorTrain = np.array([])
92+
errorValidation = np.array([])
93+
for i in range(m):
94+
results = trainLinearRegressionModel(
95+
theta,
96+
X[:, :i+1],
97+
y[:i+1],
98+
lambdaVal)
99+
100+
theta = results.x
101+
102+
errorTrain = np.append(
103+
errorTrain,
104+
linearRegressionCost(
105+
theta,
106+
X[:, :i+1],
107+
y[:i+1],
108+
lambdaVal))
109+
110+
errorValidation = np.append(
111+
errorValidation,
112+
linearRegressionCost(
113+
theta,
114+
Xval,
115+
yval,
116+
lambdaVal))
117+
118+
return [errorTrain, errorValidation]
119+
120+
121+
def polyFeatures(X, p):
122+
'''
123+
map the features of X to polynomial features for nonlinear solutions
124+
'''
125+
results = X
126+
127+
for i in range(2, p+1):
128+
results = np.append(
129+
results,
130+
np.power(X, i),
131+
axis=0)
132+
133+
return results
134+
135+
136+
def featureNormalize(X, **kwargs):
137+
'''normalize X by subtracting the mean and dividing the results by the standard deviation'''
138+
139+
mean = kwargs.pop(
140+
'mean',
141+
X.mean(axis=1))
142+
143+
sigma = kwargs.pop(
144+
'sigma',
145+
np.std(X, axis=1))
146+
147+
Xnormalized = (X.transpose()-mean)/sigma
148+
return Xnormalized.transpose()
94149

95-
Xnormalized = (X.transpose()-mean)/sigma
96-
return Xnormalized.transpose()
97150

98151
def plotFit(minX, maxX, X, theta, p):
99-
'''
100-
plot the linear regression line values
101-
'''
102-
valueMap = np.arange(minX-15, maxX+25, .05)
103-
valueMap = valueMap[np.newaxis,:]
104-
105-
valueMapPoly = polyFeatures(valueMap, p)
106-
107-
#calculating mean and standard deviation for normalizing valueMap
108-
mean = valueMapPoly.mean(axis=1)
109-
sigma = np.std(valueMapPoly,axis=1)
110-
111-
valueMapPoly = featureNormalize(valueMapPoly,mean=mean,sigma=sigma)
112-
113-
projection = linearRegressionPredict(valueMapPoly,theta,addBias=True)
114-
115-
plt.plot(valueMap[0,:],projection, label = "Regression Line", color='red', linestyle='--')
116-
117-
def validationCurve(X,y,Xval,yval,lambdaVector):
118-
'''
119-
Iterate through lamdba values and calculate training error
120-
and validation error to choose appropriate value for lambda
121-
'''
122-
theta = np.ones(X.shape[0]+1)
123-
124-
errorTrain = np.array([])
125-
errorValidation = np.array([])
126-
127-
128-
for lambdaVal in lambdaVector:
129-
130-
results = trainLinearRegressionModel(theta, X, y, lambdaVal)
131-
theta = results.x
132-
errorTrain = np.append(errorTrain,linearRegressionCost(theta, X, y, lambdaVal))
133-
errorValidation = np.append(errorValidation,linearRegressionCost(theta, Xval, yval, lambdaVal))
134-
135-
return [errorTrain, errorValidation]
136-
137-
152+
'''
153+
plot the linear regression line values
154+
'''
155+
valueMap = np.arange(minX-15, maxX+25, .05)
156+
valueMap = valueMap[np.newaxis, :]
157+
158+
valueMapPoly = polyFeatures(valueMap, p)
159+
160+
# calculating mean and standard deviation for normalizing valueMap
161+
mean = valueMapPoly.mean(axis=1)
162+
sigma = np.std(valueMapPoly, axis=1)
163+
164+
valueMapPoly = featureNormalize(
165+
valueMapPoly,
166+
mean=mean,
167+
sigma=sigma)
168+
169+
projection = linearRegressionPredict(
170+
valueMapPoly,
171+
theta,
172+
addBias=True)
173+
174+
plt.plot(
175+
valueMap[0, :],
176+
projection,
177+
label="Regression Line",
178+
color='red',
179+
linestyle='--')
180+
181+
182+
def validationCurve(X, y, Xval, yval, lambdaVector):
183+
'''
184+
Iterate through lamdba values and calculate training error
185+
and validation error to choose appropriate value for lambda
186+
'''
187+
theta = np.ones(X.shape[0]+1)
188+
189+
errorTrain = np.array([])
190+
errorValidation = np.array([])
191+
192+
for lambdaVal in lambdaVector:
193+
results = trainLinearRegressionModel(theta, X, y, lambdaVal)
194+
theta = results.x
195+
196+
errorTrain = np.append(
197+
errorTrain,
198+
linearRegressionCost(
199+
theta,
200+
X,
201+
y,
202+
lambdaVal))
203+
204+
errorValidation = np.append(
205+
errorValidation,
206+
linearRegressionCost(
207+
theta,
208+
Xval,
209+
yval,
210+
lambdaVal))
211+
212+
return [errorTrain, errorValidation]
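A tiny illustration of polyFeatures (made-up numbers): with examples stored in columns, each pass stacks an element-wise power of X as a new feature row, so a value x expands to [x, x**2, ..., x**p]:

import numpy as np

X = np.array([[2.0, 3.0]])   # one feature, two examples (as columns)
p = 3
results = X
for i in range(2, p + 1):
    results = np.append(results, np.power(X, i), axis=0)
print(results)
# [[ 2.  3.]
#  [ 4.  9.]
#  [ 8. 27.]]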

‎ex6.py

+113-108
@@ -1,20 +1,16 @@
1-
## Machine Learning Online Class
2-
# Exercise 6 | Support Vector Machines
3-
#
4-
# Instructions
5-
# ------------
6-
#
7-
# This file contains code that helps you get started on the
8-
# exercise. You will need to complete the following functions:
9-
#
10-
# gaussianKernel.m - complete
11-
# dataset3Params.m
12-
# processEmail.m
13-
# emailFeatures.m
14-
#
15-
# For this exercise, you will not need to change any code in this file,
16-
# or any other files other than those mentioned above.
17-
#
1+
"""Machine Learning Online Class
2+
Exercise 6 | Support Vector Machines
3+
Instructions
4+
------------
5+
This file contains code that helps you get started on the
6+
exercise. You will need to complete the following functions:
7+
gaussianKernel
8+
dataset3Params
9+
processEmail
10+
emailFeatures
11+
For this exercise, you will not need to change any code in this file,
12+
or any other files other than those mentioned above.
13+
"""
1814

1915
# Imports:
2016
import numpy as np
@@ -23,137 +19,146 @@
2319
from sklearn import svm
2420
import ex6helper as helper
2521

26-
## =============== Part 1: Loading and Visualizing Data ================
27-
# We start the exercise by first loading and visualizing the dataset.
28-
# The following code will load the dataset into your environment and plot
29-
# the data.
30-
#
31-
print('Loading and Visualizing Data ...')
3222

33-
# Load from ex6data1:
34-
# You will have X, y in your environment
23+
def main():
24+
# =============== Part 1: Loading and Visualizing Data ================
25+
# We start the exercise by first loading and visualizing the dataset.
26+
# The following code will load the dataset into your environment and plot
27+
# the data.
28+
print('Loading and Visualizing Data ...')
3529

36-
mat = io.loadmat('./data/ex6data1.mat')
30+
# Load from ex6data1:
31+
# You will have X, y in your environment
3732

38-
X = mat['X']
33+
mat = io.loadmat('./data/ex6data1.mat')
3934

40-
y = mat['y'].astype(int).ravel()
35+
X = mat['X']
4136

42-
helper.plotData(X,y, addBias=True)
43-
plt.show()
37+
y = mat['y'].astype(int).ravel()
4438

45-
input('\nPart 1 completed. Program paused. Press enter to continue: ')
39+
helper.plotData(X, y, addBias=True)
40+
plt.show()
4641

47-
## ==================== Part 2: Training Linear SVM ====================
48-
# The following code will train a linear SVM on the dataset and plot the
49-
# decision boundary learned.
50-
#
42+
input('\nPart 1 completed. Program paused. Press enter to continue: ')
5143

52-
print('\nTraining Linear SVM ...')
44+
# ==================== Part 2: Training Linear SVM ====================
45+
# The following code will train a linear SVM on the dataset and plot the
46+
# decision boundary learned.
47+
print('\nTraining Linear SVM ...')
5348

54-
# You should try to change the C value below and see how the decision
55-
# boundary varies (e.g., try C = 1000)
56-
C = 1
49+
# You should try to change the C value below and see how the decision
50+
# boundary varies (e.g., try C = 1000)
51+
C = 1
5752

58-
model = svm.SVC(C=1, max_iter=100, tol=.01, kernel='linear')
59-
model.fit(X,y)
53+
model = svm.SVC(
54+
C=1,
55+
max_iter=100,
56+
tol=.01,
57+
kernel='linear')
6058

59+
model.fit(X, y)
60+
helper.visualizeBoundary(X, y, model)
61+
plt.show()
6162

62-
helper.visualizeBoundary(X,y,model)
63-
plt.show()
63+
input('\nPart 2 completed. Program paused. Press enter to continue: ')
6464

65-
input('\nPart 2 completed. Program paused. Press enter to continue: ')
65+
# =============== Part 3: Implementing Gaussian Kernel ===============
66+
# You will now implement the Gaussian kernel to use
67+
# with the SVM. You should complete the code in gaussianKernel
6668

67-
#% =============== Part 3: Implementing Gaussian Kernel ===============
68-
# You will now implement the Gaussian kernel to use
69-
# with the SVM. You should complete the code in gaussianKernel.m
70-
#
71-
print('\nEvaluating the Gaussian Kernel ...')
69+
print('\nEvaluating the Gaussian Kernel ...')
7270

73-
X1 = np.array([1, 2, 1])
74-
X2 = np.array([0, 4, -1])
75-
sim = helper.gaussianKernel(X1, X2, sigma=2)
71+
X1 = np.array([1, 2, 1])
72+
X2 = np.array([0, 4, -1])
73+
sim = helper.gaussianKernel(X1, X2, sigma=2)
7674

77-
print('Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {:.6f}'.format(sim))
78-
print('(for sigma = 2, this value should be about 0.324652)')
75+
print('Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = {:.6f}'.format(sim))
76+
print('(for sigma = 2, this value should be about 0.324652)')
7977

80-
input('\nPart 3 completed. Program paused. Press enter to continue: ')
78+
input('\nPart 3 completed. Program paused. Press enter to continue: ')
8179

82-
## =============== Part 4: Visualizing Dataset 2 ================
83-
# The following code will load the next dataset into your environment and
84-
# plot the data.
85-
#
80+
# =============== Part 4: Visualizing Dataset 2 ================
81+
# The following code will load the next dataset into your environment and
82+
# plot the data.
8683

87-
print('\nLoading and Visualizing Data ...')
84+
print('\nLoading and Visualizing Data ...')
8885

89-
mat = io.loadmat('./data/ex6data2.mat')
86+
mat = io.loadmat('./data/ex6data2.mat')
9087

91-
X = mat['X']
92-
y = mat['y'].astype(int).ravel()
88+
X = mat['X']
89+
y = mat['y'].astype(int).ravel()
9390

94-
helper.plotData(X,y, addBias=True)
95-
plt.show()
91+
helper.plotData(X, y, addBias=True)
92+
plt.show()
9693

97-
input('\nPart 4 completed. Program paused. Press enter to continue: ')
94+
input('\nPart 4 completed. Program paused. Press enter to continue: ')
9895

99-
## ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
100-
# After you have implemented the kernel, we can now use it to train the
101-
# SVM classifier.
102-
#
103-
print('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...')
96+
# ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
97+
# After you have implemented the kernel, we can now use it to train the
98+
# SVM classifier.
99+
print('\nTraining SVM with RBF Kernel (this may take 1 to 2 minutes) ...')
104100

105-
# Train the SVM with the Gaussian kernel on this dataset.
106-
sigma = 0.1
107-
gamma = np.power(sigma,-2.)
101+
# Train the SVM with the Gaussian kernel on this dataset.
102+
sigma = 0.1
103+
gamma = np.power(sigma, -2.)
108104

109-
model = svm.SVC(C=1, kernel='rbf', gamma=gamma)
110-
model.fit(X, y.flatten())
105+
model = svm.SVC(
106+
C=1,
107+
kernel='rbf',
108+
gamma=gamma)
111109

112-
helper.visualizeBoundary(X,y,model)
113-
plt.show()
110+
model.fit(X, y.flatten())
111+
helper.visualizeBoundary(X, y, model)
112+
plt.show()
114113

115-
input('\nPart 5 completed. Program paused. Press enter to continue: ')
114+
input('\nPart 5 completed. Program paused. Press enter to continue: ')
116115

117-
## =============== Part 6: Visualizing Dataset 3 ================
118-
# The following code will load the next dataset into your environment and
119-
# plot the data.
120-
#
116+
# =============== Part 6: Visualizing Dataset 3 ================
117+
# The following code will load the next dataset into your environment and
118+
# plot the data.
121119

122-
print('\nLoading and Visualizing Data ...')
120+
print('\nLoading and Visualizing Data ...')
123121

124-
mat = io.loadmat('./data/ex6data3.mat')
122+
mat = io.loadmat('./data/ex6data3.mat')
125123

126-
X = mat['X']
127-
y = mat['y'].astype(int).ravel()
124+
X = mat['X']
125+
y = mat['y'].astype(int).ravel()
128126

129-
helper.plotData(X,y, addBias=True)
130-
plt.show()
127+
helper.plotData(X, y, addBias=True)
128+
plt.show()
131129

132-
input('\nPart 6 completed. Program paused. Press enter to continue: ')
130+
input('\nPart 6 completed. Program paused. Press enter to continue: ')
133131

134-
## ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========
135-
# This is a different dataset that you can use to experiment with. Try
136-
# different values of C and sigma here.
137-
#
132+
# ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========
133+
# This is a different dataset that you can use to experiment with. Try
134+
# different values of C and sigma here.
135+
#
138136

139-
Xval = mat['Xval']
140-
yval = mat['yval'].astype(int).ravel()
137+
Xval = mat['Xval']
138+
yval = mat['yval'].astype(int).ravel()
141139

140+
# get optimal parameters
141+
[C, sigma] = helper.dataset3Params(
142+
X,
143+
y,
144+
Xval,
145+
yval)
142146

143-
#get optimal parameters
144-
[C, sigma] = helper.dataset3Params(X,y,Xval,yval)
145-
gamma = np.power(sigma,-2.)
147+
gamma = np.power(sigma, -2.)
146148

147-
print('\nFound C & Sigma: {} & {}'.format(C, sigma))
149+
print('\nFound C & Sigma: {} & {}'.format(C, sigma))
148150

149-
#train the model
150-
model = svm.SVC(C=C, kernel='rbf', gamma=gamma)
151-
model.fit(X, y.flatten())
151+
# train the model
152+
model = svm.SVC(
153+
C=C,
154+
kernel='rbf',
155+
gamma=gamma)
152156

157+
model.fit(X, y.flatten())
158+
helper.visualizeBoundary(X, y, model)
159+
plt.show()
153160

154-
#visualize data
155-
helper.visualizeBoundary(X,y,model)
156-
plt.show()
161+
input('\nPart 7 completed. Program completed. Press enter to exit: ')
157162

158-
159-
input('\nPart 7 completed. Program completed. Press enter to exit: ')
163+
if __name__ == '__main__':
164+
main()
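For reference, the expected value quoted in Part 3 can be reproduced directly from the Gaussian kernel formula exp(-||x1 - x2||^2 / (2 * sigma^2)), independently of the helper module:

import numpy as np

x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
sigma = 2
sim = np.exp(-np.sum((x1 - x2) ** 2) / (2 * sigma ** 2))
print('{:.6f}'.format(sim))  # 0.324652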
