%% Machine Learning Online Class - Exercise 2: Logistic Regression
%
% Instructions
% ------------
%
% This file contains code that helps you get started on the logistic
% regression exercise. You will need to complete the following functions
% in this exercise:
%
%    sigmoid.m
%    costFunction.m
%    predict.m
%    costFunctionReg.m
%
% For this exercise, you will not need to change any code in this file,
% or in any files other than those mentioned above.
%

%% Initialization
clear; close all; clc

%% Load Data
% The first two columns contain the exam scores and the third column
% contains the label.

data = load('ex2data1.txt');
X = data(:, [1, 2]); y = data(:, 3);

%% ==================== Part 1: Plotting ====================
% We start the exercise by first plotting the data to understand
% the problem we are working with.

fprintf(['Plotting data with + indicating (y = 1) examples and o ' ...
         'indicating (y = 0) examples.\n']);

plotData(X, y);

% Put some labels
hold on;
% Labels and Legend
xlabel('Exam 1 score')
ylabel('Exam 2 score')

% Specified in plot order
legend('Admitted', 'Not admitted')
hold off;

fprintf('\nProgram paused. Press enter to continue.\n');
pause;
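
% For reference, a minimal sketch of what plotData.m might look like
% (an illustration of one way to draw the two classes, not the official
% implementation):
%
%   function plotData(X, y)
%   % Plot the positive and negative examples with different markers
%   pos = find(y == 1); neg = find(y == 0);
%   figure; hold on;
%   plot(X(pos, 1), X(pos, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7);
%   plot(X(neg, 1), X(neg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);
%   hold off;
%   end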


%% ============ Part 2: Compute Cost and Gradient ============
% In this part of the exercise, you will implement the cost and gradient
% for logistic regression. You need to complete the code in
% costFunction.m
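%
% costFunction.m relies on sigmoid.m; a minimal vectorized sketch of
% sigmoid.m (it should work elementwise on scalars, vectors, and matrices;
% an illustration, not the official solution):
%
%   function g = sigmoid(z)
%   g = 1 ./ (1 + exp(-z));   % elementwise logistic function
%   end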

% Set up the data matrix appropriately, and add ones for the intercept term
[m, n] = size(X);

% Add intercept term to X
X = [ones(m, 1) X];

% Initialize fitting parameters
initial_theta = zeros(n + 1, 1);

% Compute and display initial cost and gradient
[cost, grad] = costFunction(initial_theta, X, y);

fprintf('Cost at initial theta (zeros): %f\n', cost);
fprintf('Expected cost (approx): 0.693\n');
fprintf('Gradient at initial theta (zeros): \n');
fprintf(' %f \n', grad);
fprintf('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n');

% Compute and display cost and gradient with non-zero theta
test_theta = [-24; 0.2; 0.2];
[cost, grad] = costFunction(test_theta, X, y);

fprintf('\nCost at test theta: %f\n', cost);
fprintf('Expected cost (approx): 0.218\n');
fprintf('Gradient at test theta: \n');
fprintf(' %f \n', grad);
fprintf('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n');
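
% For reference, a minimal vectorized sketch of what costFunction.m might
% compute (an illustration under the exercise's interface, not the official
% solution):
%
%   function [J, grad] = costFunction(theta, X, y)
%   m = length(y);              % number of training examples
%   h = sigmoid(X * theta);     % hypothesis for all examples
%   J = (1 / m) * (-y' * log(h) - (1 - y)' * log(1 - h));
%   grad = (1 / m) * (X' * (h - y));
%   end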

fprintf('\nProgram paused. Press enter to continue.\n');
pause;


%% ============= Part 3: Optimizing using fminunc =============
% In this exercise, you will use a built-in function (fminunc) to find the
% optimal parameters theta.

% Set options for fminunc
options = optimset('GradObj', 'on', 'MaxIter', 400);

% Run fminunc to obtain the optimal theta
% This function will return theta and the cost
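% The anonymous function @(t) costFunction(t, X, y) fixes X and y so that
% fminunc sees a function of t alone; setting 'GradObj' to 'on' tells
% fminunc that the objective also returns its gradient as a second output.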
[theta, cost] = ...
	fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);

% Print theta to screen
fprintf('Cost at theta found by fminunc: %f\n', cost);
fprintf('Expected cost (approx): 0.203\n');
fprintf('theta: \n');
fprintf(' %f \n', theta);
fprintf('Expected theta (approx):\n');
fprintf(' -25.161\n 0.206\n 0.201\n');

% Plot Boundary
plotDecisionBoundary(theta, X, y);

% Put some labels
hold on;
% Labels and Legend
xlabel('Exam 1 score')
ylabel('Exam 2 score')

% Specified in plot order
legend('Admitted', 'Not admitted')
hold off;

fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% ============== Part 4: Predict and Accuracies ==============
% After learning the parameters, you'll likely want to use the model to
% predict outcomes on unseen data. In this part, you will use the logistic
% regression model to predict the probability that a student with a score
% of 45 on exam 1 and a score of 85 on exam 2 will be admitted.
%
% Furthermore, you will compute the training set accuracy of our model
% (this script does not use a separate test set).
%
% Your task is to complete the code in predict.m
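%
% A minimal sketch of what predict.m might look like (assuming the usual
% 0.5 decision threshold; an illustration, not the official solution):
%
%   function p = predict(theta, X)
%   % Predict 1 when the estimated admission probability is at least 0.5
%   p = sigmoid(X * theta) >= 0.5;
%   end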

% Predict probability for a student with score 45 on exam 1
% and score 85 on exam 2
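% (the leading 1 below matches the intercept column added to X in Part 2)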

prob = sigmoid([1 45 85] * theta);
fprintf(['For a student with scores 45 and 85, we predict an admission ' ...
         'probability of %f\n'], prob);
fprintf('Expected value: 0.775 +/- 0.002\n\n');

% Compute accuracy on our training set
p = predict(theta, X);

fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100);
fprintf('Expected accuracy (approx): 89.0\n');
fprintf('\n');
