
Commit a2c70ff

Authored Oct 31, 2017
Add files via upload
1 parent 723e4ab commit a2c70ff

File tree

9 files changed, +1,899 −0 lines changed


Diff for: ‎machine-learning-ex8/ex8.pdf

242 KB
Binary file not shown.

Diff for: ‎machine-learning-ex8/ex8/loadMovieList.m

+25
@@ -0,0 +1,25 @@
function movieList = loadMovieList()
%LOADMOVIELIST reads the fixed movie list in movie_ids.txt and returns a
%cell array of the movie names
%   movieList = LOADMOVIELIST() reads the fixed movie list in movie_ids.txt
%   and returns a cell array of the movie names in movieList.


%% Read the fixed movie list
fid = fopen('movie_ids.txt');

% Store all movies in cell array movieList{}
n = 1682;  % Total number of movies

movieList = cell(n, 1);
for i = 1:n
    % Read line
    line = fgets(fid);
    % Movie index (can ignore since it will be = i)
    [idx, movieName] = strtok(line, ' ');
    % Actual movie name
    movieList{i} = strtrim(movieName);
end
fclose(fid);

end
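A minimal usage sketch (assuming movie_ids.txt from the exercise data is in the current directory):

% Hypothetical usage: load the 1682 movie titles and inspect the first one.
movieList = loadMovieList();
fprintf('First movie: %s\n', movieList{1});        % e.g. "Toy Story (1995)"
fprintf('Total movies loaded: %d\n', numel(movieList));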

Diff for: ‎machine-learning-ex8/ex8/movie_ids.txt

+1,682
Large diffs are not rendered by default.

Diff for: ‎machine-learning-ex8/ex8/multivariateGaussian.m

+22
@@ -0,0 +1,22 @@
function p = multivariateGaussian(X, mu, Sigma2)
%MULTIVARIATEGAUSSIAN Computes the probability density function of the
%multivariate gaussian distribution.
%   p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability
%   density function of the examples X under the multivariate gaussian
%   distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is
%   treated as the covariance matrix. If Sigma2 is a vector, it is treated
%   as the \sigma^2 values of the variances in each dimension (a diagonal
%   covariance matrix)
%

k = length(mu);

if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1)
    Sigma2 = diag(Sigma2);
end

%fprintf('Sdet: %e\n',det(Sigma2));
X = bsxfun(@minus, X, mu(:)');
p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ...
    exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2));

end
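A usage sketch with invented parameters: when Sigma2 is passed as a vector, the function builds a diagonal covariance matrix, so the density reduces to a product of independent 1-D Gaussians per feature.

% Hypothetical example: density of two query points under an assumed diagonal Gaussian.
mu     = [14.1; 14.9];          % assumed feature means
sigma2 = [1.8; 1.7];            % assumed per-feature variances (diagonal covariance)
Xq     = [13.0 14.5; 22.0 25.0];
p = multivariateGaussian(Xq, mu, sigma2);
% p(1) should be relatively large (near the mean); p(2) much smaller (an outlier).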

Diff for: ‎machine-learning-ex8/ex8/normalizeRatings.m

+17
@@ -0,0 +1,17 @@
function [Ynorm, Ymean] = normalizeRatings(Y, R)
%NORMALIZERATINGS Preprocess data by subtracting mean rating for every
%movie (every row)
%   [Ynorm, Ymean] = NORMALIZERATINGS(Y, R) normalizes Y so that each movie
%   has a rating of 0 on average, and returns the mean rating in Ymean.
%

[m, n] = size(Y);
Ymean = zeros(m, 1);
Ynorm = zeros(size(Y));
for i = 1:m
    idx = find(R(i, :) == 1);
    Ymean(i) = mean(Y(i, idx));
    Ynorm(i, idx) = Y(i, idx) - Ymean(i);
end

end
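A small sketch on toy data (values invented for illustration): only rated entries, i.e. those with R(i, j) == 1, contribute to each movie's mean, and only those entries are mean-centred in Ynorm.

% Hypothetical 2-movie / 3-user example.
Y = [5 4 0;
     0 1 3];          % ratings (0 where unrated)
R = [1 1 0;
     0 1 1];          % R(i, j) = 1 if user j rated movie i
[Ynorm, Ymean] = normalizeRatings(Y, R);
% Ymean = [4.5; 2], Ynorm = [0.5 -0.5 0; 0 -1 1]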

Diff for: ‎machine-learning-ex8/ex8/selectThreshold.m

+56
@@ -0,0 +1,56 @@
function [bestEpsilon bestF1] = selectThreshold(yval, pval)
%SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting
%outliers
%   [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best
%   threshold to use for selecting outliers based on the results from a
%   validation set (pval) and the ground truth (yval).
%

bestEpsilon = 0;
bestF1 = 0;
F1 = 0;

stepsize = (max(pval) - min(pval)) / 1000;
for epsilon = min(pval):stepsize:max(pval)

    % ====================== YOUR CODE HERE ======================
    % Instructions: Compute the F1 score of choosing epsilon as the
    %               threshold and place the value in F1. The code at the
    %               end of the loop will compare the F1 score for this
    %               choice of epsilon and set it to be the best epsilon if
    %               it is better than the current choice of epsilon.
    %
    % Note: You can use predictions = (pval < epsilon) to get a binary vector
    %       of 0's and 1's of the outlier predictions

    predictions = (pval < epsilon);

    fp = sum((predictions == 1) & (yval == 0));
    fn = sum((predictions == 0) & (yval == 1));
    tp = sum((predictions == 1) & (yval == 1));

    prec = tp / (tp + fp);
    rec = tp / (tp + fn);

    F1 = 2 * prec * rec / (prec + rec);

    % =============================================================

    if F1 > bestF1
        bestF1 = F1;
        bestEpsilon = epsilon;
    end
end

end
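A toy check of the filled-in F1 logic (inputs invented for illustration): with a clear gap between the densities of anomalous and normal examples, the selected epsilon should fall inside that gap and F1 should reach 1.

% Hypothetical example: two obvious anomalies with much lower density pval.
yval = [0; 0; 0; 0; 1; 1];                      % ground-truth anomaly labels
pval = [0.20; 0.25; 0.22; 0.30; 0.001; 0.002];  % densities from the fitted Gaussian
[bestEpsilon, bestF1] = selectThreshold(yval, pval);
% bestEpsilon should land between roughly 0.002 and 0.20, with bestF1 = 1.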

Diff for: ‎machine-learning-ex8/ex8/submit.m

+77
@@ -0,0 +1,77 @@
function submit()
  addpath('./lib');

  conf.assignmentSlug = 'anomaly-detection-and-recommender-systems';
  conf.itemName = 'Anomaly Detection and Recommender Systems';
  conf.partArrays = { ...
    { ...
      '1', ...
      { 'estimateGaussian.m' }, ...
      'Estimate Gaussian Parameters', ...
    }, ...
    { ...
      '2', ...
      { 'selectThreshold.m' }, ...
      'Select Threshold', ...
    }, ...
    { ...
      '3', ...
      { 'cofiCostFunc.m' }, ...
      'Collaborative Filtering Cost', ...
    }, ...
    { ...
      '4', ...
      { 'cofiCostFunc.m' }, ...
      'Collaborative Filtering Gradient', ...
    }, ...
    { ...
      '5', ...
      { 'cofiCostFunc.m' }, ...
      'Regularized Cost', ...
    }, ...
    { ...
      '6', ...
      { 'cofiCostFunc.m' }, ...
      'Regularized Gradient', ...
    }, ...
  };
  conf.output = @output;

  submitWithConfiguration(conf);
end

function out = output(partId, auxstring)
  % Random Test Cases
  n_u = 3; n_m = 4; n = 5;
  X = reshape(sin(1:n_m*n), n_m, n);
  Theta = reshape(cos(1:n_u*n), n_u, n);
  Y = reshape(sin(1:2:2*n_m*n_u), n_m, n_u);
  R = Y > 0.5;
  pval = [abs(Y(:)) ; 0.001; 1];
  Y = (Y .* double(R));  % set 'Y' values to 0 for movies not reviewed
  yval = [R(:) ; 1; 0];
  params = [X(:); Theta(:)];
  if partId == '1'
    [mu sigma2] = estimateGaussian(X);
    out = sprintf('%0.5f ', [mu(:); sigma2(:)]);
  elseif partId == '2'
    [bestEpsilon bestF1] = selectThreshold(yval, pval);
    out = sprintf('%0.5f ', [bestEpsilon(:); bestF1(:)]);
  elseif partId == '3'
    [J] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                       n, 0);
    out = sprintf('%0.5f ', J(:));
  elseif partId == '4'
    [J, grad] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                             n, 0);
    out = sprintf('%0.5f ', grad(:));
  elseif partId == '5'
    [J] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                       n, 1.5);
    out = sprintf('%0.5f ', J(:));
  elseif partId == '6'
    [J, grad] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                             n, 1.5);
    out = sprintf('%0.5f ', grad(:));
  end
end

Diff for: ‎machine-learning-ex8/ex8/token.mat

263 Bytes
Binary file not shown.

Diff for: ‎machine-learning-ex8/ex8/visualizeFit.m

+20
@@ -0,0 +1,20 @@
function visualizeFit(X, mu, sigma2)
%VISUALIZEFIT Visualize the dataset and its estimated distribution.
%   VISUALIZEFIT(X, mu, sigma2) This visualization shows you the
%   probability density function of the Gaussian distribution. Each example
%   has a location (x1, x2) that depends on its feature values.
%

[X1, X2] = meshgrid(0:.5:35);
Z = multivariateGaussian([X1(:) X2(:)], mu, sigma2);
Z = reshape(Z, size(X1));

plot(X(:, 1), X(:, 2), 'bx');
hold on;
% Do not plot if there are infinities
if (sum(isinf(Z)) == 0)
    contour(X1, X2, Z, 10.^(-20:3:0)');
end
hold off;

end
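A usage sketch, assuming X is the two-feature server-statistics dataset from this exercise (e.g. loaded from ex8data1.mat, which is not part of this commit) and that estimateGaussian.m, referenced by submit.m above, returns the fitted mean and variance vectors:

% Hypothetical example: fit a Gaussian to the two features and overlay its contours.
load('ex8data1.mat');                % assumed exercise data file providing X
[mu, sigma2] = estimateGaussian(X);  % assumed helper from this exercise
visualizeFit(X, mu, sigma2);
xlabel('Latency (ms)');
ylabel('Throughput (mb/s)');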
