
Commit a2c70ff

Authored Oct 31, 2017
Add files via upload
1 parent 723e4ab commit a2c70ff

File tree

9 files changed, +1,899 −0 lines changed


Diff for: ‎machine-learning-ex8/ex8.pdf

242 KB
Binary file not shown.

Diff for: ‎machine-learning-ex8/ex8/loadMovieList.m

+25
@@ -0,0 +1,25 @@
function movieList = loadMovieList()
%LOADMOVIELIST reads the fixed movie list in movie_ids.txt and returns a
%cell array of the movie names
%   movieList = LOADMOVIELIST() reads the fixed movie list in movie_ids.txt
%   and returns a cell array of the movie names in movieList.


%% Read the fixed movie list
fid = fopen('movie_ids.txt');

% Store all movies in cell array movieList{}
n = 1682;  % Total number of movies

movieList = cell(n, 1);
for i = 1:n
    % Read line
    line = fgets(fid);
    % Movie index (can ignore since it will be = i)
    [idx, movieName] = strtok(line, ' ');
    % Actual movie name
    movieList{i} = strtrim(movieName);
end
fclose(fid);

end
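A minimal usage sketch (assuming movie_ids.txt from the exercise data is in the current directory):

% Hypothetical usage: load the 1682 movie titles and inspect the first one.
movieList = loadMovieList();
fprintf('First movie: %s\n', movieList{1});        % e.g. "Toy Story (1995)"
fprintf('Total movies loaded: %d\n', numel(movieList));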

Diff for: ‎machine-learning-ex8/ex8/movie_ids.txt

+1,682
Large diffs are not rendered by default.

Diff for: ‎machine-learning-ex8/ex8/multivariateGaussian.m

+22
@@ -0,0 +1,22 @@
function p = multivariateGaussian(X, mu, Sigma2)
%MULTIVARIATEGAUSSIAN Computes the probability density function of the
%multivariate gaussian distribution.
%   p = MULTIVARIATEGAUSSIAN(X, mu, Sigma2) Computes the probability
%   density function of the examples X under the multivariate gaussian
%   distribution with parameters mu and Sigma2. If Sigma2 is a matrix, it is
%   treated as the covariance matrix. If Sigma2 is a vector, it is treated
%   as the \sigma^2 values of the variances in each dimension (a diagonal
%   covariance matrix)
%

k = length(mu);

if (size(Sigma2, 2) == 1) || (size(Sigma2, 1) == 1)
    Sigma2 = diag(Sigma2);
end

%fprintf('Sdet: %e\n',det(Sigma2));
X = bsxfun(@minus, X, mu(:)');
p = (2 * pi) ^ (- k / 2) * det(Sigma2) ^ (-0.5) * ...
    exp(-0.5 * sum(bsxfun(@times, X * pinv(Sigma2), X), 2));

end
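A usage sketch with invented parameters: when Sigma2 is passed as a vector, the function builds a diagonal covariance matrix, so the density reduces to a product of independent 1-D Gaussians per feature.

% Hypothetical example: density of two query points under an assumed diagonal Gaussian.
mu     = [14.1; 14.9];          % assumed feature means
sigma2 = [1.8; 1.7];            % assumed per-feature variances (diagonal covariance)
Xq     = [13.0 14.5; 22.0 25.0];
p = multivariateGaussian(Xq, mu, sigma2);
% p(1) should be relatively large (near the mean); p(2) much smaller (an outlier).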

Diff for: ‎machine-learning-ex8/ex8/normalizeRatings.m

+17
@@ -0,0 +1,17 @@
function [Ynorm, Ymean] = normalizeRatings(Y, R)
%NORMALIZERATINGS Preprocess data by subtracting mean rating for every
%movie (every row)
%   [Ynorm, Ymean] = NORMALIZERATINGS(Y, R) normalizes Y so that each movie
%   has a rating of 0 on average, and returns the mean rating in Ymean.
%

[m, n] = size(Y);
Ymean = zeros(m, 1);
Ynorm = zeros(size(Y));
for i = 1:m
    idx = find(R(i, :) == 1);
    Ymean(i) = mean(Y(i, idx));
    Ynorm(i, idx) = Y(i, idx) - Ymean(i);
end

end
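A small sketch on toy data (values invented for illustration): only rated entries, i.e. those with R(i, j) == 1, contribute to each movie's mean, and only those entries are mean-centred in Ynorm.

% Hypothetical 2-movie / 3-user example.
Y = [5 4 0;
     0 1 3];          % ratings (0 where unrated)
R = [1 1 0;
     0 1 1];          % R(i, j) = 1 if user j rated movie i
[Ynorm, Ymean] = normalizeRatings(Y, R);
% Ymean = [4.5; 2], Ynorm = [0.5 -0.5 0; 0 -1 1]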

Diff for: ‎machine-learning-ex8/ex8/selectThreshold.m

+56
@@ -0,0 +1,56 @@
function [bestEpsilon bestF1] = selectThreshold(yval, pval)
%SELECTTHRESHOLD Find the best threshold (epsilon) to use for selecting
%outliers
%   [bestEpsilon bestF1] = SELECTTHRESHOLD(yval, pval) finds the best
%   threshold to use for selecting outliers based on the results from a
%   validation set (pval) and the ground truth (yval).
%

bestEpsilon = 0;
bestF1 = 0;
F1 = 0;

stepsize = (max(pval) - min(pval)) / 1000;
for epsilon = min(pval):stepsize:max(pval)

    % ====================== YOUR CODE HERE ======================
    % Instructions: Compute the F1 score of choosing epsilon as the
    %               threshold and place the value in F1. The code at the
    %               end of the loop will compare the F1 score for this
    %               choice of epsilon and set it to be the best epsilon if
    %               it is better than the current choice of epsilon.
    %
    % Note: You can use predictions = (pval < epsilon) to get a binary vector
    %       of 0's and 1's of the outlier predictions

    predictions = (pval < epsilon);

    fp = sum((predictions == 1) & (yval == 0));
    fn = sum((predictions == 0) & (yval == 1));
    tp = sum((predictions == 1) & (yval == 1));

    prec = tp / (tp + fp);
    rec = tp / (tp + fn);

    F1 = 2 * prec * rec / (prec + rec);

    % =============================================================

    if F1 > bestF1
        bestF1 = F1;
        bestEpsilon = epsilon;
    end
end

end
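A toy check of the filled-in F1 logic (inputs invented for illustration): with a clear gap between the densities of anomalous and normal examples, the selected epsilon should fall inside that gap and F1 should reach 1.

% Hypothetical example: two obvious anomalies with much lower density pval.
yval = [0; 0; 0; 0; 1; 1];                      % ground-truth anomaly labels
pval = [0.20; 0.25; 0.22; 0.30; 0.001; 0.002];  % densities from the fitted Gaussian
[bestEpsilon, bestF1] = selectThreshold(yval, pval);
% bestEpsilon should land between roughly 0.002 and 0.20, with bestF1 = 1.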

Diff for: ‎machine-learning-ex8/ex8/submit.m

+77
@@ -0,0 +1,77 @@
function submit()
  addpath('./lib');

  conf.assignmentSlug = 'anomaly-detection-and-recommender-systems';
  conf.itemName = 'Anomaly Detection and Recommender Systems';
  conf.partArrays = { ...
    { ...
      '1', ...
      { 'estimateGaussian.m' }, ...
      'Estimate Gaussian Parameters', ...
    }, ...
    { ...
      '2', ...
      { 'selectThreshold.m' }, ...
      'Select Threshold', ...
    }, ...
    { ...
      '3', ...
      { 'cofiCostFunc.m' }, ...
      'Collaborative Filtering Cost', ...
    }, ...
    { ...
      '4', ...
      { 'cofiCostFunc.m' }, ...
      'Collaborative Filtering Gradient', ...
    }, ...
    { ...
      '5', ...
      { 'cofiCostFunc.m' }, ...
      'Regularized Cost', ...
    }, ...
    { ...
      '6', ...
      { 'cofiCostFunc.m' }, ...
      'Regularized Gradient', ...
    }, ...
  };
  conf.output = @output;

  submitWithConfiguration(conf);
end

function out = output(partId, auxstring)
  % Random Test Cases
  n_u = 3; n_m = 4; n = 5;
  X = reshape(sin(1:n_m*n), n_m, n);
  Theta = reshape(cos(1:n_u*n), n_u, n);
  Y = reshape(sin(1:2:2*n_m*n_u), n_m, n_u);
  R = Y > 0.5;
  pval = [abs(Y(:)) ; 0.001; 1];
  Y = (Y .* double(R));  % set 'Y' values to 0 for movies not reviewed
  yval = [R(:) ; 1; 0];
  params = [X(:); Theta(:)];
  if partId == '1'
    [mu sigma2] = estimateGaussian(X);
    out = sprintf('%0.5f ', [mu(:); sigma2(:)]);
  elseif partId == '2'
    [bestEpsilon bestF1] = selectThreshold(yval, pval);
    out = sprintf('%0.5f ', [bestEpsilon(:); bestF1(:)]);
  elseif partId == '3'
    [J] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                       n, 0);
    out = sprintf('%0.5f ', J(:));
  elseif partId == '4'
    [J, grad] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                             n, 0);
    out = sprintf('%0.5f ', grad(:));
  elseif partId == '5'
    [J] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                       n, 1.5);
    out = sprintf('%0.5f ', J(:));
  elseif partId == '6'
    [J, grad] = cofiCostFunc(params, Y, R, n_u, n_m, ...
                             n, 1.5);
    out = sprintf('%0.5f ', grad(:));
  end
end

Diff for: ‎machine-learning-ex8/ex8/token.mat

263 Bytes
Binary file not shown.

Diff for: ‎machine-learning-ex8/ex8/visualizeFit.m

+20
@@ -0,0 +1,20 @@
function visualizeFit(X, mu, sigma2)
%VISUALIZEFIT Visualize the dataset and its estimated distribution.
%   VISUALIZEFIT(X, mu, sigma2) This visualization shows you the
%   probability density function of the Gaussian distribution. Each example
%   has a location (x1, x2) that depends on its feature values.
%

[X1, X2] = meshgrid(0:.5:35);
Z = multivariateGaussian([X1(:) X2(:)], mu, sigma2);
Z = reshape(Z, size(X1));

plot(X(:, 1), X(:, 2), 'bx');
hold on;
% Do not plot if there are infinities
if (sum(isinf(Z)) == 0)
    contour(X1, X2, Z, 10.^(-20:3:0)');
end
hold off;

end
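A usage sketch, assuming X is the two-feature server-statistics dataset from this exercise (e.g. loaded from ex8data1.mat, which is not part of this commit) and that estimateGaussian.m, referenced by submit.m above, returns the fitted mean and variance vectors:

% Hypothetical example: fit a Gaussian to the two features and overlay its contours.
load('ex8data1.mat');                % assumed exercise data file providing X
[mu, sigma2] = estimateGaussian(X);  % assumed helper from this exercise
visualizeFit(X, mu, sigma2);
xlabel('Latency (ms)');
ylabel('Throughput (mb/s)');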
