-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtry_opt_func.m
158 lines (136 loc) · 5.88 KB
/
try_opt_func.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
%% Try optimization function to obtain variation sources Z
%Generate the observations of Gaussian profiles, X, using variation sources
%Z. Then, use the basic model proposed by Prof. Apley to get the constant
%matrix A. Starting with A and X, use optimization function of MATLAB to
%find out the variation sources to see if it can return the correct
%answers.
%% Generate Data
% Initialize parameters
options = ini_options();
% Specify the seed of random number generator
rngn = 2;
rng(rngn); %Set seed for random generator
% Generate random (p-dimensional) z's which are sources of variation
N = options.N; %Number of data points
p = options.p; %Dimension of variation sources
dd = options.dd;
psize = options.psize;
I = eye(N);
One = ones(N);
Z = rand(N,p); %N of p-dimenional points z
title_text1 = 'Originally generated 2-d data as variation source';
scat_func_tag = 1;
scatter_label2d_func = @scatter_label2d;
if scat_func_tag == 1
tag = 0; %tag for showing index of data in 2d scatter plot (0: not showing)
color = [0.7*ones(N,1),Z(:,1)/max(Z(:,1)),Z(:,2)/max(Z(:,2))];
scatter_label2d_func(Z,title_text1,dd,tag,psize,color) %Plot scatter plot of Z
elseif scat_func_tag == 2
tag = 1;
color = [0.7*ones(N,1),Z(:,1)/max(Z(:,1)),0.3*ones(N,1)];
a = int16(Z(:,2)*100); b = num2str(a); c = cellstr(b); %Labels
scatter_label2d_func(Z,title_text1,dd,tag,psize,color,c) %Plot scatter plot of Z
end
% Generated data set 1
% %Generate n-D embeded data at higher-dimensional space
% n = 10; %Dimension of observed data
% sigma_kernel = 0.3;
% [X,K] = generate_GK_nd(Z,n,sigma_kernel);
% Generated data set 2
%Generate N Gaussian profile images (2-D) as high-dimensional observations
%sigma_data: the sigma of those Gaussian profiles
%l: the largest index of pixel in one side of images of those Gaussian
sigma_data = options.sigma_data;
l = options.l;
n = (l+1)^2;
X = generate_Gaussian_profile(Z*l,N,sigma_data,l);
%Plot PCA score to see if there is any nonlinear pattern in the generated
%observed data
pct = options.pct; %the percentage of threhold eigen-values
pc1 = options.pc1; %The index of the first component to be plotted
pc2 = options.pc2;
pc3 = options.pc3;
az = options.az;
el = options.el;
title_text2 = 'Principle components of';
color_3D = [0.7*ones(N,1),Z(:,1)/max(Z(:,1)),Z(:,2)/max(Z(:,2))];
%Standardize each coordinates of X; in other words, for each column,
%substract means and divide by unbiased standard deviation of that column
std_X = std(X,0,1);
X_std = (X-repmat(mean(X,1),N,1))/diag(std_X);
%X_std = X;
%PCA on standardized observations
title_text3 = 'Standardized version: Principle components of';
[PC_std_ind,eig_values_std] = scatter_PCA_3d(X_std,pc1,pc2,pc3,pct,title_text3,color_3D,psize,az,el);
%%Inverse KPCA
%In svd (singular value decomposition, the singular values are sorted
%in non-increasing order.
[V,D,U] = svd(X_std/sqrt(N-1),'econ');
%Principle components of X (PCA scores)
PC_X = V*D(:,1:length(PC_std_ind));
%The standardized observations doesn't have full column rank, so we
%should pick out those columns of V that correspond to non-zero
%singular values.
%Plot 2-D scattering plot of first two coordinates obtained by PCA
title_text_PCA = 'Principle components of observations X';
scatter_label2d_func(PC_X(:,[1,2]),title_text_PCA,dd,tag,psize,color)
saveas(gca,[options.cwd,['2-3']],'jpg');
saveas(gca,[options.cwd,['2-3']],'fig');
%% Estimate Variation Sources
% Given vector a, find true coefficient matrix A
sigma_alg = options.sigma_alg;
a = ones(n,1);
K_true = Gaussian_Kernel(Z,sigma_alg);
B = X'-a*ones(1,N);
% A_true = B/K_true;
% In calculation, I found that the K_true is close to singular, so that I
% need to use regularized least-square
% Regularized coefficients
lambda = 10.^(-20:-1);
% Error of estimation
err = zeros(1,length(lambda));
for i = 1:length(lambda)
A_MAP = ((lambda(i)*eye(N)+K_true*K_true)\K_true*(X-ones(N,1)*a'))';
err(i) = sum(sum((B-A_MAP*K_true).^2))/n/N;
i
end
% There still some round-off error among the first 10 lambda. Choose the
% 11st lambda and use that to approximately calculate true coefficient
% matrix A.
A_app = ((lambda(11)*eye(N)+K_true*K_true)\K_true*B')';
% options_optim = optimoptions('fminunc','Algorithm','trust-region','SpecifyObjectiveGradient',true);
% problem.options = options_optim;
% problem.x0 = rand(N,p);
% problem.objective = @(Z)cost_func(Z,A_app,B,sigma_alg);
% problem.solver = 'fminunc';
% flag == 2: trust-region
% flag == 1: quasi-newton
flag = 2;
if flag == 2
options_optim = optimoptions(@fminunc,'Display','iter-detailed','Algorithm','trust-region','SpecifyObjectiveGradient',true);
fun = @(Z)cost_func(Z,A_app,B,sigma_alg);
Z0 = reshape(PC_X(:,[1,2])',[1,N*p]);
%Z0 = rand(1,N*p);
%Z0 = reshape(Z',[1,N*p]); % Reshape the initial N-by-p matrix Z into a row vector;
[Z_est,fval,exitflag,output] = fminunc(fun,Z0,options_optim);
Z_est_m = reshape(Z_est,[p,N])'; % Reshape the row vector Z_est_m into a N-by-p matrix
title_text2 = 'Estimate variaton sources, Z, by trust-region, Z_0 = 2-D PCA Scores';
else
options_optim = optimoptions(@fminunc,'Display','iter-detailed','Algorithm','quasi-newton','SpecifyObjectiveGradient',true,'MaxIterations',1000);
fun = @(Z)cost_func(Z,A_app,B,sigma_alg);
Z0 = reshape(PC_X(:,[1,2])',[1,N*p]);
%Z0 = rand(1,N*p);
%Z0 = reshape(Z',[1,N*p]);
[Z_est,fval,exitflag,output] = fminunc(fun,Z0,options_optim);
Z_est_m = reshape(Z_est,[p,N])';
title_text2 = 'Estimate variaton sources, Z, by quasi-netwon, Z_0 = 2-D PCA Scores';
end
if scat_func_tag == 1
tag = 0; %tag for showing index of data in 2d scatter plot (0: not showing)
scatter_label2d_func(Z_est_m,title_text2,dd,tag,psize,color) %Plot scatter plot of Z
elseif scat_func_tag == 2
tag = 1;
scatter_label2d_func(Z_est_m,title_text2,dd,tag,psize,color,c) %Plot scatter plot of Z
end
saveas(gca,[options.cwd,['2-4']],'jpg');
saveas(gca,[options.cwd,['2-4']],'fig');