|
16 | 16 | }
|
17 | 17 | ],
|
18 | 18 | "source": [
|
| 19 | + "#本文是协同过滤模型用于电影推荐\n", |
19 | 20 | "import scipy.io as sio\n",
|
20 | 21 | "import numpy as np\n",
|
21 | 22 | "import matplotlib.pyplot as plt\n",
|
|
77 | 78 | ],
|
78 | 79 | "source": [
|
79 | 80 | "#compute the cost\n",
|
80 |
| - "#note:no x0 = 1, no 1/m !!!!!!!!\n", |
| 81 | + "#note: no x0 = 1 term and no division by m !!!!!!!!\n", |
81 | 82 | "def computeCost(x,theta,y,r,reg=0):\n",
|
82 | 83 | " j = 0\n",
|
83 | 84 | " x_grad = np.zeros_like(x)\n",
|
|
93 | 94 | " return j,x_grad,theta_grad\n",
|
94 | 95 | " \n",
|
95 | 96 | "#Reduce the data set size so that this runs faster\n",
|
| 97 | + "#用一个小模型测试一下算法是否正确\n", |
96 | 98 | "nu, nm, nf = 4, 5, 3\n",
|
97 | 99 | "X1 = X[0:nm,0:nf]\n",
|
98 | 100 | "Theta1 = Theta[0:nu,0:nf]\n",
|
|
113 | 115 | "metadata": {},
|
114 | 116 | "outputs": [],
|
115 | 117 | "source": [
|
116 |
| - "#compute numerical_gradient\n", |
| 118 | + "#compute the gradient numerically\n", |
117 | 119 | "def eval_numerical_gradient(f, x, verbose=True, h=0.00001):\n",
|
118 | 120 | " \"\"\" \n",
|
119 | 121 | " a naive implementation of numerical gradient of f at x \n",
|
|
162 | 164 | }
|
163 | 165 | ],
|
164 | 166 | "source": [
|
165 |
| - "#check the gradient \n", |
| 167 | + "#check the gradient; this is usually done with small values and a small model because the numerical gradient is too expensive to compute\n", |
166 | 168 | "#create the small data\n",
|
167 | 169 | "\n",
|
168 | 170 | "x_t = np.random.rand(4,3)\n",
|
|
179 | 181 | "nm_t = 4\n",
|
180 | 182 | "nf_t = 3\n",
|
181 | 183 | "\n",
|
182 |
| - "#j,x_grad,theta_grad = computeCost(X1,Theta1,Y1,R1,0)\n", |
183 |
| - "#f = lambda W:computeCost(X1,Theta1,Y1,R1,0)[0]\n", |
| 184 | + "\n", |
184 | 185 | "j,x_grad,theta_grad = computeCost(X_t,Theta_t,y_t,r_t,1.5)\n",
|
185 | 186 | "f = lambda W:computeCost(X_t,Theta_t,y_t,r_t,1.5)[0] #f = J(x)\n",
|
186 | 187 | "x_grad1 = eval_numerical_gradient(f,X_t , verbose=False, h=0.00001)\n",
|
187 | 188 | "theta_grad1 = eval_numerical_gradient(f, Theta_t, verbose=False, h=0.00001)\n",
|
| 189 | + "#stack them side by side for comparison: the first three columns are x_grad, the last three are x_grad1\n", |
188 | 190 | "display_X = np.hstack((x_grad,x_grad1))\n",
|
189 | 191 | "print display_X\n",
|
190 | 192 | "display_Theta = np.hstack((theta_grad,theta_grad1))\n",
|
|
210 | 212 | }
|
211 | 213 | ],
|
212 | 214 | "source": [
|
213 |
| - "def load_data(filename):\n", |
| 215 | + "def load_data(filename):#导入电影数据\n", |
214 | 216 | " movieList = []\n",
|
215 | 217 | " file = open(filename)\n",
|
216 | 218 | " for line in file.readlines():\n",
|
|
279 | 281 | ],
|
280 | 282 | "source": [
|
281 | 283 | "#Learning Movie Ratings\n",
|
282 |
| - "\n", |
| 284 | + "#将新的用户的评分数据放入数据集合,进行训练\n", |
283 | 285 | "#Add our own ratings to the data matrix\n",
|
284 | 286 | "YY = np.hstack((my_ratings,Y))\n",
|
285 | 287 | "RR = np.hstack((my_ratings!=0,R))\n",
|
286 | 288 | "print YY.shape,RR.shape\n",
|
287 | 289 | "#Normalize Ratings\n",
|
288 |
| - "\n", |
| 290 | + "#进行数值归一化\n", |
289 | 291 | "def normalizeRatings(y,r):\n",
|
290 | 292 | " m,n = y.shape\n",
|
291 | 293 | " ymean = np.zeros((m,1))\n",
|
|
331 | 333 | ],
|
332 | 334 | "source": [
|
333 | 335 | "from scipy import optimize\n",
|
334 |
| - "\n", |
335 |
| - "#we use the scipy.optimize.fmin_cg,so we need to change the \n", |
336 |
| - "#function computeCost(),beacause x must be 1-D\n", |
| 336 | + "#还是使用优化算法去训练\n", |
337 | 337 | "\n",
|
338 | 338 | "args = (Ynorm,RR,num_users1,num_movies1,num_features1,1.5)\n",
|
339 | 339 | "params = np.hstack((XX.ravel(),TTheta.ravel())).ravel()\n",
|
340 |
| - "\n", |
| 340 | + "#we use the scipy.optimize.fmin_cg,so we need to change the \n", |
| 341 | + "#function computeCost(),because x must be 1-D\n", |
341 | 342 | "def Cost(params,*args):\n",
|
342 | 343 | " '''now params is 1-D,include [x,theta]'''\n",
|
343 | 344 | " y,r,nu,nm,nf,reg = args\n",
|
|
367 | 368 | " \n",
|
368 | 369 | "res = optimize.fmin_cg(Cost,x0=params,fprime=grad,args=args,maxiter=100)\n",
|
369 | 370 | "#get the bestX,bestTheta\n",
|
370 |
| - "\n", |
| 371 | + "#改变一下参数的shape\n", |
371 | 372 | "bestX = res[0:num_movies1*num_features1].reshape(num_movies1,num_features1)\n",
|
372 | 373 | "bestTheta = res[num_movies1*num_features1:].reshape(num_users1,num_features1)\n",
|
373 | 374 | "\n",
|
374 | 375 | "\n",
|
375 | 376 | "print bestX.shape,bestTheta.shape\n",
|
376 |
| - "\n", |
| 377 | + "#预测一下分数\n", |
377 | 378 | "score = bestX.dot(bestTheta.T) + Ymean\n",
|
378 |
| - "\n", |
| 379 | + "#only the first column holds the new user's scores\n", |
379 | 380 | "my_score = score[:,0] #column 0 is my score\n",
|
380 | 381 | "print score.shape\n",
|
381 | 382 | "print my_score[:5]\n",
|
| 383 | + "#排序,推荐最高的分数的电影给新用户\n", |
382 | 384 | "sort_index = my_score.argsort()\n",
|
383 | 385 | "favorite = 10\n",
|
384 | 386 | "for i in xrange(favorite):\n",
|
|
0 commit comments