# -*- coding: utf-8 -*-
"""
Created on Sat Jun 20 12:27:16 2015
@author: Pavitrakumar
"""
from __future__ import division
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy import optimize
def sigmoid(z):
    g = 1.0 / (1.0 + np.exp(-z))
    return g
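#quick check (illustrative): sigmoid(0) = 0.5; the output saturates toward 1
#for large positive z and toward 0 for large negative z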
"""
This function computes the cost (J) of using theta as parameter for
regularized logistic regression and the gradient (grad) of the cost
w.r.t to the parameters
"""
#if returnJ is True, function returns J
#if returnJ is False, function returns grad
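#in math form (m = number of training examples, lam = regularization strength):
#  J(theta) = (1/m)*sum(-y*log(h) - (1-y)*log(1-h)) + (lam/(2m))*sum(theta[1:]**2)
#  grad_0   = (1/m)*sum((h - y)*x_0)                       (bias term, not regularized)
#  grad_j   = (1/m)*sum((h - y)*x_j) + (lam/m)*theta_j     (for j >= 1)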
def cost_function(theta,X,y,lam,returnJ = True):
    #calculating cost
    no_of_rows = len(y)
    h = sigmoid(np.dot(X,theta))
    #J = (1.0/no_of_rows)*np.sum(np.multiply(-y,np.log(h))-np.multiply((1-y),np.log(1-h)),axis = 0)
    #both lines compute the same thing (above and below)
    J = (1.0/no_of_rows)*(np.dot(-y.T,np.log(h))-np.dot((1-y).T,np.log(1-h)))
    reg = (lam/(2.0*no_of_rows))*np.sum(np.power(theta[1:],2))
    J = J + reg
    if returnJ is True:
        return J
    #calculating gradient
    grad = np.zeros(theta.shape)
    error = h - y
    grad[0] = (1.0/no_of_rows)*(np.dot(error.T,X))[0] #for theta0
    grad[1:] = (((1.0/no_of_rows)*(np.dot(error.T,X)).T) + (lam/no_of_rows)*theta)[1:] #for the remaining theta terms (theta0 is not regularized)
    return grad
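#sanity check (illustrative example, not part of the original script): with
#theta = 0 every prediction is sigmoid(0) = 0.5, so the unregularized cost is
#-log(0.5) ~= 0.693, e.g.:
#cost_function(np.zeros(3), np.ones((4,3)), np.array([0,1,0,1]), 0.1)  #-> ~0.693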
#thin wrappers so the scipy optimizers can request the cost and the gradient separately
def returnJ(theta,X,y,lam):
    return cost_function(theta,X,y,lam)
def returnThetaGrad(theta,X,y,lam):
    return cost_function(theta,X,y,lam,False)
def fit(X,y,maxiter = 50,method = 'TNC',lam = 0.1):
    no_of_features = X.shape[1]
    no_of_labels = len(set(y))
    fit_theta = np.zeros((no_of_labels,no_of_features+1))
    #adding a vector of ones to the X matrix (as the first column) - bias term for each training example
    X = np.insert(X,0,1,axis=1)
    initial_theta = np.zeros(no_of_features+1)
    for i in range(no_of_labels):
        temp_y = (y == i) + 0 #here labels are 0,1,2,3...; if they are 1,2,3,4..., use: temp_y = (y == (i+1)) + 0
        #temp_y is a vector of size no_of_training_examples
        #since each iteration corresponds to finding theta for a single class (one-vs-all),
        #each time we only take the prediction of class 'i' on all training examples
        _res = optimize.fmin_cg(returnJ, fprime=returnThetaGrad, x0=initial_theta, args=(X, temp_y, lam), maxiter=maxiter, disp=False, full_output=True)
        fit_theta[i,:] = _res[0]
        """
        alternative minimization routine (does the same job as fmin_cg above)
        """
        #options = {'maxiter': maxiter}
        #_res = optimize.minimize(returnJ, initial_theta, jac=returnThetaGrad, method=method, args=(X, temp_y, lam), options=options)
        #fit_theta[i,:] = _res.x
    return fit_theta
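#each row of the returned fit_theta holds one binary classifier's weights (bias first);
#predict (below) scores a sample against every row and picks the best class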
def predict(theta,X):
    X = np.insert(X,0,1,axis=1)
    h = np.dot(X,theta.T) #one score per class for every example
    #print(h) #debug: uncomment to inspect the raw class scores
    return h.argmax(1)
"""
testing using IRIS data set
"""
import sklearn.datasets as datasets
from sklearn import cross_validation
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.9)
#The optimized theta values after training using the training data
theta = fit(X_train,y_train)
pred = predict(theta,X_test)
from sklearn.metrics import accuracy_score
print accuracy_score(y_test,pred)
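#optional sanity check (added example, not part of the original script):
#compare against sklearn's built-in LogisticRegression on the same split.
#sklearn parameterizes regularization as C ~ 1/lam and uses a different solver,
#so the accuracies should be close but not identical
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(C=1.0/0.1, max_iter=200).fit(X_train, y_train)
print(accuracy_score(y_test, clf.predict(X_test)))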