
Machine Learning Notes (2): Logistic Regression

2017-07-28 22:31

Logistic Regression

Linear classification: finding the decision boundary that predicts whether a student passes based on exam scores.

1. Imports and setup

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy.optimize import minimize
from sklearn.preprocessing import PolynomialFeatures

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_seq_items', None)

#%config InlineBackend.figure_formats = {'pdf',}
%matplotlib inline

import seaborn as sns
sns.set_context('notebook')
sns.set_style('white')


2. Load and inspect the data

data = np.loadtxt('data1.txt', delimiter=',')
print(data)


The data looks like this:

array([[ 34.62365962,  78.02469282,   0.        ],
       [ 30.28671077,  43.89499752,   0.        ],
       [ 35.84740877,  72.90219803,   0.        ],
       [ 60.18259939,  86.3085521 ,   1.        ],
       [ 79.03273605,  75.34437644,   1.        ],
       ……
       [ 42.26170081,  87.10385094,   1.        ],
       [ 99.31500881,  68.77540947,   1.        ],
       [ 55.34001756,  64.93193801,   1.        ],
       [ 74.775893  ,  89.5298129 ,   1.        ]])


Build the matrices used for the linear-algebra computations:

X = np.c_[np.ones((data.shape[0], 1)), data[:, 0:2]]   # prepend a column of ones for the intercept term
y = np.c_[data[:, 2]]                                  # labels as an m x 1 column vector
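
A quick shape check (m is the number of rows in data1.txt):

print(X.shape, y.shape)   # (m, 3) and (m, 1): intercept column plus two scores, and the labels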


3. Scatter plot of the data

neg = data[:, 2] == 0      # boolean mask for Fail (y == 0)
pos = data[:, 2] == 1      # boolean mask for Pass (y == 1)
axes = plt.gca()
axes.scatter(data[pos][:, 0], data[pos][:, 1], marker='+', c='k', s=60, linewidth=2, label='Pass')
axes.scatter(data[neg][:, 0], data[neg][:, 1], c='y', s=60, label='Fail')
axes.set_xlabel('Exam 1 score')
axes.set_ylabel('Exam 2 score')
axes.legend(frameon=True, fancybox=True)   # legend styling




4. The sigmoid function

A regression produces a continuous output whose range is unbounded. The sigmoid function maps that output into the interval (0, 1), so it can be read as a probability and used to make a decision.

$$g(z) = \frac{1}{1 + e^{-z}}$$

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


The graph of the function:
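
A minimal snippet to draw the curve:

z = np.linspace(-10, 10, 200)
plt.plot(z, sigmoid(z))
plt.axhline(0.5, ls='--', c='gray')   # g(0) = 0.5, the decision threshold
plt.xlabel('z')
plt.ylabel('g(z)')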



5. Cost function

The cost function used for linear regression:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\frac{1}{2}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2$$

But plugging in the logistic hypothesis $h_\theta(x) = g(\theta^T x)$ makes this cost function non-convex,



which makes the minimum hard to compute: gradient descent can get stuck in local optima.

We therefore need a cost function of a different form, one that is guaranteed to be convex for logistic regression.



A suitable per-example cost:

$$\mathrm{Cost}(h_\theta(x), y) = \begin{cases} -\log(h_\theta(x)) & \text{if } y = 1 \\ -\log(1 - h_\theta(x)) & \text{if } y = 0 \end{cases}$$

If the true class is $y = 1$ and the model outputs $h_\theta(x) = 1$, then $\mathrm{Cost} = 0$; as $h_\theta(x) \to 0$ the cost grows without bound, so confident wrong predictions are penalized heavily.

The full cost function:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\mathrm{Cost}\left(h_\theta(x^{(i)}), y^{(i)}\right)$$

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_\theta(x^{(i)})\right) - (1 - y^{(i)})\log\left(1 - h_\theta(x^{(i)})\right)\right]$$

The vectorized cost function (matrix form):

$$J(\theta) = -\frac{1}{m}\left(\log\left(g(X\theta)\right)^{T}y + \log\left(1 - g(X\theta)\right)^{T}(1 - y)\right)$$

The partial derivatives (the gradient):

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$

The vectorized gradient:

$$\nabla_\theta J(\theta) = \frac{1}{m}X^{T}\left(g(X\theta) - y\right)$$

Gradient descent minimizes $J(\theta)$ by repeating, until convergence:

$$\theta_j := \theta_j - \alpha\frac{\partial J(\theta)}{\partial \theta_j}$$
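
For reference, a minimal batch gradient-descent sketch of this update (alpha and n_iter are illustrative choices, not values from this exercise; the post itself uses scipy's minimize below):

def gradient_descent(X, y, alpha=0.001, n_iter=100000):
    theta = np.zeros((X.shape[1], 1))
    m = y.size
    for _ in range(n_iter):
        # theta := theta - alpha * (1/m) * X^T (g(X theta) - y)
        theta -= alpha * (1.0 / m) * X.T.dot(sigmoid(X.dot(theta)) - y)
    return theta

On unscaled exam scores this converges slowly, which is one practical reason to hand the problem to an off-the-shelf optimizer instead.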



Code:

def costFunction(theta, X, y):
    m = y.size
    h = sigmoid(X.dot(theta))
    J = -1.0 * (1.0 / m) * (np.log(h).T.dot(y) + np.log(1 - h).T.dot(1 - y))
    if np.isnan(J[0]):
        return np.inf
    return J[0]

def gradient(theta, X, y):
    m = y.size
    h = sigmoid(X.dot(theta.reshape(-1, 1)))   # theta arrives flattened from minimize
    grad = (1.0 / m) * X.T.dot(h - y)
    return grad.flatten()                      # minimize expects a 1-D gradient
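
A quick sanity check: with theta = 0 the hypothesis is 0.5 for every sample, so the cost should be $-\log(0.5) \approx 0.693$ regardless of the data:

print(costFunction(np.zeros(X.shape[1]), X, y))   # ≈ 0.693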


6. Minimizing the cost function with minimize

initial_theta = np.zeros(X.shape[1])
res = minimize(costFunction, initial_theta, args=(X, y), jac=gradient, options={'maxiter': 400})
print(res)


Output:

status: 0
success: True
njev: 28
nfev: 28
hess_inv: array([[  3.24739469e+03,  -2.59380769e+01,  -2.63469561e+01],
[ -2.59380769e+01,   2.21449124e-01,   1.97772068e-01],
[ -2.63469561e+01,   1.97772068e-01,   2.29018831e-01]])
fun: 0.20349770158944075
x: array([-25.16133401,   0.20623172,   0.2014716 ])
message: 'Optimization terminated successfully.'
jac: array([ -2.73305312e-10,   1.43144026e-07,  -1.58965802e-07])


The minimize function:

minimize(fun, x0, args=(), method=None, jac=None, hess=None, hessp=None, bounds=None, constraints=(), tol=None, callback=None, options=None)


fun: the objective function to be minimized

x0: initial guess for the minimizer (ndarray)

args: extra arguments passed to the objective function and its derivatives (Jacobian, Hessian)

jac: Jacobian (gradient) of the objective function

options: dict of solver options, e.g. maxiter (int): the maximum number of iterations (see the toy example below)
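
As a standalone illustration of these parameters on a toy objective (not part of the original exercise):

f = lambda x: (x[0] - 1)**2 + (x[1] + 2)**2              # minimum at (1, -2)
f_grad = lambda x: np.array([2 * (x[0] - 1), 2 * (x[1] + 2)])
toy = minimize(f, x0=np.zeros(2), jac=f_grad, options={'maxiter': 400})
print(toy.x)   # approximately [ 1., -2.]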

7. Plot the decision boundary

plt.scatter(45, 85, s=60, c='r', marker='v', label='(45, 85)')   # sample point to predict
# plotData is assumed to wrap the scatter-plot code from section 3
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Admitted', 'Not admitted')
x1_min, x1_max = X[:, 1].min(), X[:, 1].max()
x2_min, x2_max = X[:, 2].min(), X[:, 2].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
h = sigmoid(np.c_[np.ones((xx1.ravel().shape[0], 1)), xx1.ravel(), xx2.ravel()].dot(res.x))
h = h.reshape(xx1.shape)
plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='b')   # the h = 0.5 level set is the boundary
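
The h = 0.5 contour is exactly the set where $\theta^T x = 0$, so with two features the boundary is a straight line that can also be drawn in closed form:

theta = res.x
plot_x1 = np.array([x1_min, x1_max])
plot_x2 = -(theta[0] + theta[1] * plot_x1) / theta[2]   # solve theta^T x = 0 for x2
plt.plot(plot_x1, plot_x2, 'b--')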


Output:

(figure: the training data with the h = 0.5 decision boundary and the sample point (45, 85))

numpy.meshgrid():

x=[-3,-2,-1,0,1,2,3]
y=[-2,-1,0,1,2]
X,Y= np.meshgrid(x,y)
X,Y


Here meshgrid(x, y) builds a pair of matrices: X contains copies of the vector x along its rows, and Y contains copies of the vector y along its columns.

x runs from -3 to 3 in steps of 1, and those values are tiled into the matrix X;

likewise, y runs from -2 to 2 in steps of 1 and is tiled into the matrix Y.

X=
  -3 -2 -1 0 1 2 3
  -3 -2 -1 0 1 2 3
  -3 -2 -1 0 1 2 3
  -3 -2 -1 0 1 2 3
  -3 -2 -1 0 1 2 3
Y =
  -2 -2 -2 -2 -2 -2 -2
  -1 -1 -1 -1 -1 -1 -1
  0 0 0 0 0 0 0
  1 1 1 1 1 1 1
  2 2 2 2 2 2 2


numpy.linspace():

np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)


>>> np.linspace(2.0, 3.0, num=5)
array([ 2.  ,  2.25,  2.5 ,  2.75,  3.  ])
>>> np.linspace(2.0, 3.0, num=5, endpoint=False)
array([ 2. ,  2.2,  2.4,  2.6,  2.8])
>>> np.linspace(2.0, 3.0, num=5, retstep=True)
(array([ 2.  ,  2.25,  2.5 ,  2.75,  3.  ]), 0.25)


numpy.flatten() vs. numpy.ravel():

Both collapse a multi-dimensional array into one dimension.

>>> x = np.array([[1, 2], [3, 4]])
>>> x
array([[1, 2],
       [3, 4]])
>>> x.flatten()
array([1, 2, 3, 4])
>>> x.ravel()
array([1, 2, 3, 4])
# both default to row-major (C) order
>>> x.flatten('F')
array([1, 3, 2, 4])
>>> x.ravel('F')
array([1, 3, 2, 4])
# the difference: flatten returns a copy, while ravel returns a view (where possible)
>>> x.reshape(-1)
array([1, 2, 3, 4])
>>> x.T.reshape(-1)
array([1, 3, 2, 4])
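
A small demonstration of the copy-vs-view difference:

>>> r = x.ravel()
>>> r[0] = 99          # writes through to x: ravel returned a view
>>> x
array([[99,  2],
       [ 3,  4]])
>>> f = x.flatten()
>>> f[0] = 0           # x is unchanged: flatten returned a copy
>>> x
array([[99,  2],
       [ 3,  4]])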


numpy.reshape():

Rebuilds an array into a given shape.

>>> a = np.array([[1, 2, 3], [4, 5, 6]])
>>> a.reshape(6)
array([1, 2, 3, 4, 5, 6])


pyplot.contour():

Draws contour lines.

plt.contour(xx, yy, h, [0.75])   # xx, yy: coordinate grids; h: the values on the grid; [a]: draw the contour where h == a


# redraw the data (as in section 3), then overlay several probability level sets
axes_1 = plt.gca()
axes_1.scatter(data[pos][:, 0], data[pos][:, 1], marker='+', c='k', s=60, linewidth=2, label='Admitted')
axes_1.scatter(data[neg][:, 0], data[neg][:, 1], c='y', s=60, label='Not admitted')
axes_1.set_xlabel('Exam 1 score')
axes_1.set_ylabel('Exam 2 score')
axes_1.legend(frameon=True, fancybox=True)
plt.contour(xx1, xx2, h, [0.25], linewidths=1, colors='b')   # P(pass) = 0.25
plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='r')    # the decision boundary
plt.contour(xx1, xx2, h, [0.99], linewidths=1, colors='g')   # P(pass) = 0.99




8. Prediction accuracy

def predict(theta, X):
    '''Predict labels using the learned logistic regression parameters'''
    m, n = X.shape
    p = np.zeros(shape=(m, 1))
    h = sigmoid(X.dot(theta.T))
    for it in range(0, h.shape[0]):
        if h[it] > 0.5:
            p[it, 0] = 1
        else:
            p[it, 0] = 0
    return p

# Compute accuracy on our training set
p = predict(res.x, X)
print('Train Accuracy: %f' % ((y[np.where(p == y)].size / float(y.size)) * 100.0))


Output:

Train Accuracy: 89.000000
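
The sample point (45, 85) marked in section 7 can be scored directly, and the loop in predict can also be replaced by a vectorized one-liner (predict_vec is a name introduced here for illustration, not from the original exercise):

# probability that a student with scores (45, 85) passes
prob = sigmoid(np.array([1, 45, 85]).dot(res.x))
print('Pass probability for (45, 85): %f' % prob)

# vectorized alternative to the loop in predict()
def predict_vec(theta, X, threshold=0.5):
    return (sigmoid(X.dot(theta)) >= threshold).astype(int)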


numpy.where()

y[np.where(p == y)]   # np.where returns the indices where the condition holds
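
A concrete example:

>>> a = np.array([0, 1, 1, 0])
>>> np.where(a == 1)
(array([1, 2]),)
>>> a[np.where(a == 1)]
array([1, 1])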


For the full code, see https://github.com/icepoint666/MachineLearning
