Machine Learning Notes (2): Logistic Regression
2017-07-28 22:31
Logistic Regression
Linear classification: finding the decision boundary that predicts whether a student passes, based on two exam scores.
1. Imports and setup
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.preprocessing import PolynomialFeatures

pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 150)
pd.set_option('display.max_seq_items', None)
#%config InlineBackend.figure_formats = {'pdf',}
%matplotlib inline

import seaborn as sns
sns.set_context('notebook')
sns.set_style('white')
2. Load and inspect the data
data = np.loadtxt('data1.txt', delimiter=',')
print(data)
The data:
array([[ 34.62365962,  78.02469282,   0.        ],
       [ 30.28671077,  43.89499752,   0.        ],
       [ 35.84740877,  72.90219803,   0.        ],
       [ 60.18259939,  86.3085521 ,   1.        ],
       [ 79.03273605,  75.34437644,   1.        ],
       ......
       [ 42.26170081,  87.10385094,   1.        ],
       [ 99.31500881,  68.77540947,   1.        ],
       [ 55.34001756,  64.93193801,   1.        ],
       [ 74.775893  ,  89.5298129 ,   1.        ]])
Build the matrices used for the linear-algebra computations; a column of ones is prepended to X for the intercept term:
X = np.c_[np.ones((data.shape[0],1)), data[:,0:2]]
y = np.c_[data[:,2]]
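As a quick sanity check (a small sketch of my own; it assumes the classic 100-sample exam dataset used here), the shapes should come out as a 100×3 design matrix and a 100×1 label column:

print(X.shape, y.shape)   # expected: (100, 3) (100, 1)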
3. Scatter plot of the data
neg = data[:,2] == 0   # boolean mask selecting the Fail samples
pos = data[:,2] == 1   # boolean mask selecting the Pass samples
axes = plt.gca()
axes.scatter(data[pos][:,0], data[pos][:,1], marker='+', c='k', s=60, linewidth=2, label='Pass')
axes.scatter(data[neg][:,0], data[neg][:,1], c='y', s=60, label='Fail')
axes.set_xlabel('Exam 1 score')
axes.set_ylabel('Exam 2 score')
axes.legend(frameon=True, fancybox=True)   # legend styling
4. The sigmoid function
A regression model outputs a continuous value whose range is unbounded. The sigmoid function maps that value into the interval (0, 1), turning it into a probability we can use for classification.
$$g(z)=\frac{1}{1+e^{-z}}$$

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))
Its graph is the familiar S-shaped curve: it passes through (0, 0.5) and saturates at 0 and 1.
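A quick numeric check of the implementation (the test values are my own): sigmoid(0) is exactly 0.5, and the function broadcasts over numpy arrays:

print(sigmoid(0))                        # 0.5
print(sigmoid(np.array([-10, 0, 10])))   # roughly [ 4.5e-05  5.0e-01  1.0e+00 ]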
5. The cost function
The cost function used for linear regression is:
$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\frac{1}{2}\left(h_\theta(x^{(i)})-y^{(i)}\right)^{2}$$
But taking $h_\theta(x)$ to be the logistic hypothesis $g(\theta^{T}x)$ makes this cost function non-convex, which makes finding the global minimum difficult. We need a different form of cost function that keeps the logistic-regression cost convex.
A cost function suited to logistic regression:
$$\mathrm{Cost}(h_\theta(x),y)=\begin{cases}-\log\left(h_\theta(x)\right), & \text{if } y=1\\-\log\left(1-h_\theta(x)\right), & \text{if } y=0\end{cases}$$
If the true class is $y=1$ and the hypothesis outputs $h_\theta(x)=1$, then $\mathrm{Cost}=0$; as $h_\theta(x)$ approaches 0 the cost grows without bound, so confident wrong predictions are heavily penalized.
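To make that concrete (a small numeric sketch of my own): for a positive example ($y=1$) the cost $-\log(h)$ is tiny when $h$ is near 1 and explodes as $h$ falls toward 0:

for h in (0.99, 0.5, 0.01):
    print(h, -np.log(h))   # 0.01005..., 0.69315..., 4.60517...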
The cost function:
$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\mathrm{Cost}\left(h_\theta(x^{(i)}),y^{(i)}\right)$$
$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log\left(h_\theta(x^{(i)})\right)-\left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right]$$
The vectorized cost function (matrix form):
$$J(\theta)=-\frac{1}{m}\left(\log\left(g(X\theta)\right)^{T}y+\log\left(1-g(X\theta)\right)^{T}(1-y)\right)$$
The partial derivatives (the gradient):
$$\frac{\partial J(\theta)}{\partial\theta_j}=\frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x_j^{(i)}$$
The vectorized gradient:
$$\nabla J(\theta)=\frac{1}{m}X^{T}\left(g(X\theta)-y\right)$$
Gradient descent minimizes $J(\theta)$ by repeatedly applying the update $\theta_j := \theta_j - \alpha\,\frac{\partial J(\theta)}{\partial\theta_j}$, where $\alpha$ is the learning rate.
Code for the cost and gradient:
def costFunction(theta, X, y):
    m = y.size
    h = sigmoid(X.dot(theta))
    J = -1.0*(1.0/m)*(np.log(h).T.dot(y) + np.log(1-h).T.dot(1-y))
    if np.isnan(J[0]):           # log(0) produces nan; report an infinite cost instead
        return np.inf
    return J[0]

def gradient(theta, X, y):
    m = y.size
    h = sigmoid(X.dot(theta.reshape(-1,1)))   # reshape: the optimizer passes theta as a flat array
    grad = (1.0/m)*X.T.dot(h - y)
    return grad.flatten()
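The post minimizes $J(\theta)$ with scipy below; for completeness, here is what a plain batch gradient-descent loop applying the update rule above might look like (a sketch of my own; the learning rate and iteration count are arbitrary, and on these unscaled exam scores convergence is slow, which is one reason to prefer an off-the-shelf optimizer):

def gradient_descent(X, y, alpha=0.001, num_iters=100000):
    theta = np.zeros(X.shape[1])
    for _ in range(num_iters):
        theta = theta - alpha * gradient(theta, X, y)   # theta_j := theta_j - alpha * dJ/dtheta_j
    return theta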
6. Minimizing the cost function with minimize
initial_theta = np.zeros(X.shape[1])
res = minimize(costFunction, initial_theta, args=(X, y), jac=gradient, options={'maxiter': 400})
print(res)
Output:
  status: 0
 success: True
    njev: 28
    nfev: 28
hess_inv: array([[  3.24739469e+03,  -2.59380769e+01,  -2.63469561e+01],
                 [ -2.59380769e+01,   2.21449124e-01,   1.97772068e-01],
                 [ -2.63469561e+01,   1.97772068e-01,   2.29018831e-01]])
     fun: 0.20349770158944075
       x: array([-25.16133401,   0.20623172,   0.2014716 ])
 message: 'Optimization terminated successfully.'
     jac: array([ -2.73305312e-10,   1.43144026e-07,  -1.58965802e-07])
The minimize function (a standalone usage sketch follows the parameter list below):
minimize(fun, x0, args=(), method=None, jac=None, hess=None, hessp=None, bounds=None, constraints=(), tol=None, callback=None, options=None)
fun: the objective function to minimize
x0: the initial guess (ndarray)
args: extra arguments passed to the objective function and its derivatives (Jacobian, Hessian)
jac: Jacobian (gradient) of the objective function
options: a dict of solver options (maxiter (int) is the maximum number of iterations)
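To see the API in isolation, a tiny sketch (a toy objective of my own, not from the post) minimizing a one-variable quadratic with an explicit gradient, using the imports above:

f = lambda t: (t[0] - 3.0)**2                 # objective, minimum at t = 3
g = lambda t: np.array([2.0 * (t[0] - 3.0)])  # its gradient
r = minimize(f, np.zeros(1), jac=g, options={'maxiter': 400})
print(r.x, r.fun)   # approximately [ 3.]  0.0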
7. Plotting the decision boundary
plt.scatter(45, 85, s=60, c='r', marker='v', label='(45, 85)')   # a sample student to predict
plotData(data, 'Exam 1 score', 'Exam 2 score', 'Admitted', 'Not admitted')   # scatter helper, as in section 3
x1_min, x1_max = X[:,1].min(), X[:,1].max()
x2_min, x2_max = X[:,2].min(), X[:,2].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max), np.linspace(x2_min, x2_max))
h = sigmoid(np.c_[np.ones((xx1.ravel().shape[0],1)), xx1.ravel(), xx2.ravel()].dot(res.x))
h = h.reshape(xx1.shape)
plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='b')
Output: the scatter plot with the h = 0.5 contour drawn as the (linear) decision boundary.
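The code also marks a candidate student at (45, 85); with the fitted parameters, that student's admission probability can be computed directly (a quick check of my own, using the res.x printed above):

prob = sigmoid(np.array([1, 45, 85]).dot(res.x))
print(prob)   # roughly 0.776 -- above 0.5, so predicted to pass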
numpy.meshgrid():
x = [-3, -2, -1, 0, 1, 2, 3]
y = [-2, -1, 0, 1, 2]
X, Y = np.meshgrid(x, y)   # note: these demo names shadow the X and y defined earlier
X, Y
Here meshgrid(x, y) produces a pair of matrices: X takes the vector x as its rows, while Y takes the vector y as its columns. x runs from -3 to 3 in steps of 1, and these values fill every row of X; likewise, y runs from -2 to 2 in steps of 1 and fills every column of Y.
X = array([[-3, -2, -1,  0,  1,  2,  3],
           [-3, -2, -1,  0,  1,  2,  3],
           [-3, -2, -1,  0,  1,  2,  3],
           [-3, -2, -1,  0,  1,  2,  3],
           [-3, -2, -1,  0,  1,  2,  3]])
Y = array([[-2, -2, -2, -2, -2, -2, -2],
           [-1, -1, -1, -1, -1, -1, -1],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 1,  1,  1,  1,  1,  1,  1],
           [ 2,  2,  2,  2,  2,  2,  2]])
numpy.linspace():
np.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)
>>> np.linspace(2.0, 3.0, num=5)
array([ 2.  ,  2.25,  2.5 ,  2.75,  3.  ])
>>> np.linspace(2.0, 3.0, num=5, endpoint=False)
array([ 2. ,  2.2,  2.4,  2.6,  2.8])
>>> np.linspace(2.0, 3.0, num=5, retstep=True)
(array([ 2.  ,  2.25,  2.5 ,  2.75,  3.  ]), 0.25)
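Note that the decision-boundary code above calls np.linspace(x1_min, x1_max) without num, so it relies on the default of 50 points per axis, giving a 50×50 evaluation grid:

>>> np.linspace(0.0, 1.0).size
50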
numpy.flatten() vs. numpy.ravel():
Both turn a multi-dimensional array into a one-dimensional array.
>>> x = np.array([[1, 2], [3, 4]])
>>> x
array([[1, 2],
       [3, 4]])
>>> x.flatten()
array([1, 2, 3, 4])
>>> x.ravel()
array([1, 2, 3, 4])     # both default to row-major (C) order
>>> x.flatten('F')
array([1, 3, 2, 4])
>>> x.ravel('F')
array([1, 3, 2, 4])     # the difference: flatten returns a copy, ravel a view (when possible)
>>> x.reshape(-1)
array([1, 2, 3, 4])
>>> x.T.reshape(-1)
array([1, 3, 2, 4])
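The copy-versus-view difference matters when you write through the result; a quick demonstration of my own, continuing the x above:

>>> v = x.ravel()
>>> v[0] = 99       # writing through the view...
>>> x
array([[99,  2],
       [ 3,  4]])   # ...changes the original array
>>> c = x.flatten()
>>> c[0] = 0        # writing to the copy...
>>> x[0, 0]
99                  # ...leaves the original untouched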
numpy.reshape():
Restructures an array into a given shape.
>>> a = np.array([[1, 2, 3], [4, 5, 6]])
>>> a = a.reshape(6)
>>> a
array([1, 2, 3, 4, 5, 6])
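One dimension may be given as -1, and numpy infers it from the array's size; this is what theta.reshape(-1,1) in the gradient code relies on:

>>> a.reshape(3, -1)   # -1 is inferred as 2
array([[1, 2],
       [3, 4],
       [5, 6]])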
pyplot.contour():
Draws a contour plot.
plt.contour(xx, yy, h, [0.75])   # xx, yy are the coordinate grids, h the evaluated values; [a] draws the h = a contour
axes_1 = plt.gca()
axes_1.scatter(data[pos][:,0], data[pos][:,1], marker='+', c='k', s=60, linewidth=2, label='Admitted')
axes_1.scatter(data[neg][:,0], data[neg][:,1], c='y', s=60, label='Not admitted')
axes_1.set_xlabel('Exam 1 score')
axes_1.set_ylabel('Exam 2 score')
axes_1.legend(frameon=True, fancybox=True)
plt.contour(xx1, xx2, h, [0.25], linewidths=1, colors='b')
plt.contour(xx1, xx2, h, [0.5], linewidths=1, colors='r')
plt.contour(xx1, xx2, h, [0.99], linewidths=1, colors='g')
8. Prediction accuracy
def predict(theta, X):
    '''Predict labels using the learned logistic regression parameters'''
    m, n = X.shape
    p = np.zeros(shape=(m,1))
    h = sigmoid(X.dot(theta.T))
    for it in range(0, h.shape[0]):
        if h[it] > 0.5:          # threshold the probability at 0.5
            p[it,0] = 1
        else:
            p[it,0] = 0
    return p

# Compute accuracy on the training set
p = predict(res.x, X)
print('Train Accuracy: %f' % ((y[np.where(p == y)].size / float(y.size)) * 100.0))
Output:
Train Accuracy: 89.000000
numpy.where():
y[np.where(p == y)]   # np.where returns the indices of the elements satisfying the condition
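Incidentally, the loop in predict can be replaced by a single vectorized comparison (an alternative sketch of mine, not the post's code):

def predict_vectorized(theta, X):
    # comparing the probabilities against 0.5 yields booleans; cast to float for 0/1 labels
    return (sigmoid(X.dot(theta.T)) > 0.5).astype(float).reshape(-1, 1)

print(np.mean(predict_vectorized(res.x, X) == y) * 100.0)   # same training accuracy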
The full code is at https://github.com/icepoint666/MachineLearning