您的位置:首页 > 编程语言 > Python开发

逻辑(斯特)回归及 Python 实现

2018-01-01 20:58 591 查看




























Python 代码实现:

from numpy import *
filename='bb.txt'  # path of the training-data text file opened by loadDataSet()
def loadDataSet(path=None):
    """Load a whitespace-delimited, two-feature training set from a text file.

    Every non-blank line must contain at least three fields: feature x1,
    feature x2 and an integer class label, in that order.

    Args:
        path: File to read. When omitted, the module-level ``filename`` is
            used, which keeps existing zero-argument callers working.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows; the leading 1.0 is the bias input, so the
        model has three parameters: ``w0 + w1*x1 + w2*x2``. ``labelMat`` is
        the list of integer labels in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    source = filename if path is None else path
    dataMat = []
    labelMat = []
    # The context manager closes the handle even when parsing raises.
    with open(source) as fr:
        for line in fr:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            dataMat.append([1.0, float(fields[0]), float(fields[1])])
            labelMat.append(int(fields[2]))
    return dataMat, labelMat

def sigmoid(inX):
    """Return the logistic function ``1 / (1 + e**(-inX))``.

    ``inX`` is passed straight to NumPy's ``exp``, so a scalar yields a
    scalar and an array yields an array of the same shape.
    """
    decay = exp(-inX)
    return 1.0 / (1 + decay)
def costFunction(theta, X, y):
    """Return the mean cross-entropy (logistic) cost of ``theta`` on ``(X, y)``.

    Args:
        theta: Parameter vector; reshaped internally to a column, so any
            array with ``len(theta)`` elements along its first axis works.
        X: Design matrix with one row per sample (expected shape ``(m, n)``).
        y: Column of 0/1 labels (expected shape ``(m, 1)``).

    Returns:
        The cost as a scalar, for both ``ndarray`` and ``matrix`` inputs.
    """
    m = X.shape[0]  # number of training examples
    theta = reshape(theta, (len(theta), 1))

    # Evaluate the hypothesis once and keep it strictly inside (0, 1):
    # a saturated sigmoid would otherwise give log(0) = -inf and a NaN cost.
    eps = 1e-15
    h = clip(sigmoid(X.dot(theta)), eps, 1 - eps)

    J = (1. / m) * (-transpose(y).dot(log(h)) - transpose(1 - y).dot(log(1 - h)))

    # J is 1x1. Tuple indexing yields a true scalar even for numpy.matrix,
    # where J[0][0] would still be a 1x1 matrix.
    return J[0, 0]
# Compute the gradient direction used by the weight update.
def compute_grad(theta, X, y):
    """Return the ascent direction ``X.T @ (y - sigmoid(X @ theta))``.

    This is the negated, un-normalised gradient of the cross-entropy cost,
    so a caller *adds* ``alpha * grad`` to the weights.

    Args:
        theta: Parameter vector with one entry per column of ``X``. It is
            not modified.
        X: Design matrix with one row per sample (``ndarray`` or ``matrix``).
        y: 0/1 labels, one per row of ``X``; any shape holding ``m`` values.

    Returns:
        A 1-D float ``ndarray`` with one component per column of ``X``.

    Raises:
        ValueError: If ``theta`` does not have one entry per column of ``X``.
    """
    features = asarray(X, dtype=float)
    n_params = features.shape[1]
    # reshape() returns a new view, so the caller's theta keeps its shape.
    weights = reshape(asarray(theta, dtype=float), (n_params, 1))
    labels = reshape(asarray(y, dtype=float), (-1, 1))

    residual = labels - sigmoid(features.dot(weights))
    # One matrix product replaces the per-feature loop.
    return features.T.dot(residual).ravel()

def gradAscent(dataMat, labelMat, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Prints the iteration number and the current loss on every cycle.

    Args:
        dataMat: Sequence of feature rows (bias term included), one per sample.
        labelMat: Sequence of 0/1 labels, one per row of ``dataMat``.
        alpha: Learning rate (step size); larger values take bigger steps.
        maxCycles: Number of update iterations; tune it to the data set.

    Returns:
        A 1-D ``ndarray`` of fitted weights, one per column of ``dataMat``.
    """
    # asmatrix is used because the `mat` alias was removed in NumPy 2.0.
    dataMatrix = asmatrix(dataMat)
    classLabels = asmatrix(labelMat).transpose()  # labels as a column
    n = shape(dataMatrix)[1]
    # Start every parameter at 1.0, one per feature column.
    weights = ones(n)

    for cycle in range(1, maxCycles + 1):
        print("迭代次数:")
        print(cycle)
        # compute_grad returns the ascent direction, hence the addition.
        grad = compute_grad(weights, dataMatrix, classLabels)
        weights += alpha * grad
        print("损失:")
        print(costFunction(weights, dataMatrix, classLabels))
    return weights

def plotBestFit(weights):
    """Scatter the training samples by class and draw the fitted boundary.

    The data set is re-read with ``loadDataSet()``. Samples labelled 1 are
    drawn as red squares and all others as green dots. The plotted line is
    ``w0 + w1*x1 + w2*x2 = 0`` solved for x2.

    Args:
        weights: Indexable holding the three parameters ``w0, w1, w2``.
    """
    import matplotlib.pyplot as plt

    samples, labels = loadDataSet()
    points = array(samples)
    count = shape(points)[0]

    # Partition the sample indices by class label.
    positive_rows = [i for i in range(count) if int(labels[i]) == 1]
    negative_rows = [i for i in range(count) if int(labels[i]) != 1]

    xcord1 = [points[i, 1] for i in positive_rows]
    ycord1 = [points[i, 2] for i in positive_rows]
    xcord2 = [points[i, 1] for i in negative_rows]
    ycord2 = [points[i, 2] for i in negative_rows]

    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    axes.scatter(xcord2, ycord2, s=30, c='green')

    # Decision boundary evaluated at integer x1 values from -3 to 2.
    boundary_x = arange(-3, 3)
    boundary_y = (-weights[0] - weights[1] * boundary_x) / weights[2]
    axes.plot(boundary_x, boundary_y)

    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

def main():
    """Load the data set, fit the weights, print them and plot the result."""
    features, labels = loadDataSet()
    fitted = gradAscent(features, labels)
    print(fitted)
    plotBestFit(fitted)

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()


生成的拟合直线及点的分布:

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: