您的位置:首页 > 编程语言 > Python开发

Linear_regression与 Logistic_regression简单比较与python实现

2015-05-31 23:33 746 查看

Linear_regression与 Logistic_regression简单比较与python实现

好久没写博客了,在度厂实习期间更是天天累成了狗的节奏,最近有幸蹭到隔壁组老大小黑黑关于machine learning这块的培训(以下图片均摘自小黑黑的PPT),甚是感动,决定好好学习下这块的东西。

Linear_regression 和 Logistic_regression 其实是非常相似的两种算法。它们都属于监督学习,都可以用梯度下降等方法进行参数的迭代学习等等。

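它们最大的不同在于估价函数(即假设函数 $h_\theta(x)$)不同:Linear_regression 直接使用特征的线性组合 $h_\theta(x) = \theta^T x = \sum_{j=0}^{n} \theta_j x_j$;Logistic_regression 则在线性组合外再套一层 sigmoid 函数,即 $h_\theta(x) = \dfrac{1}{1 + e^{-\theta^T x}}$。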



此外,Linear_regression 的 cost function 为:

$$J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2$$



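Logistic_regression 的 cost function 为:

$$J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right]$$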



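即我们的最终目标是求出使 $J(\theta)$ 取得最小值的 $\theta$。两种算法都采用梯度下降法:每轮迭代对所有 $j$ 同时更新 $\theta_j := \theta_j - \alpha \cdot \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}$,其中 $\alpha$ 为学习率,$m$ 为样本数。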





最后给出两种算法的python实现:

Linear_regression

import sys

# Nominal capacity limits declared for the linear-regression demo
# (weights, samples, iterations).
# NOTE(review): presumably upper bounds carried over from a fixed-size-array
# style; verify which of these are still referenced before relying on them.
MAX_FEATURE_DIMENSION = 1024
MAX_SAMPLE_NUMBER = 1024
MAX_ITERATE_NUMBER = 1024

def compute_gradient(x, y, theta, feature_number, feature_pos, sample_number):
    """Return the partial derivative of the squared-error cost for one weight.

    Computes ``(1/m) * sum_i (theta . x[i] - y[i]) * x[i][feature_pos]`` over
    the first ``sample_number`` samples.

    Args:
        x: Sequence of sample rows; each row holds ``feature_number + 1``
            values that are multiplied element-wise with ``theta``.
        y: Sequence of target values, one per sample.
        theta: Current weights; indices ``0..feature_number`` are read.
        feature_number: Highest weight index used in the dot product.
        feature_pos: Index of the weight being differentiated.
        sample_number: Number of leading samples to average over.

    Returns:
        The averaged gradient component as a float.

    Raises:
        ZeroDivisionError: If ``sample_number`` is 0.
    """
    total = 0.0
    for i in range(sample_number):
        row = x[i]
        # Linear prediction for sample i; start at 0.0 so the result is a
        # float even when every input is an int.
        prediction = sum(
            (row[j] * theta[j] for j in range(feature_number + 1)), 0.0
        )
        total += (prediction - y[i]) * row[feature_pos]
    return total / sample_number

def compute_cost(x, y, theta, feature_number, sample_number):
    """Return the halved mean squared error of the linear model.

    Computes ``(1 / (2*m)) * sum_i (theta . x[i] - y[i]) ** 2`` over the first
    ``sample_number`` samples.

    Args:
        x: Sequence of sample rows; each row holds ``feature_number + 1``
            values that are multiplied element-wise with ``theta``.
        y: Sequence of target values, one per sample.
        theta: Weights; indices ``0..feature_number`` are read.
        feature_number: Highest weight index used in the dot product.
        sample_number: Number of leading samples to average over.

    Returns:
        The cost as a float.

    Raises:
        ZeroDivisionError: If ``sample_number`` is 0.
    """
    squared_error = 0.0
    for i in range(sample_number):
        row = x[i]
        prediction = sum(
            (row[j] * theta[j] for j in range(feature_number + 1)), 0.0
        )
        residual = prediction - y[i]
        squared_error += residual * residual
    return squared_error / (2 * sample_number)

def gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number):
    """Run batch gradient descent, updating ``theta`` in place.

    Each iteration computes all ``feature_number + 1`` new weights from the
    current ``theta`` and only then writes them back, so the update is
    simultaneous across weights.

    Args:
        x: Sequence of sample rows passed through to ``compute_gradient``.
        y: Sequence of target values, one per sample.
        theta: Mutable list of weights; entries ``0..feature_number`` are
            overwritten, any further entries are left untouched.
        feature_number: Highest weight index to update.
        sample_number: Number of samples to use.
        alpha: Learning rate (step size).
        iterate_number: Number of iterations to run.

    Returns:
        None. The result is left in ``theta``.
    """
    weight_count = feature_number + 1
    for _ in range(iterate_number):
        # Build the whole update from the old theta before storing anything;
        # the scratch list is sized to the weights actually in use rather
        # than to a fixed global capacity.
        updated = [
            theta[j] - alpha * compute_gradient(
                x, y, theta, feature_number, j, sample_number)
            for j in range(weight_count)
        ]
        theta[:weight_count] = updated

def predict(theta, x, feature_number):
    """Return the linear model's prediction for a single sample.

    Args:
        theta: Weights; indices ``0..feature_number`` are read.
        x: One sample row with at least ``feature_number + 1`` values.
        feature_number: Highest index included in the dot product.

    Returns:
        The dot product of ``theta`` and ``x`` over those indices, as a float.
    """
    # Start at 0.0 so the result is a float even for all-int inputs.
    return sum((theta[i] * x[i] for i in range(feature_number + 1)), 0.0)

if __name__ == '__main__':
    # Training data. The first column of every row is the constant 1 that
    # pairs with theta[0]; the remaining four columns are the features.
    x = [
        [1, 96.79, 2, 1, 2],
        [1, 110.39, 3, 1, 0],
        [1, 70.25, 1, 0, 2],
        [1, 99.96, 2, 1, 1],
        [1, 118.15, 3, 1, 0],
        [1, 115.08, 3, 1, 2],
    ]
    y = [287, 343, 199, 298, 340, 350]

    # Derive the sample count from the data instead of hard-coding it.
    sample_number = len(x)
    alpha = 0.0001
    iterate_number = 1500
    feature_number = 4

    # One weight per column of x, all starting at zero.
    theta = [0] * (feature_number + 1)

    gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(compute_cost(x, y, theta, feature_number, sample_number))

    testx1 = [1, 112, 3, 1, 0]
    testx2 = [1, 110, 3, 1, 1]

    print(predict(theta, testx1, feature_number))
    print(predict(theta, testx2, feature_number))


Logistic_regression

import sys
import math

# Nominal capacity limits declared for the logistic-regression demo
# (weights, samples, iterations).
# NOTE(review): presumably upper bounds carried over from a fixed-size-array
# style; verify which of these are still referenced before relying on them.
MAX_FEATURE_DIMENSION = 1024
MAX_SAMPLE_NUMBER = 1024
MAX_ITERATE_NUMBER = 1024

def sigmoid(z):
    """Return the logistic function ``1 / (1 + e**-z)`` without overflowing.

    Args:
        z: Real-valued input.

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if z >= 0:
        # exp(-z) is at most 1 here, so this form cannot overflow.
        return 1 / (1.0 + math.exp(-z))
    # For negative z, exp(-z) can exceed the float range and raise
    # OverflowError; the algebraically equivalent form below only ever
    # evaluates exp of a negative number.
    exp_z = math.exp(z)
    return exp_z / (1.0 + exp_z)

def hypothesis(x, theta, feature_number):
    """Return the logistic model's output for a single sample.

    Args:
        x: One sample row with at least ``feature_number + 1`` values.
        theta: Weights; indices ``0..feature_number`` are read.
        feature_number: Highest index included in the dot product.

    Returns:
        ``sigmoid(theta . x)`` computed over indices ``0..feature_number``.
    """
    # Start at 0.0 so sigmoid always receives a float.
    linear_term = sum(
        (x[i] * theta[i] for i in range(feature_number + 1)), 0.0
    )
    return sigmoid(linear_term)

def compute_gradient(x, y, theta, feature_number, feature_pos, sample_number):
    """Return the partial derivative of the logistic cost for one weight.

    Computes ``(1/m) * sum_i (h(x[i]) - y[i]) * x[i][feature_pos]`` over the
    first ``sample_number`` samples, where ``h`` is ``hypothesis``.

    Args:
        x: Sequence of sample rows.
        y: Sequence of labels, one per sample.
        theta: Current weights, passed through to ``hypothesis``.
        feature_number: Highest weight index used by ``hypothesis``.
        feature_pos: Index of the weight being differentiated.
        sample_number: Number of leading samples to average over.

    Returns:
        The averaged gradient component as a float.

    Raises:
        ZeroDivisionError: If ``sample_number`` is 0.
    """
    total = 0.0
    for i in range(sample_number):
        error = hypothesis(x[i], theta, feature_number) - y[i]
        total += error * x[i][feature_pos]
    return total / sample_number

def compute_cost(x, y, theta, feature_number, sample_number):
    """Return the mean cross-entropy (log-loss) of the logistic model.

    Computes ``-(1/m) * sum_i [y*log(h) + (1-y)*log(1-h)]`` over the first
    ``sample_number`` samples, where ``h`` is ``hypothesis(x[i], ...)``.

    Args:
        x: Sequence of sample rows.
        y: Sequence of labels, one per sample.
        theta: Weights, passed through to ``hypothesis``.
        feature_number: Highest weight index used by ``hypothesis``.
        sample_number: Number of leading samples to average over.

    Returns:
        The cost as a float.

    Raises:
        ZeroDivisionError: If ``sample_number`` is 0.
    """
    # When the sigmoid saturates to exactly 0.0 or 1.0, math.log(0) raises
    # ValueError. Keep h strictly inside (0, 1) so the loss stays finite.
    epsilon = 1e-15
    total = 0.0
    for i in range(sample_number):
        h = hypothesis(x[i], theta, feature_number)
        h = min(max(h, epsilon), 1.0 - epsilon)
        total += -y[i] * math.log(h) - (1 - y[i]) * math.log(1 - h)
    return total / sample_number

def gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number):
    """Run batch gradient descent for logistic regression, updating ``theta`` in place.

    Each iteration computes all ``feature_number + 1`` new weights from the
    current ``theta``, writes them back together, and prints the resulting
    cost.

    Args:
        x: Sequence of sample rows.
        y: Sequence of labels, one per sample.
        theta: Mutable list of weights; entries ``0..feature_number`` are
            overwritten, any further entries are left untouched.
        feature_number: Highest weight index to update.
        sample_number: Number of samples to use.
        alpha: Learning rate (step size).
        iterate_number: Number of iterations to run.

    Returns:
        None. The result is left in ``theta``.
    """
    weight_count = feature_number + 1
    for _ in range(iterate_number):
        # Build the whole update from the old theta before storing anything;
        # the scratch list is sized to the weights actually in use rather
        # than to a fixed global capacity.
        updated = [
            theta[j] - alpha * compute_gradient(
                x, y, theta, feature_number, j, sample_number)
            for j in range(weight_count)
        ]
        theta[:weight_count] = updated
        # NOTE(review): the original indentation was lost; the cost is
        # printed once per iteration here - confirm that was the intent.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(compute_cost(x, y, theta, feature_number, sample_number))

if __name__ == '__main__':
    feature_number = 2
    alpha = 0.001
    iterate_number = 10

    # Training data. The first column of every row is the constant 1 that
    # pairs with theta[0]; the remaining two columns are the features.
    x = [
        [1, 34.6, 78.0],
        [1, 30.2, 43.8],
        [1, 35.8, 72.9],
        [1, 60.1, 86.3],
        [1, 79.0, 75.3],
        [1, 45.0, 56.3],
        [1, 61.1, 96.5],
        [1, 75.0, 46.5],
        [1, 76.0, 87.4],
        [1, 84.4, 43.5],
        [1, 95.8, 38.2],
        [1, 75.0, 30.6],
    ]

    y = [0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0]

    # Derive the sample count from the data instead of hard-coding it.
    sample_number = len(x)

    # One weight per column of x, all starting at zero.
    theta = [0] * (feature_number + 1)

    gradient_descent(x, y, theta, feature_number, sample_number, alpha, iterate_number)

    # Join all learned weights with tabs. The original joined a one-element
    # list per weight, so no separator was ever written between the values.
    outstr = "\t".join(str(theta[i]) for i in range(feature_number + 1))
    print(outstr)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: