小白学习机器学习---第三章(3):二分类LDA的python实现
2018-03-28 10:57
483 查看
上一节从理论上推导了LDA,下面给出对应的Python实现代码。
"""
线性判别分析步骤:
1.把来自两类w1,w2的训练样本集X分成X1与X2
2.计算各类样本的均值向量m1,m2
3.计算样本类内散度矩阵s1,s2
4.计算总的类内散度矩阵 Sw=S1+S2
5.计算Sw的逆矩阵 Sw^{-1}
6.求解权向量 w=Sw^{-1}(m1-m2)
7.计算 g(x)=w^T(x-(m1+m2)/2),根据 g(x) 是否大于0判断类别
"""
#coding=utf-8
from numpy import *
import matplotlib.pyplot as plt
###导入数据
def createDataSet(n_samples=10):
    """Generate two random 2-D sample groups for the LDA demo.

    Every sample is a pair of uniform draws from ``random.random(2)``
    (NumPy's random module, in scope through ``from numpy import *``).
    Group 1 is scaled by 3 and group 2 by 20.

    Args:
        n_samples: Number of samples generated per group. Defaults to 10,
            the count the original loop hard-coded.

    Returns:
        Tuple ``(group1, group2)`` of ``numpy.matrix`` objects, one sample
        per row.
    """
    # Fixed watermelon data kept for reference (good melons / bad melons):
    # group1=[[0.697,0.460],[0.774,0.376],[0.634,0.264],[0.608,0.318],[0.556,0.215],[0.403,0.211],[0.481,0.149],[0.437,0.211]]
    # group2=[[0.666,0.091],[0.243,0.267],[0.245,0.057],[0.343,0.099],[0.639,0.161],[0.657,0.198],[0.360,0.370],[0.593,0.042]]
    group1 = []
    group2 = []
    # Draw for the two groups alternately so that a seeded generator yields
    # exactly the same samples as the original while-loop did.
    for _ in range(n_samples):
        # random.random(k) returns k uniform values in [0, 1).
        group1.append(random.random(2) * 3)
        group2.append(random.random(2) * 20)
    # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
    return asmatrix(group1), asmatrix(group2)
###计算样本均值
def compute_mean(group):
    """Return the mean vector of a sample set.

    Args:
        group: Samples with one sample per row (a matrix or anything
            ``asmatrix`` accepts).

    Returns:
        ``numpy.matrix`` with a single row holding the column-wise mean.
    """
    # Averaging along axis 0 replaces the manual loop that summed exactly
    # two features, so any number of features is supported. asmatrix is used
    # because the `mat` alias was removed in NumPy 2.0; the 1 x d matrix
    # return type is unchanged.
    return asmatrix(group).mean(axis=0)
###计算样本类内散度矩阵
def compute_scatter(group, mean):
    """Return the within-class scatter matrix of one class.

    Computes the sum over all rows ``x`` of ``(x - mean).T * (x - mean)``.

    Args:
        group: Samples with one sample per row.
        mean: Mean vector of the class as a single row; it is broadcast
            against every row of ``group``.

    Returns:
        ``numpy.matrix`` of shape ``(d, d)``, where ``d`` is the number of
        columns of ``group``.
    """
    # Subtract the mean from every sample (row-wise broadcasting).
    centered = asmatrix(group) - mean
    # The sum of the per-sample outer products equals Xc.T * Xc. This removes
    # the hard-coded 2 x 2 accumulator, so any feature dimension works.
    return centered.T * centered
# ---- Fit the two-class LDA model on freshly generated data ----
group1, group2 = createDataSet()
mean1 = compute_mean(group1)
mean2 = compute_mean(group2)
s_in1 = compute_scatter(group1, mean1)
s_in2 = compute_scatter(group2, mean2)

# Total within-class scatter: Sw = S1 + S2.
s_sum = s_in1 + s_in2
print("类内散度矩阵:", s_sum)

# Inverse of the within-class scatter matrix (matrix attribute .I).
s_rev = s_sum.I
print('s_rev:', s_rev)

# Weight vector: w = Sw^-1 (m1 - m2).
# NOTE(review): the first print below shows the mean difference (m1 - m2),
# not the weight vector; the weight vector is the one printed as 'w:'.
meanW = (mean1 - mean2).T
print("权向量W:", meanW)
w = dot(s_rev, meanW)
print('w:', w)
print("----------------------")

# ---- Classify every sample with g(x) = w.T x - w.T (m1 + m2) / 2 ----
# Points predicted as class 1 (g(x) > 0) and class 2 (otherwise).
xcord1 = []; ycord1 = []
xcord2 = []; ycord2 = []
# Points grouped by their true label.
xcord3 = []; ycord3 = []
xcord4 = []; ycord4 = []
w2 = array(w)  # not used below; kept so the module-level name still exists

# The midpoint of the class means and its projection do not depend on the
# sample, so they are computed once instead of on every loop iteration.
mean3 = 0.5 * (mean1 + mean2)
meanDistance = dot(w.T, mean3.T)

# A single loop handles both groups; points are appended in the same order
# as before (all of group1, then all of group2).
for samples in (group1, group2):
    for a in samples:
        item = array(a)[0]
        distance = dot(w.T, a.T)
        if (distance - meanDistance) > 0:
            xcord1.append(item[0])
            ycord1.append(item[1])
        else:
            xcord2.append(item[0])
            ycord2.append(item[1])

# Plot the LDA prediction.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title("LDA")
plt.xlabel('X')
plt.ylabel('Y')
ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
ax.scatter(xcord2, ycord2, s=30, c='blue')

# Plot the true grouping for comparison.
for a in group1:
    item = array(a)[0]
    xcord3.append(item[0])
    ycord3.append(item[1])
for a in group2:
    item = array(a)[0]
    xcord4.append(item[0])
    ycord4.append(item[1])
fig = plt.figure(num=3)
ax = fig.add_subplot(111)
ax.set_title("realLabel")
plt.xlabel('X')
plt.ylabel('Y')
ax.scatter(xcord3, ycord3, s=30, c='red', marker='s')
ax.scatter(xcord4, ycord4, s=30, c='blue')
plt.show()
"""
线性判别分析步骤:
1.把来自两类w1,w2的训练样本集X分成X1与X2
2.计算各类样本的均值向量m1,m2
3.计算样本类内散度矩阵s1,s2
4.计算总的类内散度矩阵 Sw=S1+S2
5.计算Sw的逆矩阵 Sw^{-1}
6.求解权向量 w=Sw^{-1}(m1-m2)
7.计算 g(x)=w^T(x-(m1+m2)/2),根据 g(x) 是否大于0判断类别
"""
#coding=utf-8
from numpy import *
import matplotlib.pyplot as plt
###导入数据
def createDataSet(n_samples=10):
    """Generate two random 2-D sample groups for the LDA demo.

    Every sample is a pair of uniform draws from ``random.random(2)``
    (NumPy's random module, in scope through ``from numpy import *``).
    Group 1 is scaled by 3 and group 2 by 20.

    Args:
        n_samples: Number of samples generated per group. Defaults to 10,
            the count the original loop hard-coded.

    Returns:
        Tuple ``(group1, group2)`` of ``numpy.matrix`` objects, one sample
        per row.
    """
    # Fixed watermelon data kept for reference (good melons / bad melons):
    # group1=[[0.697,0.460],[0.774,0.376],[0.634,0.264],[0.608,0.318],[0.556,0.215],[0.403,0.211],[0.481,0.149],[0.437,0.211]]
    # group2=[[0.666,0.091],[0.243,0.267],[0.245,0.057],[0.343,0.099],[0.639,0.161],[0.657,0.198],[0.360,0.370],[0.593,0.042]]
    group1 = []
    group2 = []
    # Draw for the two groups alternately so that a seeded generator yields
    # exactly the same samples as the original while-loop did.
    for _ in range(n_samples):
        # random.random(k) returns k uniform values in [0, 1).
        group1.append(random.random(2) * 3)
        group2.append(random.random(2) * 20)
    # asmatrix replaces the `mat` alias, which NumPy 2.0 removed.
    return asmatrix(group1), asmatrix(group2)
###计算样本均值
def compute_mean(group):
    """Return the mean vector of a sample set.

    Args:
        group: Samples with one sample per row (a matrix or anything
            ``asmatrix`` accepts).

    Returns:
        ``numpy.matrix`` with a single row holding the column-wise mean.
    """
    # Averaging along axis 0 replaces the manual loop that summed exactly
    # two features, so any number of features is supported. asmatrix is used
    # because the `mat` alias was removed in NumPy 2.0; the 1 x d matrix
    # return type is unchanged.
    return asmatrix(group).mean(axis=0)
###计算样本类内散度矩阵
def compute_scatter(group, mean):
    """Return the within-class scatter matrix of one class.

    Computes the sum over all rows ``x`` of ``(x - mean).T * (x - mean)``.

    Args:
        group: Samples with one sample per row.
        mean: Mean vector of the class as a single row; it is broadcast
            against every row of ``group``.

    Returns:
        ``numpy.matrix`` of shape ``(d, d)``, where ``d`` is the number of
        columns of ``group``.
    """
    # Subtract the mean from every sample (row-wise broadcasting).
    centered = asmatrix(group) - mean
    # The sum of the per-sample outer products equals Xc.T * Xc. This removes
    # the hard-coded 2 x 2 accumulator, so any feature dimension works.
    return centered.T * centered
# ---- Fit the two-class LDA model on freshly generated data ----
group1, group2 = createDataSet()
mean1 = compute_mean(group1)
mean2 = compute_mean(group2)
s_in1 = compute_scatter(group1, mean1)
s_in2 = compute_scatter(group2, mean2)

# Total within-class scatter: Sw = S1 + S2.
s_sum = s_in1 + s_in2
print("类内散度矩阵:", s_sum)

# Inverse of the within-class scatter matrix (matrix attribute .I).
s_rev = s_sum.I
print('s_rev:', s_rev)

# Weight vector: w = Sw^-1 (m1 - m2).
# NOTE(review): the first print below shows the mean difference (m1 - m2),
# not the weight vector; the weight vector is the one printed as 'w:'.
meanW = (mean1 - mean2).T
print("权向量W:", meanW)
w = dot(s_rev, meanW)
print('w:', w)
print("----------------------")

# ---- Classify every sample with g(x) = w.T x - w.T (m1 + m2) / 2 ----
# Points predicted as class 1 (g(x) > 0) and class 2 (otherwise).
xcord1 = []; ycord1 = []
xcord2 = []; ycord2 = []
# Points grouped by their true label.
xcord3 = []; ycord3 = []
xcord4 = []; ycord4 = []
w2 = array(w)  # not used below; kept so the module-level name still exists

# The midpoint of the class means and its projection do not depend on the
# sample, so they are computed once instead of on every loop iteration.
mean3 = 0.5 * (mean1 + mean2)
meanDistance = dot(w.T, mean3.T)

# A single loop handles both groups; points are appended in the same order
# as before (all of group1, then all of group2).
for samples in (group1, group2):
    for a in samples:
        item = array(a)[0]
        distance = dot(w.T, a.T)
        if (distance - meanDistance) > 0:
            xcord1.append(item[0])
            ycord1.append(item[1])
        else:
            xcord2.append(item[0])
            ycord2.append(item[1])

# Plot the LDA prediction.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title("LDA")
plt.xlabel('X')
plt.ylabel('Y')
ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
ax.scatter(xcord2, ycord2, s=30, c='blue')

# Plot the true grouping for comparison.
for a in group1:
    item = array(a)[0]
    xcord3.append(item[0])
    ycord3.append(item[1])
for a in group2:
    item = array(a)[0]
    xcord4.append(item[0])
    ycord4.append(item[1])
fig = plt.figure(num=3)
ax = fig.add_subplot(111)
ax.set_title("realLabel")
plt.xlabel('X')
plt.ylabel('Y')
ax.scatter(xcord3, ycord3, s=30, c='red', marker='s')
ax.scatter(xcord4, ycord4, s=30, c='blue')
plt.show()
相关文章推荐
- 小白学习机器学习---第三章(2):对数几率回归python实现
- 小白学习机器学习---第三章:简单线性模型Python实现
- 小白学习机器学习---第五章:神经网络简单模型python实现
- 【机器学习实验】学习Python来分类现实世界的数据
- 机器学习实战笔记(Python实现)-07-模型评估与分类性能度量
- 【机器学习】python实践笔记 -- 经典监督学习模型之分类学习模型
- 机器学习算法的Python实现 (1):logistics回归 与 线性判别分析(LDA)
- Python单例模式的4种实现方法 分类: python学习 2015-05-08 16:49 42人阅读 评论(0) 收藏
- 神经网络与深度学习 使用Python实现基于梯度下降算法的神经网络和自制仿MNIST数据集的手写数字分类可视化程序 web版本
- 小白学习Machine Learning in Action-机器学习实战------Python基础
- 贝叶斯分类方法学习三 python+jieba+mongodb实现朴素贝叶斯新闻文本自动分类
- 小白学习Machine Learning in Action-机器学习实战------分类之k近邻算法
- 【机器学习算法-python实现】K-means无监督学习实现分类
- 机器学习入门学习笔记:(2.2)线性回归python程序实现
- python机器学习---用贝叶斯算法实现垃圾邮件分类预测
- 机器学习基本知识以及几种分类算法的Python实现(适合入门)
- 【机器学习实验】学习Python来分类现实世界的数据
- 神经网络与深度学习 1.6 使用Python实现基于梯度下降算法的神经网络和MNIST数据集的手写数字分类程序
- 机器学习经典算法详解及Python实现--CART分类决策树、回归树和模型树
- Python实现基于朴素贝叶斯的垃圾邮件分类 标签: python朴素贝叶斯垃圾邮件分类 2016-04-20 15:09 2750人阅读 评论(1) 收藏 举报 分类: 机器学习(19) 听说