您的位置:首页 > 其它

熵相关计算

2018-01-23 16:11 225 查看
#借用下python机器学习中的数据集

import numpy as np
from collections import Counter
from math import  log

def CreateDataSet():
    """Build the small toy dataset used by the entropy examples.

    Returns:
        A 5x3 ``numpy`` array. Columns 0 and 1 hold binary features and
        column 2 holds the ``'yes'``/``'no'`` label. Because the rows mix
        integers and strings, ``np.array`` stores every cell as a string
        (``'1'``, ``'0'``, ``'yes'``, ``'no'``).
    """
    rows = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'no'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    return np.array(rows)

def cal_entropy(dataset, m):
    """Return the Shannon entropy of column ``m`` of ``dataset``.

    Computes ``H(X) = sum(-p(x) * log p(x))`` with the natural logarithm,
    where ``p(x)`` is the relative frequency of value ``x`` in the column.

    Args:
        dataset: 2-D array that supports ``dataset[:, m]`` indexing.
        m: Index of the column whose entropy is computed.

    Returns:
        The entropy in nats (``0`` when the dataset has no rows).
    """
    # Counter does the frequency count; a plain dict accumulator would
    # work just as well.
    counts = Counter(dataset[:, m])
    total = len(dataset)
    return sum(-(c / total) * log(c / total) for c in counts.values())

# Build the toy dataset once; it is reused by every example below.
datasets=CreateDataSet()  # original author's note: the label is not really of much use here
# Entropy of column 2 (the 'yes'/'no' column).
print(cal_entropy(datasets,2))

def cal_union_entropy(dataset, m, n):
    """Return the joint entropy of columns ``m`` and ``n`` of ``dataset``.

    Computes ``H(X, Y) = sum(-p(x, y) * log p(x, y))`` with the natural
    logarithm, where ``p(x, y)`` is the relative frequency of the value
    pair ``(x, y)`` across the rows.

    Args:
        dataset: 2-D array that supports ``dataset[:, k]`` indexing.
        m: Index of the first column.
        n: Index of the second column.

    Returns:
        The joint entropy in nats.
    """
    # Key on the (x, y) tuple rather than a concatenated string:
    # concatenation without a separator merges distinct pairs such as
    # ('1', '11') and ('11', '1'), and fails on non-string columns.
    pair_counts = Counter(zip(dataset[:, m], dataset[:, n]))
    total = len(dataset)
    return sum(-(c / total) * log(c / total) for c in pair_counts.values())

# Joint entropy of column 1 and column 2 of the toy dataset.
print(cal_union_entropy(datasets,1,2))

def cal_condition_entropy(dataset, m, n):
    """Return the conditional entropy of column ``m`` given column ``n``.

    Uses the chain rule ``H(X | Y) = H(X, Y) - H(Y)`` with ``X`` taken
    from column ``m`` and ``Y`` from column ``n``: the uncertainty left in
    column ``m`` once column ``n`` is known.

    Args:
        dataset: 2-D array that supports ``dataset[:, k]`` indexing.
        m: Index of the column whose remaining uncertainty is measured.
        n: Index of the conditioning (known) column.

    Returns:
        The conditional entropy in nats.
    """
    joint = cal_union_entropy(dataset, m, n)
    given = cal_entropy(dataset, n)
    return joint - given

# Conditional entropy: joint entropy of columns 1 and 2 minus the entropy of column 2.
print(cal_condition_entropy(datasets,1,2))

def cal_cross_entropy(dataset, m, prob=None):
    """Return the cross-entropy of column ``m`` against a distribution q.

    Computes ``H(p, q) = sum(-p * log(q))`` with the natural logarithm,
    where ``p`` is the empirical distribution of column ``m``.

    Args:
        dataset: 2-D array that supports ``dataset[:, m]`` indexing.
        m: Index of the column providing the empirical distribution p.
        prob: Sequence of q probabilities. ``prob[i]`` is paired with the
            i-th distinct value of the column in order of first
            appearance. Defaults to ``[0.5, 0.5]``, a simulated uniform
            two-class distribution.

    Returns:
        The cross-entropy in nats.

    Raises:
        IndexError: If the column has more distinct values than ``prob``
            has entries.
    """
    if prob is None:
        prob = [0.5, 0.5]  # simulated q distribution
    counts = list(Counter(dataset[:, m]).values())
    if len(counts) > len(prob):
        raise IndexError(
            "column has %d distinct values but prob has only %d entries"
            % (len(counts), len(prob))
        )
    total = len(dataset)
    return sum(-(c / total) * log(q) for c, q in zip(counts, prob))

# Cross-entropy of column 2 against the function's built-in q distribution.
print(cal_cross_entropy(datasets,2))

def cal_relative_entropy(dataset, m, prob=None):
    """Return the relative entropy (KL divergence) of column ``m`` from q.

    Computes ``KL(p || q) = sum(-p * log(q / p))`` with the natural
    logarithm, where ``p`` is the empirical distribution of column ``m``.

    Args:
        dataset: 2-D array that supports ``dataset[:, m]`` indexing.
        m: Index of the column providing the empirical distribution p.
        prob: Sequence of q probabilities. ``prob[i]`` is paired with the
            i-th distinct value of the column in order of first
            appearance. Defaults to ``[0.5, 0.5]``, a simulated uniform
            two-class distribution.

    Returns:
        The KL divergence in nats.

    Raises:
        IndexError: If the column has more distinct values than ``prob``
            has entries.
    """
    if prob is None:
        prob = [0.5, 0.5]  # simulated q distribution
    counts = list(Counter(dataset[:, m]).values())
    if len(counts) > len(prob):
        raise IndexError(
            "column has %d distinct values but prob has only %d entries"
            % (len(counts), len(prob))
        )
    total = len(dataset)
    # q / p is written as q * total / count because p = count / total.
    return sum(-(c / total) * log(q * total / c) for c, q in zip(counts, prob))


# NOTE(review): the original code defined this function under the name
# ``cal_cross_entropy``, shadowing the cross-entropy function of the same
# name. The alias keeps existing calls working; prefer
# ``cal_relative_entropy`` in new code.
cal_cross_entropy = cal_relative_entropy

# Relative entropy (KL divergence) of column 2: at this point the name
# cal_cross_entropy refers to the second definition above, not the cross-entropy one.
print(cal_cross_entropy(datasets,2))

以上五条 print 语句依次输出(熵、联合熵、条件熵、交叉熵、相对熵):

0.6730116670092565

1.0549201679861442

0.38190850097688767

0.6931471805599453

0.020135513550688836
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: