您的位置:首页 > 编程语言 > Python开发

Python-基于物品的协同过滤算法(附两种算法的对比)

2017-09-10 19:32 441 查看
与上一篇UserCF数据来源一致。

先贴代码ItemCF:

#coding=utf-8

import math

class ItemCF:
    """Item-based collaborative filtering recommender.

    Ratings are read from two whitespace-separated text files.  Every
    non-blank line must contain exactly four fields: user id, item id, an
    integer rating, and a fourth field that is read but ignored.

    Attributes:
        datafile: path of the training ratings file.
        testfile: path of the held-out ratings file.
        traindata: ``{user id: {item id: rating}}`` built from ``datafile``.
        testdata: same layout as ``traindata``, built from ``testfile``.
        itermSimBest: ``{item: {other item: similarity}}``; only exists
            after ``ItemSimilarity()`` has been called.
    """

    def __init__(self,basefile,testfile):
        """Remember both file paths and load them immediately.

        Args:
            basefile: path of the training ratings file.
            testfile: path of the test ratings file.
        """
        self.datafile = basefile
        self.testfile = testfile
        self.readData()
        self.readTestData()

    @staticmethod
    def _load_ratings(path):
        """Parse one ratings file into ``{user id: {item id: int rating}}``.

        Raises:
            ValueError: if a non-blank line does not have exactly four
                fields or its rating is not an integer.
        """
        ratings = {}
        # The context manager closes the handle even if a line fails to parse.
        with open(path) as handle:
            for line in handle:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                userid, itemid, record, _ = line.split()
                ratings.setdefault(userid, {})[itemid] = int(record)
        return ratings

    def readData(self):
        """Load the training ratings from ``self.datafile`` into ``self.traindata``."""
        self.traindata = self._load_ratings(self.datafile)

    def readTestData(self):
        """Load the test ratings from ``self.testfile`` into ``self.testdata``."""
        self.testdata = self._load_ratings(self.testfile)

    def ItemSimilarity(self):
        """Build the item-item similarity table ``self.itermSimBest``.

        The similarity of items i and j is ``C[i][j] / sqrt(N[i] * N[j])``
        where ``C[i][j]`` is the number of users whose history contains both
        items and ``N[i]`` is the number of users whose history contains i.
        Items that never co-occur with another item get no entry.
        """
        users_per_item = {}    # N: item -> number of users who rated it
        co_occurrence = {}     # C: item -> {other item -> shared user count}
        for items in self.traindata.values():
            for i in items:
                users_per_item[i] = users_per_item.get(i, 0) + 1
                for j in items:
                    if i == j:
                        continue
                    row = co_occurrence.setdefault(i, {})
                    row[j] = row.get(j, 0) + 1

        # Similarity between every pair of co-occurring items.
        self.itermSimBest = {}
        for i, related_items in co_occurrence.items():
            self.itermSimBest[i] = {
                j: cij / math.sqrt(users_per_item[i] * users_per_item[j])
                for j, cij in related_items.items()
            }

    def Recommendation(self,user_id,K = 8,nitem = 40):
        """Score unseen items for one user.

        For every item in the user's training history, the K most similar
        items are taken; each one the user has not already rated gains
        ``rating * similarity``.  ``ItemSimilarity()`` must have been called
        beforehand.

        Args:
            user_id: key into ``self.traindata``.
            K: number of nearest neighbours considered per history item.
            nitem: maximum number of recommendations returned.

        Returns:
            A dict ``{item id: score}`` holding at most ``nitem`` of the
            highest-scoring items; empty when the user is unknown or has no
            usable neighbours.
        """
        ru = self.traindata.get(user_id)    # the user's rating history
        if not ru:
            # Unknown user (or empty history): nothing to base a score on.
            return {}
        rank = {}
        for i, pi in ru.items():
            # An item that never co-occurred with another has no similarity
            # row; treat it as having no neighbours instead of raising.
            neighbours = sorted(self.itermSimBest.get(i, {}).items(),
                                key=lambda x: x[1], reverse=True)[0:K]
            for j, wj in neighbours:
                if j in ru:
                    continue
                rank[j] = rank.get(j, 0) + pi * wj
        return dict(sorted(rank.items(), key=lambda x: x[1], reverse=True)[0:nitem])

    def recallAndPrecision(self,test = None,k = 8,nitem = 10):
        """Evaluate the recommender over every training user.

        Args:
            test: ``{user id: {item id: rating}}`` to evaluate against;
                ``self.testdata`` is used when it is None.
            k: neighbour count forwarded to ``Recommendation``.
            nitem: recommendation list length forwarded to
                ``Recommendation``; it is also the per-user precision
                denominator, even when fewer items are actually returned.

        Returns:
            ``(recall, precision)`` as floats.  A ratio whose denominator is
            zero is reported as 0.0.
        """
        if test is None:
            test = self.testdata
        hit = 0
        recall = 0
        precision = 0
        for user in self.traindata:
            tu = test.get(user, {})
            rank = self.Recommendation(user, k, nitem)
            hit += sum(1 for item in rank if item in tu)
            recall += len(tu)
            precision += nitem
        # "* 1.0" forces true division on interpreters where int / int truncates.
        recall_rate = hit / (recall * 1.0) if recall else 0.0
        precision_rate = hit / (precision * 1.0) if precision else 0.0
        return (recall_rate, precision_rate)

def testUserCF():
    """Print a recall/precision table for ItemCF over a grid of K and N.

    Builds the model from "train.txt" and "test.txt" in the current working
    directory, computes the item similarities once, then prints one row per
    (neighbour count K, recommendation list length N) combination with both
    metrics shown as percentages.
    """
    neighbour_counts = (5, 10, 20, 40, 80, 160)
    list_lengths = (5, 10, 15, 20)
    row_format = "%5d%5d%19.3f%%%19.3f%%"

    model = ItemCF("train.txt", "test.txt")
    model.ItemSimilarity()

    header = "%5s%5s%20s%20s" % ('K', 'N', "recall", 'precision')
    print(header)
    for neighbours in neighbour_counts:
        for length in list_lengths:
            scores = model.recallAndPrecision(k=neighbours, nitem=length)
            print(row_format % (neighbours, length, scores[0] * 100, scores[1] * 100))

# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    testUserCF()


UserCF和ItemCF的综合比较:

UserCF(适用新闻推荐等)

给用户推荐那些和他有相同兴趣爱好的用户所喜欢的物品,反映的是用户所在的小型兴趣群体中物品的热门程度。

ItemCF(适用图书、电商、电影网站等)

给用户推荐那些和他之前喜欢的物品类似的物品,更加个性化,反映了用户自己的兴趣传承。

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息