您的位置:首页 > 其它

协同过滤 推荐系统实战

2017-07-23 14:12 330 查看
本文基于http://blog.csdn.net/gamer_gyt/article/details/51346159

首先感谢原作者乐于分享的精神。

做了如下修改:1、将数据划分为训练集和测试集,增加计算召回率与精确率的函数

2、修改了userCF中对推荐物品感兴趣度的计算方法

#-*-coding:utf-8-*-
'''
Created on 2016年5月2日

@author: Gamer Think
'''
from math import sqrt
import random
# Load the ratings file and split it at random into a training set (`users`)
# and a held-out set (`test`).  Both are nested dicts: the first comma-
# separated field of each row is the outer key, the third field the inner key,
# and the second field (parsed as float) the stored value.
# NOTE(review): judging by the file name the row layout is
# "uid,score,bid" -- confirm against the data file.
users = {}
test = {}
random.seed()
# A row goes to `test` when random.randint(0, M) == k; with M = 1 and k = 1
# that is about half of the rows.
M = 1
k = 1
# The file is opened once and closed deterministically (the original opened
# it twice and never closed either handle).
with open("uid_score_bid", "r") as fp:
    for line in fp:
        lines = line.strip().split(",")
        if len(lines) < 3:
            # Blank or truncated row: nothing to index, skip instead of
            # failing with IndexError.
            continue
        target = test if random.randint(0, M) == k else users
        # Nested dict: target[first field][third field] = float(second field)
        target.setdefault(lines[0], {})[lines[2]] = float(lines[1])

#----------------新增代码段END----------------------

class recommender:
    """User-based collaborative-filtering recommender.

    ``data`` maps a user to a dict of ``{item: rating}``.  Neighbours are
    ranked by the similarity function selected through ``metric`` and the
    ratings of the ``k`` most similar users are combined into a
    similarity-weighted average score per item.
    """

    def __init__(self, data, k=12, metric='pearson', n=12):
        """Store the configuration.

        data:   dict ``{user: {item: rating}}``; any dict subclass is accepted.
        k:      number of nearest neighbours consulted by ``recommend``.
        metric: name of the similarity measure; only ``'pearson'`` is wired
                up.  Any other value leaves ``self.fn`` unset.
        n:      maximum number of recommendations returned.
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}

        self.metric = metric
        if self.metric == 'pearson':
            self.fn = self.pearson
        # isinstance also accepts dict subclasses (defaultdict, OrderedDict).
        # As before, non-dict input leaves self.data unset.
        if isinstance(data, dict):
            self.data = data

    def convertProductID2name(self, id):
        """Return the display name registered for ``id``, or ``id`` itself."""
        return self.productid2name.get(id, id)

    def pearson(self, rating1, rating2):
        """Return the Pearson correlation of two ``{item: rating}`` dicts.

        Only items present in both dicts are used.  Returns 0 when there is
        no common item or when either side has zero variance.
        """
        # Float accumulators keep every division below a true division, even
        # for integer ratings under Python 2.
        sum_xy = sum_x = sum_y = sum_x2 = sum_y2 = 0.0
        n = 0
        for key, x in rating1.items():
            if key not in rating2:
                continue
            y = rating2[key]
            n += 1
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x * x
            sum_y2 += y * y
        if n == 0:
            return 0

        # Rounding can push these mathematically non-negative terms slightly
        # below zero, which would make sqrt() raise ValueError; clamp at 0.
        spread_x = max(sum_x2 - sum_x * sum_x / n, 0.0)
        spread_y = max(sum_y2 - sum_y * sum_y / n, 0.0)
        denominator = sqrt(spread_x) * sqrt(spread_y)
        if denominator == 0:
            return 0
        return (sum_xy - sum_x * sum_y / n) / denominator

    def computeNearestNeighbor(self, username):
        """Return ``[(other_user, similarity), ...]`` for every other user,
        sorted by similarity in descending order.

        Raises KeyError if ``username`` is not in the data set.
        """
        own_ratings = self.data[username]
        distances = [
            (instance, self.fn(own_ratings, self.data[instance]))
            for instance in self.data
            if instance != username
        ]
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Recommend items that ``user`` has not rated.

        Returns ``(recommendations, nearest)``: ``recommendations`` is a list
        of at most ``self.n`` ``(item, score)`` tuples sorted by score in
        descending order, and ``nearest`` is the full neighbour list from
        ``computeNearestNeighbor``.  The score of an item is the
        similarity-weighted average of the ratings given by those of the
        ``k`` nearest neighbours who rated it.
        """
        nearest = self.computeNearestNeighbor(user)
        userRatings = self.data[user]

        weighted_sum = {}   # item -> sum(rating * similarity)
        weight_total = {}   # item -> sum(similarity)
        # Slicing instead of indexing 0..k-1 tolerates data sets with fewer
        # than k other users.
        for name, weight in nearest[:self.k]:
            for item, rating in self.data[name].items():
                if item in userRatings:
                    continue
                weighted_sum[item] = weighted_sum.get(item, 0) + rating * weight
                weight_total[item] = weight_total.get(item, 0) + weight

        # An item whose similarities add up to zero has no defined weighted
        # average; leave it out instead of dividing by zero.
        recommendations = [
            (self.convertProductID2name(item),
             weighted_sum[item] / weight_total[item])
            for item in weighted_sum
            if weight_total[item] != 0
        ]
        recommendations.sort(key=lambda pair: pair[1], reverse=True)
        return recommendations[:self.n], nearest

    def recall(self, testt):
        """Compute recall and precision against the held-out ratings.

        testt: dict ``{user: {item: rating}}`` of held-out ratings.  Users of
               the training data that are missing from it count as having no
               held-out items.

        Returns ``([recall], [precision])`` -- two one-element lists.  A
        ratio whose denominator is zero is reported as 0.0.
        """
        hit = 0
        al = 0      # total number of held-out items
        al_N = 0    # total number of recommendation slots (n per user)
        for user_a in self.data:
            held_out = testt.get(user_a, {})
            rank, _nearest = self.recommend(user_a)
            hit += sum(1 for item, _score in rank if item in held_out)
            al += len(held_out)
            al_N += self.n

        recall_value = hit / (al * 1.0) if al else 0.0
        precision_value = hit / (1.0 * al_N) if al_N else 0.0
        return [recall_value], [precision_value]

def adjustrecommend(id):
    """Build recommendations for user ``id`` and print recall / precision.

    Trains a ``recommender`` on the module-level ``users`` dict, evaluates it
    against the module-level ``test`` dict, prints the two metrics and
    returns ``(bookid_list, nearuser)``: the recommended item ids and the 15
    most similar ``(user, similarity)`` pairs.
    """
    r = recommender(users)
    # k is [(book, score), ...]; nearuser is [(user, similarity), ...]
    k, nearuser = r.recommend("%s" % id)
    rec, pre = r.recall(test)   # recall and precision of the recommender
    # Single formatted argument: prints the same text under Python 2 and 3.
    print("%s %s" % (rec, pre))
    bookid_list = [book for book, _score in k]
    return bookid_list, nearuser[:15]

# Demo run for one user id; the call also prints recall and precision.
bookid_list, near_list = adjustrecommend("changanamei")
# Parenthesised single-argument form prints identically on Python 2 and 3.
print('.................................................................')
#print ("bookid_list:",bookid_list)
#print ("near_list:",near_list)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: