您的位置:首页 > 编程语言 > Python开发

个性化推荐算法的 Python 实现

2015-09-30 15:24 627 查看
基于ItemCF算法

# -*- coding: utf-8 -*-

"""

Created on Wed Sep 30 15:36:15 2015

@author: tanyouwei

"""

import math

# ItemCF算法

def ItemSimilarity(train):
    """Compute ItemCF item-to-item similarity from user histories.

    Args:
        train: mapping ``user -> items`` where ``items`` is a dict keyed by
            item (only the keys are used here).

    Returns:
        Nested dict ``W`` with ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])``,
        where ``C[i][j]`` is the number of users having both items and
        ``N[i]`` the number of users having item ``i``. Every item seen in
        ``train`` gets a row; the row is empty when the item never
        co-occurs with another item.
    """
    C = dict()  # C[i][j]: users who have both i and j
    N = dict()  # N[i]: users who have i
    for items in train.values():
        for i in items:
            # Counters start from 0 via get/setdefault; a bare `+=` on a
            # missing key raises KeyError.
            N[i] = N.get(i, 0) + 1
            row = C.setdefault(i, dict())
            for j in items:
                if i == j:
                    continue
                row[j] = row.get(j, 0) + 1

    W = dict()
    for i, related_items in C.items():
        W[i] = dict()
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W

# ItemCF-IUF算法

def ItemSimilarity_v2(train):
    """Compute ItemCF-IUF similarity (co-occurrence damped by user activity).

    Each co-occurrence contributed by a user is weighted by
    ``1 / log(1 + len(items))`` so users with long histories count less.

    Args:
        train: mapping ``user -> items`` where ``items`` is a dict keyed by
            item (only the keys and the length are used).

    Returns:
        Nested dict ``W`` with ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])``.
        Every item seen in ``train`` gets a row, possibly empty.
    """
    C = dict()  # C[i][j]: damped co-occurrence weight of i and j
    N = dict()  # N[i]: users who have i
    for items in train.values():
        # A pair (i, j) with i != j only exists when len(items) >= 2, so the
        # logarithm is strictly positive whenever it is used. Hoisted out of
        # the inner loops because it is constant per user.
        damping = math.log(1 + len(items) * 1.0)
        for i in items:
            N[i] = N.get(i, 0) + 1
            row = C.setdefault(i, dict())
            for j in items:
                if i == j:
                    continue
                row[j] = row.get(j, 0) + 1 / damping

    W = dict()
    for i, related_items in C.items():
        W[i] = dict()
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W

def Recommend(train, user_id, W, K):
    """Score unseen items for a user with ItemCF.

    Args:
        train: mapping ``user -> {item: weight}``; the weight is multiplied
            into every score derived from that item.
        user_id: key into ``train``.
        W: item similarity, ``W[i][j]``.
        K: number of most similar items to expand per history item.

    Returns:
        Dict ``item -> score`` for items the user does not already have.

    Raises:
        KeyError: if ``user_id`` is not in ``train``.
    """
    from operator import itemgetter  # local: the file has no import for it

    rank = dict()
    ru = train[user_id]
    for i, pi in ru.items():
        # An item without a similarity row simply contributes nothing.
        neighbours = sorted(W.get(i, {}).items(),
                            key=itemgetter(1), reverse=True)[0:K]
        for j, wj in neighbours:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + pi * wj
    return rank

基于UserCF算法

# -*- coding: utf-8 -*-

"""

Created on Wed Sep 30 15:36:15 2015

@author: tanyouwei

"""

import math

'''

基于UserCF的推荐算法

'''

# UserCF算法

def UserSimilarity(train):
    """Compute UserCF user-to-user similarity via an item -> users index.

    Args:
        train: mapping ``user -> items`` where ``items`` is a dict keyed by
            item (only the keys are used).

    Returns:
        Nested dict ``W`` with ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])``,
        where ``C[u][v]`` is the number of items both users have and
        ``N[u]`` the number of items of ``u``. Every user with at least one
        item gets a row, possibly empty.
    """
    # Invert the table so only users sharing an item are ever paired.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    C = dict()  # C[u][v]: number of shared items
    N = dict()  # N[u]: number of items of u
    for users in item_users.values():
        for u in users:
            N[u] = N.get(u, 0) + 1
            row = C.setdefault(u, dict())
            for v in users:
                if u == v:
                    continue
                row[v] = row.get(v, 0) + 1

    W = dict()
    for u, related_users in C.items():
        W[u] = dict()
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W

# User-IIF算法

def UserSimilarity_v2(train):
    """Compute User-IIF similarity (shared items damped by item popularity).

    Each shared item contributes ``1 / log(1 + len(users))`` where ``users``
    is the set of users having that item, so popular items count less.

    Args:
        train: mapping ``user -> items`` where ``items`` is a dict keyed by
            item (only the keys are used).

    Returns:
        Nested dict ``W`` with ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])``.
        Every user with at least one item gets a row, possibly empty.
    """
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    C = dict()  # C[u][v]: damped count of shared items
    N = dict()  # N[u]: number of items of u
    for users in item_users.values():
        # A pair (u, v) with u != v needs len(users) >= 2, so the logarithm
        # is strictly positive whenever it is used.
        damping = math.log(1 + len(users))
        for u in users:
            N[u] = N.get(u, 0) + 1
            row = C.setdefault(u, dict())
            for v in users:
                if u == v:
                    continue
                row[v] = row.get(v, 0) + 1 / damping

    W = dict()
    for u, related_users in C.items():
        W[u] = dict()
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W

def Recommend(user, train, W, K=None):
    """Score unseen items for a user with UserCF.

    Args:
        user: key into ``train`` (and ``W``).
        train: mapping ``user -> {item: weight}``.
        W: user similarity, ``W[u][v]``.
        K: number of most similar users to consult; ``None`` (default)
            consults every user that has a similarity entry.

    Returns:
        Dict ``item -> score`` where the score sums ``W[user][v] * weight``
        over the consulted users ``v`` that have the item. Items the user
        already has are skipped.

    Raises:
        KeyError: if ``user`` or one of its neighbours is not in ``train``.
    """
    from operator import itemgetter  # local: the file has no import for it

    rank = dict()
    interacted_items = train[user]
    # A user with no similarity row gets no recommendations.
    neighbours = sorted(W.get(user, {}).items(),
                        key=itemgetter(1), reverse=True)[0:K]
    for v, wuv in neighbours:
        for i, rvi in train[v].items():
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv * rvi
    return rank

基于时间上下文的个性化推荐

# -*- coding: utf-8 -*-

"""

Created on Wed Sep 30 15:36:15 2015

@author: tanyouwei

"""

import math

def RecentPopularity(records, alpha, T):
    """Compute time-decayed item popularity as of time ``T``.

    Args:
        records: iterable of ``(user, item, tm)`` triples.
        alpha: decay rate; larger values discount old events faster.
        T: reference time. Records with ``tm >= T`` are ignored.

    Returns:
        Dict ``item -> sum of 1 / (1 + alpha * (T - tm))`` over the
        remaining records of that item.
    """
    ret = dict()
    for _user, item, tm in records:
        if tm >= T:
            continue
        # Accumulate here directly so the result does not depend on a
        # helper actually storing the value.
        ret[item] = ret.get(item, 0) + 1 / (1.0 + alpha * (T - tm))
    return ret

def addToDict(dicts, item, value):
    """Add ``value`` to ``dicts[item]`` in place.

    A missing ``item`` is treated as 0 before adding. Returns ``None``.
    """
    dicts[item] = dicts.get(item, 0) + value

def ItemSimilarity(train, alpha):
    """Compute time-aware ItemCF similarity.

    A co-occurrence of two items in one user's history contributes
    ``1 / (1 + alpha * |t_ui - t_uj|)``, so items used close together in
    time are considered more similar.

    Args:
        train: mapping ``user -> {item: time}``.
        alpha: time-decay rate.

    Returns:
        Nested dict ``W`` with ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])``.
        Every item seen in ``train`` gets a row, possibly empty.
    """
    C = dict()  # C[i][j]: time-damped co-occurrence weight
    N = dict()  # N[i]: users who have i
    for items in train.values():
        for i, tui in items.items():
            N[i] = N.get(i, 0) + 1
            row = C.setdefault(i, dict())
            for j, tuj in items.items():
                if i == j:
                    continue
                # 1.0 keeps the division floating-point on Python 2 even
                # when alpha and the timestamps are integers.
                row[j] = row.get(j, 0) + 1.0 / (1 + alpha * abs(tui - tuj))

    W = dict()
    for i, related_items in C.items():
        W[i] = dict()
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W

def RecommendItemCF(train, user_id, W, K, t0, alpha=1.0):
    """Score unseen items with time-aware ItemCF.

    Each history item ``i`` pushes its ``K`` most similar items, with the
    similarity discounted by how long before ``t0`` the user touched ``i``:
    ``W[i][j] / (1 + alpha * (t0 - t_ui))``.

    Args:
        train: mapping ``user -> {item: time}``.
            NOTE(review): the values are treated as interaction timestamps,
            matching how the time-aware similarity functions in this section
            read ``train``; confirm against the caller.
        user_id: key into ``train``.
        W: item similarity, ``W[i][j]``.
        K: number of most similar items to expand per history item.
        t0: reference ("now") time; assumed to be >= every timestamp of the
            user so the denominator stays >= 1.
        alpha: time-decay rate (defaults to 1.0).

    Returns:
        Dict ``item -> score`` for items the user does not already have.

    Raises:
        KeyError: if ``user_id`` is not in ``train``.
    """
    from operator import itemgetter  # local: the file has no import for it

    rank = dict()
    ru = train[user_id]
    for i, tui in ru.items():
        # The decay depends only on the history item, so compute it once.
        decay = 1.0 / (1 + alpha * (t0 - tui))
        neighbours = sorted(W.get(i, {}).items(),
                            key=itemgetter(1), reverse=True)[0:K]
        for j, wj in neighbours:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + wj * decay
    return rank

def UserSimilarity(train, alpha=1.0):
    """Compute time-aware UserCF similarity.

    Two users sharing an item contribute
    ``1 / (1 + alpha * |t_ui - t_vi|)``, so users who touched the same item
    at nearby times are considered more similar.

    Args:
        train: mapping ``user -> {item: time}``.
        alpha: time-decay rate (defaults to 1.0).

    Returns:
        Nested dict ``W`` with ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])``,
        ``N[u]`` being the number of items of ``u``. Every user with at
        least one item gets a row, possibly empty.
    """
    # item -> {user: time}, so only users sharing an item are paired.
    item_users = dict()
    for u, items in train.items():
        for i, tui in items.items():
            item_users.setdefault(i, dict())[u] = tui

    C = dict()  # C[u][v]: time-damped count of shared items
    N = dict()  # N[u]: number of items of u
    for users in item_users.values():
        for u, tui in users.items():
            N[u] = N.get(u, 0) + 1
            row = C.setdefault(u, dict())
            for v, tvi in users.items():
                if u == v:
                    continue
                # 1.0 keeps the division floating-point on Python 2.
                row[v] = row.get(v, 0) + 1.0 / (1 + alpha * abs(tui - tvi))

    W = dict()
    for u, related_users in C.items():
        W[u] = dict()
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W

def RecommendUserCF(user, T, train, W, K=None, alpha=1.0):
    """Score unseen items with time-aware UserCF.

    Args:
        user: key into ``train`` (and ``W``).
        T: reference ("now") time; assumed to be >= every timestamp in
            ``train`` so the denominator stays >= 1.
        train: mapping ``user -> {item: time}``.
        W: user similarity, ``W[u][v]``.
        K: number of most similar users to consult; ``None`` (default)
            consults every user that has a similarity entry.
        alpha: time-decay rate (defaults to 1.0).

    Returns:
        Dict ``item -> score`` where the score sums
        ``W[user][v] / (1 + alpha * (T - t_vi))`` over consulted users
        ``v`` that have the item. Items the user already has are skipped.

    Raises:
        KeyError: if ``user`` or one of its neighbours is not in ``train``.
    """
    from operator import itemgetter  # local: the file has no import for it

    rank = dict()
    interacted_items = train[user]
    neighbours = sorted(W.get(user, {}).items(),
                        key=itemgetter(1), reverse=True)[0:K]
    for v, wuv in neighbours:
        for i, tvi in train[v].items():
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv / (1.0 + alpha * (T - tvi))
    return rank

基于LFM算法的个性化推荐

# -*- coding: utf-8 -*-

"""

Created on Wed Sep 30 15:36:15 2015

@author: tanyouwei

"""

'''

items => {'12':'PHP','1203':'Storm','123':'Ubuntu'}

items_pool => [12,32,121,324,532,123,53,1203,429,2932]

user_items => {'1010':[12,1203,123,429]}

'''

def RandomSelectNagativeSample(items, pool=None):
    """Build a training sample of positives plus random negatives.

    Args:
        items: the user's positive items; a list or a dict keyed by item.
        pool: sequence of candidate items to draw negatives from. When
            omitted, the module-level ``items_pool`` is used.

    Returns:
        Dict ``item -> label`` with label 1 for every positive item and 0
        for sampled negatives. At most ``3 * len(items)`` draws are made and
        sampling stops once more than ``len(items)`` negatives were
        collected. With an empty pool only the positives are returned.
    """
    import random  # local: the file has no import for it

    if pool is None:
        pool = items_pool

    # Iterating directly works for both dicts (keys) and lists.
    ret = dict()
    for i in items:
        ret[i] = 1
    if not pool:
        return ret

    n = 0
    for _ in range(0, len(items) * 3):
        item = random.choice(pool)
        if item in ret:
            # Already a positive or an already chosen negative.
            continue
        ret[item] = 0
        n += 1
        if n > len(items):
            break
    return ret

def InitModel(user_items, F):
    """Create the initial latent-factor tables.

    Args:
        user_items: mapping ``user -> iterable of items``.
        F: number of latent factors.

    Returns:
        ``[P, Q]`` where ``P[user][f]`` and ``Q[item][f]`` are set to 1 for
        every ``f`` in ``range(F)``. ``Q`` covers the items of every user,
        not only the first one. Empty input yields ``[{}, {}]``.
    """
    # NOTE(review): every factor starts at the same value, so all F
    # components of a row receive identical updates; consider a small
    # random initialisation if distinct factors are wanted.
    P = dict()
    Q = dict()
    for u, items in user_items.items():
        P[u] = dict.fromkeys(range(0, F), 1)
        for item in items:
            if item not in Q:
                Q[item] = dict.fromkeys(range(0, F), 1)
    return [P, Q]

def LatentFactorModel(user_items, F, N, alpha, lambda1):
    """Train a latent factor model with stochastic gradient descent.

    Args:
        user_items: mapping ``user -> items`` (the user's positive items).
        F: number of latent factors.
        N: number of passes over all users.
        alpha: initial learning rate; multiplied by 0.9 after each pass.
        lambda1: L2 regularisation weight.

    Returns:
        ``[P, Q]`` with ``P[user][f]`` and ``Q[item][f]``.
    """
    [P, Q] = InitModel(user_items, F)
    for _step in range(0, N):
        for user, items in user_items.items():
            samples = RandomSelectNagativeSample(items)
            pu = P[user]
            for item, rui in samples.items():
                # Negative samples may be items no user has, hence without a
                # row yet; start them at 1 like InitModel does.
                qi = Q.setdefault(item, dict.fromkeys(range(0, F), 1))
                # Prediction is the dot product of the two factor rows; it
                # is computed here because P and Q are local to this call.
                eui = rui - sum(pu[f] * qi[f] for f in range(0, F))
                for f in range(0, F):
                    # Read both values first so each update uses the
                    # pre-step value of the other.
                    puf, qif = pu[f], qi[f]
                    pu[f] = puf + alpha * (eui * qif - lambda1 * puf)
                    qi[f] = qif + alpha * (eui * puf - lambda1 * qif)
        alpha *= 0.9
    return [P, Q]

def Recommend(user, P, Q):
    """Score every item for a user from trained latent factors.

    Args:
        user: key into ``P``.
        P: user factors, ``P[user][f]``.
        Q: item factors, ``Q[item][f]`` (the layout ``InitModel`` builds).

    Returns:
        Dict ``item -> sum over f of P[user][f] * Q[item][f]`` for every
        item in ``Q``.

    Raises:
        KeyError: if ``user`` is not in ``P`` or an item row lacks a factor.
    """
    rank = dict()
    pu = P[user]
    for i, qi in Q.items():
        score = 0
        for f, puf in pu.items():
            score += puf * qi[f]
        rank[i] = score
    return rank

def PersonalRank(G, alpha, root, maxsetup):
    """Run PersonalRank (random walk with restart) on a graph.

    Args:
        G: adjacency mapping ``node -> {neighbour: weight}``; only the
            neighbour keys and their count are used.
        alpha: probability of continuing the walk; ``1 - alpha`` is the
            probability of restarting at ``root``.
        root: node the walk starts from and restarts at.
        maxsetup: number of iterations.

    Returns:
        Dict ``node -> visiting probability`` after the last iteration.

    Side effects:
        Writes one line per iteration to standard output in the form
        ``iter:<k>\\t<node>:<prob>,\\t...``.
    """
    import sys  # local: the file has no import for it

    rank = dict.fromkeys(G.keys(), 0)
    rank[root] = 1
    for k in range(maxsetup):
        tmp = dict.fromkeys(G.keys(), 0)
        for i, ri in G.items():
            # Node i spreads alpha * rank[i] evenly over its out-edges.
            share = alpha * rank[i] / (1.0 * len(ri))
            for j in ri:
                tmp[j] = tmp.get(j, 0) + share
        # The restart mass goes to root exactly once per iteration, not once
        # per edge pointing at root, so the ranks keep summing to 1 when
        # every node has out-edges.
        tmp[root] = tmp.get(root, 0) + (1 - alpha)
        rank = tmp

        # sys.stdout.write works on Python 2 and 3 alike.
        parts = ['iter:' + str(k) + "\t"]
        for key, value in rank.items():
            parts.append("%s:%.3f,\t" % (key, value))
        sys.stdout.write("".join(parts) + "\n")
    return rank

if __name__ == '__main__':
    # Demo: bipartite graph with upper-case nodes on one side and
    # lower-case nodes on the other; run 20 iterations starting from 'A'.
    G = {
        'A': {'a': 1, 'c': 1},
        'B': {'a': 1, 'b': 1, 'c': 1, 'd': 1},
        'C': {'c': 1, 'd': 1},
        'a': {'A': 1, 'B': 1},
        'b': {'B': 1},
        'c': {'A': 1, 'B': 1, 'C': 1},
        'd': {'B': 1, 'C': 1},
    }
    PersonalRank(G, 0.85, 'A', 20)

'''

#items_pool = {'12':'PHP','32':'Nginx','121':'Apache','324':'Erlang','532':'Linux','123':'Ubuntu','53':'Java','1203':'Storm','429':'Kafka','2932':'Flume'}

items_pool = [12,32,121,324,532,123,53,1203,429,2932]

items = {'12':'PHP','1203':'Storm','123':'Ubuntu'}

user_items = {'1010':[12,1203,123,429]}

#print RandomSelectNagativeSample(items)

print InitModel(user_items,4)

'''

基于图的推荐算法

# -*- coding: utf-8 -*-

"""

Created on Wed Sep 30 15:36:15 2015

@author: tanyouwei

"""

'''

基于图的推荐算法,二分图

'''

def PersonalRank(G, alpha, root, maxsetup):
    """Run PersonalRank (random walk with restart) on a bipartite graph.

    Args:
        G: adjacency mapping ``node -> {neighbour: weight}``; only the
            neighbour keys and their count are used.
        alpha: probability of continuing the walk; ``1 - alpha`` is the
            probability of restarting at ``root``.
        root: node the walk starts from and restarts at.
        maxsetup: number of iterations.

    Returns:
        Dict ``node -> visiting probability`` after the last iteration.

    Side effects:
        Writes one line per iteration to standard output in the form
        ``iter:<k>\\t<node>:<prob>,\\t...``.
    """
    import sys  # local: the file has no import for it

    rank = dict.fromkeys(G.keys(), 0)
    rank[root] = 1
    for k in range(maxsetup):
        tmp = dict.fromkeys(G.keys(), 0)
        for i, ri in G.items():
            # Node i spreads alpha * rank[i] evenly over its out-edges.
            share = alpha * rank[i] / (1.0 * len(ri))
            for j in ri:
                tmp[j] = tmp.get(j, 0) + share
        # The restart mass goes to root exactly once per iteration, not once
        # per edge pointing at root, so the ranks keep summing to 1 when
        # every node has out-edges.
        tmp[root] = tmp.get(root, 0) + (1 - alpha)
        rank = tmp

        # sys.stdout.write works on Python 2 and 3 alike.
        parts = ['iter:' + str(k) + "\t"]
        for key, value in rank.items():
            parts.append("%s:%.3f,\t" % (key, value))
        sys.stdout.write("".join(parts) + "\n")
    return rank

if __name__ == '__main__':
    # Demo: bipartite graph with upper-case nodes on one side and
    # lower-case nodes on the other; run 20 iterations starting from 'C'.
    G = {
        'A': {'a': 1, 'c': 1},
        'B': {'a': 1, 'b': 1, 'c': 1, 'd': 1},
        'C': {'c': 1, 'd': 1},
        'a': {'A': 1, 'B': 1},
        'b': {'B': 1},
        'c': {'A': 1, 'B': 1, 'C': 1},
        'd': {'B': 1, 'C': 1},
    }
    PersonalRank(G, 0.85, 'C', 20)

基于标签的推荐算法

# -*- coding: utf-8 -*-

"""

Created on Wed Sep 30 15:36:15 2015

@author: tanyouwei

"""

import math

#标签流行度算法

def TagPopularity(records):
    """Count how many times each tag was used.

    Args:
        records: iterable of ``(user, item, tag)`` triples.

    Returns:
        Dict ``tag -> number of records carrying that tag``.
    """
    tagfreq = dict()
    for _user, _item, tag in records:
        tagfreq[tag] = tagfreq.get(tag, 0) + 1
    return tagfreq

#物品相似度余弦算法

def CosineSim(item_tags, i, j):
    """Cosine similarity of two items over their tag-weight vectors.

    Args:
        item_tags: mapping ``item -> {tag: weight}``.
        i, j: the two items to compare; both must be keys of ``item_tags``.

    Returns:
        ``dot(i, j) / sqrt(|i|^2 * |j|^2)``, or 0 when the dot product is 0
        (for example when the items share no tag).

    Raises:
        KeyError: if ``i`` or ``j`` is not in ``item_tags``.
    """
    tags_i = item_tags[i]
    tags_j = item_tags[j]
    ret = sum(wib * tags_j[b] for b, wib in tags_i.items() if b in tags_j)
    # Bail out before computing the norms: they are not needed for a zero
    # dot product, and this also avoids dividing by a zero norm.
    if ret == 0:
        return 0
    ni = sum(w * w for w in tags_i.values())
    nj = sum(w * w for w in tags_j.values())
    return ret / math.sqrt(ni * nj)

#推荐物品的多样性算法

def Diversity(item_tags, recommend_items):
    """Average pairwise tag similarity of a recommendation list.

    Args:
        item_tags: mapping ``item -> {tag: weight}``, passed to
            ``CosineSim``.
        recommend_items: recommended items; a dict keyed by item (any
            iterable of items works).

    Returns:
        Mean of ``CosineSim(item_tags, i, j)`` over all ordered pairs of
        distinct items, as a float. ``0.0`` when there are fewer than two
        items, since there is no pair to average.
    """
    ret = 0
    n = 0
    for i in recommend_items:
        for j in recommend_items:
            if i == j:
                continue
            ret += CosineSim(item_tags, i, j)
            n += 1
    if n == 0:
        # No pairs: avoid dividing by zero.
        return 0.0
    return ret / (n * 1.0)

def addValueToMat(dicts, index, k, v):
    """Add ``v`` to the cell ``dicts[index][k]`` of a nested dict, in place.

    The row ``dicts[index]`` is created when missing. A missing cell is set
    to ``v``; an existing cell is incremented by ``v``. Returns ``None``.
    """
    row = dicts.setdefault(index, dict())
    if k in row:
        row[k] += v
    else:
        row[k] = v

def InitStat(records):
    """Build the tag statistics tables from tagging records.

    Args:
        records: iterable of ``(user, item, tag)`` triples.

    Returns:
        Tuple ``(user_tags, tag_items, user_items)`` where
        ``user_tags[u][b]`` counts how often user ``u`` used tag ``b``,
        ``tag_items[b][i]`` counts how often item ``i`` received tag ``b``
        and ``user_items[u][i]`` counts how often ``u`` tagged item ``i``.

    Side effects:
        Rebinds the module-level names ``user_tags``, ``tag_items`` and
        ``user_items`` to the freshly built tables, so code that reads
        those globals sees the result.
    """
    global user_tags, tag_items, user_items
    user_tags = dict()   # user_tags[u][b] = n(u, b)
    tag_items = dict()   # tag_items[b][i] = n(b, i)
    user_items = dict()  # user_items[u][i] = n(u, i)
    # Iterate the records themselves: each one is a (user, item, tag)
    # triple, which dict.items() pairs could never unpack into.
    for user, item, tag in records:
        addValueToMat(user_tags, user, tag, 1)
        addValueToMat(tag_items, tag, item, 1)
        addValueToMat(user_items, user, item, 1)
    return user_tags, tag_items, user_items

def Recommend(user):
    """Recommend items to a user through the tags the user has used.

    Reads the module-level tables ``user_items``, ``user_tags`` and
    ``tag_items``.

    Args:
        user: the user to recommend for.

    Returns:
        Dict ``item -> score`` where the score sums
        ``user_tags[user][tag] * tag_items[tag][item]`` over the user's
        tags. Items the user already tagged are skipped. A user with no
        recorded tags gets an empty dict.
    """
    recommend_items = dict()
    # .get keeps unknown users from raising KeyError.
    tagged_items = user_items.get(user, {})
    for tag, wut in user_tags.get(user, {}).items():
        # wut = wut*1.0/math.log(1+len(tag_users[tag])) #TagBasedTFIDF and TagBasedTFIDF++
        for item, wti in tag_items.get(tag, {}).items():
            # wti = wti*1.0/math.log(1+len(user_items[user])) #TagBasedTFIDF++
            if item in tagged_items:
                continue
            recommend_items[item] = recommend_items.get(item, 0) + wut * wti
    return recommend_items

if __name__ == "__main__":
    # Module-level tables read by Recommend().
    user_tags = dict()
    user_items = dict()
    tag_items = dict()

    records = dict()
    user = '1220'

    InitStat(records)
    # Only ask for recommendations when something was recorded for the
    # user; with no records there is nothing to look up.
    rec_items = Recommend(user) if user in user_items else dict()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: