您的位置:首页 > 移动开发

Appstore proj: Calculate an App's Top-5 related apps(1)

2016-01-12 11:01 507 查看
How to calculate similarity?

Use cosine similarity! For two app lists A and B (treated as binary vectors): similarity = |A ∩ B| / sqrt(|A| * |B|).











Edit helper.py.

This way you can call the functions of the Helper class whenever needed:

>>> helper = Helper()

>>> similarity = helper.cosine_similarity(app_list1, app_list2)

import operator
import math

class Helper(object):
    """Similarity utilities for comparing lists of app ids."""

    @classmethod
    def cosine_similarity(cls, app_list1, app_list2):
        """Return the cosine similarity of two app lists.

        Each list is treated as a binary vector, so the result is
        ``matches / sqrt(len(app_list1) * len(app_list2))`` where
        ``matches`` is the number of elements of ``app_list1`` that also
        occur in ``app_list2``.

        Returns 0.0 when either list is empty: nothing can match, and the
        formula would otherwise divide by zero.
        """
        if len(app_list1) == 0 or len(app_list2) == 0:
            return 0.0
        match_count = cls.__count_match(app_list1, app_list2)
        return float(match_count) / math.sqrt(len(app_list1) * len(app_list2))

    @classmethod
    def __count_match(cls, list1, list2):
        """Count the elements of ``list1`` that also occur in ``list2``."""
        try:
            # Build the lookup once so each membership test is O(1).
            lookup = set(list2)
        except TypeError:
            # Unhashable elements: fall back to scanning the list itself.
            lookup = list2
        return sum(1 for element in list1 if element in lookup)

def calculate_top_5(app, user_download_history):
    """Print the apps most related to ``app`` (at most five).

    For every download history, the cosine similarity between ``[app]``
    and that history is added to the score of each app in the history.
    The highest-scoring apps other than ``app`` itself are then printed.

    Args:
        app: id of the app to find related apps for.
        user_download_history: iterable of download histories, each a
            list of app ids.

    Returns:
        None. Nothing is printed when ``app`` appears in no history.
    """
    # {app_id: accumulated similarity to ``app``}
    app_similarity = {}
    for apps in user_download_history:
        if not apps:
            # An empty history has no apps to score, and its similarity
            # would divide by zero.
            continue
        similarity = Helper.cosine_similarity([app], apps)
        for other_app in apps:
            app_similarity[other_app] = app_similarity.get(other_app, 0.0) + similarity

    # There could be an app without related apps (not in any download history).
    if app not in app_similarity:
        return

    # The app itself always scores highest; drop it before ranking.
    app_similarity.pop(app)
    sorted_tups = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    # Slice instead of indexing 0..4 so fewer than five candidates is fine.
    top_5_app = [other_app for other_app, _ in sorted_tups[:5]]
    print("top_5_app for " + str(app) + ":\t" + str(top_5_app))


Edit dataservice.py

Retrieve and store data.

from pymongo import MongoClient
import random

# about data
class DataService(object):
    """Class-level access point to the ``appstore`` database collections."""

    @classmethod
    def init(cls, client):
        """Bind the class to ``client`` and cache the collections it uses.

        Args:
            client: database client exposing an ``appstore`` database with
                ``user_download_history`` and ``app_info`` collections.
        """
        cls.client = client
        cls.db = client.appstore
        cls.user_download_history = cls.db.user_download_history
        cls.app_info = cls.db.app_info

    @classmethod
    def retrieve_user_download_history(cls, filter_dict=None):
        """Return ``{user_id: download_history}`` for the matching records.

        Args:
            filter_dict: query filter passed to ``find``; when omitted, an
                empty filter is used so every record is returned.
        """
        if filter_dict is None:
            # Fresh dict per call: a ``{}`` default would be one shared object.
            filter_dict = {}
        result = {}
        for record in cls.user_download_history.find(filter_dict):
            result[record['user_id']] = record['download_history']
        return result


Edit main.py

from pymongo import MongoClient
from dataservice import DataService
from helper import calculate_top_5

def main():
    """Connect to the local MongoDB and print the top related apps for one app.

    Any exception raised during the work flow is printed rather than
    propagated, and the client is closed afterwards if it was created.
    """
    # Sentinel so ``finally`` can tell whether the connection was created,
    # without inspecting ``locals()``.
    client = None
    try:
        # Get the MongoDB client and set it in DataService.
        client = MongoClient('localhost', 27017)
        DataService.init(client)
        # Work flow.
        user_download_history = DataService.retrieve_user_download_history()
        calculate_top_5('C10107104', user_download_history.values())
    except Exception as e:
        print(e)
    finally:
        # Clean-up work.
        if client is not None:
            client.close()


if __name__ == "__main__":
    main()


output:

top_5_app for C10107104: [u'C10129690', u'C5341', u'C20252', u'C10191382', u'C183901']
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: