您的位置:首页 > 其它

Page rank

2015-06-03 12:09 351 查看
Implement the PageRank algorithm in Python.

Run: ./**.py -o <output file> -d <damping factor> -e <epsilon> <infile>

Data format:

First line: the max node number (the code reads it from the second space-separated field of the line).

Remaining lines: "node id:out-link count" followed by the space-separated ids of the nodes it links to.

from __future__ import division
#!usr/bin/python

import sys
import getopt
import scipy.sparse as sp
from scipy.spatial import distance
from numpy import *
import pdb

def receiveArguments(argv):
    """Parse the command-line arguments of the PageRank script.

    Expected form: ``-d <damping factor> -e <epsilon> -o <output file> <infile>``.

    Args:
        argv: Argument list without the program name (e.g. ``sys.argv[1:]``).

    Returns:
        A tuple ``(dfactor, epsilon, output, infile)`` of raw strings, or
        ``None`` when an unknown option is given ('Wrong arguments' is
        printed) or when any option or the input file is missing or empty
        ('Lack arguments' is printed).
    """
    try:
        opts, args = getopt.getopt(argv, 'd:e:o:', [])
    except getopt.GetoptError:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Wrong arguments')
        return None

    # An empty string marks "not supplied", so an explicitly empty value
    # (e.g. -d '') is rejected as well.
    values = {'-d': '', '-e': '', '-o': ''}
    for name, value in opts:
        # getopt only ever yields the three options declared above.
        values[name] = value

    if not args or '' in values.values():
        print('Lack arguments')
        return None
    return values['-d'], values['-e'], values['-o'], args[0]

def parseGraph(lines):
    """Build the PageRank transition matrix from the lines of an input file.

    Args:
        lines: List of text lines. The first line carries the max node
            number as its second whitespace-separated field (a lone number
            is accepted too). Every following non-blank line has the form
            ``nodeId:outCount target target ...`` with 1-based node ids.

    Returns:
        A tuple ``(maxNode, addP, smatrixA)`` where ``addP`` is a float
        vector holding 1 for nodes without out-links (dangling nodes) and 0
        otherwise, and ``smatrixA`` is a ``maxNode x maxNode`` CSC matrix
        with ``A[target, source] = 1 / outCount``.

    Raises:
        ValueError: If ``lines`` is empty or a field is not a valid integer.
    """
    if not lines:
        raise ValueError('input file is empty')

    header = lines[0].split()
    maxNode = int(header[1]) if len(header) > 1 else int(header[0])

    addP = ones(maxNode)
    rowIndex = []
    colIndex = []
    data = []

    for line in lines[1:]:
        if not line.strip():
            # Tolerate blank lines (typically a trailing one).
            continue
        fields = line.split(':')
        nodeId = int(fields[0])
        # split() without an argument copes with a space after the colon
        # and with repeated or trailing spaces.
        listing = fields[1].split()
        outNum = int(listing[0])
        targets = listing[1:]

        # Only a node that really links somewhere stops being dangling;
        # otherwise its rank would vanish instead of being redistributed.
        if targets:
            addP[nodeId - 1] = 0
        for node in targets:
            rowIndex.append(int(node) - 1)
            colIndex.append(nodeId - 1)
            data.append(1.0 / outNum)

    smatrixA = sp.csc_matrix(
        (array(data, dtype=float),
         (array(rowIndex, dtype=int), array(colIndex, dtype=int))),
        shape=(maxNode, maxNode))
    return maxNode, addP, smatrixA


def pageRank(smatrixA, addP, d, e):
    """Iterate the PageRank update until two successive vectors are close.

    Args:
        smatrixA: Sparse transition matrix as returned by ``parseGraph``.
        addP: Dangling-node indicator vector as returned by ``parseGraph``.
        d: Damping factor.
        e: Stop once the Euclidean distance between two successive score
            vectors is not greater than this value.

    Returns:
        The score vector (one entry per node); every node starts at 1.
    """
    maxNode = addP.shape[0]
    smoothP = ones(maxNode) * (1 - d)
    p1 = ones(maxNode) * 100  # far from the start vector, forces one pass
    p2 = ones(maxNode)

    while distance.euclidean(p1, p2) > e:
        p1 = p2
        # Total rank currently held by dangling nodes.
        add = dot(p1, addP)
        if maxNode > 1:
            # Each dangling node spreads its rank evenly over the other
            # maxNode - 1 nodes, so a dangling node's own share is removed.
            dangling = (add - addP * p1) / (maxNode - 1)
        else:
            # A single node has nobody to hand its rank to; avoids 0/0.
            dangling = 0.0
        p2 = smoothP + d * (smatrixA.dot(p1) + dangling)
    return p2


def formatScores(scores):
    """Render the scores as ``nodeId:score`` lines with 1-based node ids.

    Each score is rounded to six decimals; trailing zeros are dropped.
    """
    return ''.join(
        str(index + 1) + ':' + str(float('%.6f' % score)) + '\n'
        for index, score in enumerate(scores))


def main(argv):
    """Run the script: parse arguments, rank the graph, write the result."""
    parsed = receiveArguments(argv)
    if parsed is None:
        # receiveArguments has already printed the reason.
        sys.exit(1)
    d, e, outfile, infile = parsed
    d = float(d)
    e = float(e)

    with open(infile, 'r') as pf:
        fileList = pf.read().splitlines()
    maxNode, addP, smatrixA = parseGraph(fileList)
    del fileList  # the raw text is no longer needed once the matrix exists

    scores = pageRank(smatrixA, addP, d, e)
    del smatrixA

    with open(outfile, 'w') as pf:
        pf.write(formatScores(scores))


if __name__ == '__main__':
    main(sys.argv[1:])
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: