您的位置:首页 > 其它

获取动态加载ajax的数据

2017-10-08 14:58 393 查看


get_info

import re
import urllib2
import json
import pandas as pd

# Shared result list. Every Qyinyue instance appends the records it parsed
# here, and callers read it back as the module attribute ``datas``. It is
# created once at import time so that constructing a new instance never
# discards records another instance has already stored.
datas = []


class Qyinyue(object):
    """Fetch one URL and append the JSON array found in its response to ``datas``.

    Constructing an instance immediately performs the download and the
    parsing (``__init__`` calls ``start``), so the class can be handed to a
    worker as a plain callable taking the URL.
    """

    def __init__(self, url):
        """Remember *url* and run the fetch/parse cycle right away.

        Args:
            url: Address of the page to download.
        """
        self.url = url
        self.start()

    def start(self):
        """Download the page and, if the download succeeded, parse it."""
        my_page = self.get_page()
        if my_page is None:
            # The download failed and was already reported by get_page().
            return
        self.get_info(my_page)

    def get_page(self):
        """Return the body of ``self.url`` decoded as UTF-8.

        Returns:
            The decoded response text, or ``None`` when the request failed
            with an HTTP or URL error (the error is printed).
        """
        req = urllib2.Request(self.url, headers={'User-agent': 'Magic Browser'})
        try:
            return urllib2.urlopen(req).read().decode('utf-8')
        except urllib2.URLError as e:
            # HTTPError is a subclass of URLError and carries ``code``;
            # a plain URLError only carries ``reason``.
            if hasattr(e, 'code'):
                print("e.code:%s" % e.code)
            elif hasattr(e, 'reason'):
                print("e.reason:%s" % e.reason)
            return None

    def get_info(self, my_page):
        """Extract the first JSON array in *my_page* and add its items to ``datas``.

        The response wraps the JSON in other text, so parsing starts at the
        first ``[`` and stops where that array value ends. Letting the JSON
        decoder find the end keeps nested arrays and ``]`` characters inside
        strings intact.

        Args:
            my_page: Response text containing a JSON array.

        Raises:
            ValueError: If *my_page* contains no ``[`` or the text starting
                there is not valid JSON.
        """
        find_start = my_page.find('[')
        if find_start == -1:
            raise ValueError("no JSON array found in response")
        pd_json, _ = json.JSONDecoder().raw_decode(my_page[find_start:])
        datas.extend(pd_json)


mythread

import time

import pandas as pd

import mythread as mt
import get_info as gi

# Singer-list endpoint with every query parameter except the page number,
# which is appended per request. The parameter is spelled ``&notice=0``;
# the pasted form had it collapsed into a single non-ASCII character.
SINGER_LIST_URL = (
    'https://c.y.qq.com/v8/fcg-bin/v8.fcg?channel=singer&page=list'
    '&key=all_all_all&pagesize=100&g_tk=5381'
    '&jsonpCallback=GetSingerListCallback&loginUin=0&hostUin=0'
    '&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0'
    '&platform=yqq&needNewCode=0&pagenum='
)


def build_url_list(pages):
    """Return one singer-list URL for each page number in *pages*.

    Args:
        pages: Iterable of page numbers.

    Returns:
        A list of URL strings, in the same order as *pages*.
    """
    return [SINGER_LIST_URL + str(page) for page in pages]


if __name__ == '__main__':
    # Pages 1 through 99, one URL (and one worker) per page.
    urlList = build_url_list(range(1, 100))
    # NOTE(review): mythread is not shown here; presumably Mythread(func, args)
    # calls func(*args) in its own thread -- confirm against that module.
    threadList = [mt.Mythread(gi.Qyinyue, (url,)) for url in urlList]
    start_time = time.time()
    for t in threadList:
        t.setDaemon(True)
        t.start()
    # Wait for every worker before reading the shared results.
    for t in threadList:
        t.join()
    end_time = time.time()
    df = pd.DataFrame(gi.datas, columns=['Fother_name', 'Fsinger_id', 'Fsinger_name'])
    print(df)
    print("use the time is %s s" % (end_time - start_time))


testmain

import time

import pandas as pd

import mythread as mt
import get_info as gi

# Singer-list endpoint with every query parameter except the page number,
# which is appended per request. The parameter is spelled ``&notice=0``;
# the pasted form had it collapsed into a single non-ASCII character.
SINGER_LIST_URL = (
    'https://c.y.qq.com/v8/fcg-bin/v8.fcg?channel=singer&page=list'
    '&key=all_all_all&pagesize=100&g_tk=5381'
    '&jsonpCallback=GetSingerListCallback&loginUin=0&hostUin=0'
    '&format=jsonp&inCharset=utf8&outCharset=utf-8&notice=0'
    '&platform=yqq&needNewCode=0&pagenum='
)


def build_url_list(pages):
    """Return one singer-list URL for each page number in *pages*.

    Args:
        pages: Iterable of page numbers.

    Returns:
        A list of URL strings, in the same order as *pages*.
    """
    return [SINGER_LIST_URL + str(page) for page in pages]


if __name__ == '__main__':
    # Pages 1 through 99, one URL (and one worker) per page.
    urlList = build_url_list(range(1, 100))
    # NOTE(review): mythread is not shown here; presumably Mythread(func, args)
    # calls func(*args) in its own thread -- confirm against that module.
    threadList = [mt.Mythread(gi.Qyinyue, (url,)) for url in urlList]
    start_time = time.time()
    # Start every worker first so the downloads overlap.
    for t in threadList:
        t.setDaemon(True)
        t.start()
    # Then wait for all of them before reading the shared results.
    for t in threadList:
        t.join()
    end_time = time.time()
    df = pd.DataFrame(gi.datas, columns=['Fother_name', 'Fsinger_id', 'Fsinger_name'])
    print(df)
    print("use the time is %s s" % (end_time - start_time))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: