您的位置:首页 > 其它

新浪微博数据挖掘食谱之十三: 微博篇 (批量获取用户的微博)

2015-01-09 06:05 579 查看
#!/usr/bin/python
# -*- coding: utf-8 -*-

'''
Created on 2015-1-9
@author: beyondzhou
@name: harvest_users_weibo.py
'''

# Harvest users weibo
def harvest_users_weibo():
    """Log in to the Weibo API, harvest one user's timeline and print it.

    The timeline of the hard-coded screen name is fetched through
    ``harvest_user_timeline`` and written to stdout as JSON indented by
    one space per level.
    """
    # Imports stay function-local, exactly as in the original script.
    import json
    from login import weibo_login
    from statuses import harvest_user_timeline

    # Access to sina api
    weibo_api = weibo_login()

    screen_name = 'beyondzhou8'
    tweets = harvest_user_timeline(weibo_api, screen_name=screen_name)

    # A parenthesised single argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(json.dumps(tweets, indent=1))

# Run the harvest only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    harvest_users_weibo()


# Harvest user weibo
def harvest_user_timeline(weibo_api, screen_name=None, user_id=None, max_results=2000):
    """Collect statuses from one user's timeline, newest page first.

    Pages are requested through ``make_weibo_request`` until one of the
    following holds: ``max_pages`` requests have been made, a page comes
    back empty, or at least ``max_results`` statuses have been gathered.
    Progress messages are written to ``sys.stderr``.

    Args:
        weibo_api: API client; ``weibo_api.statuses.user_timeline.get`` is
            handed to ``make_weibo_request`` as the callable to invoke.
        screen_name: Screen name of the user to harvest. Exactly one of
            ``screen_name`` / ``user_id`` must be given.
        user_id: Numeric id of the user to harvest.
        max_results: Upper bound on the number of statuses returned.

    Returns:
        A list of status dicts, truncated to ``max_results`` entries.

    Raises:
        AssertionError: If both or neither of ``screen_name`` and
            ``user_id`` are supplied.
    """
    # Local import: the progress messages go to sys.stderr and no
    # module-level import of sys is visible alongside this function.
    import sys

    assert (screen_name is not None) != (user_id is not None), \
        "Must have screen_name or user_id, but not both"

    kw = {  # Keyword args for the Weibo API call
        'count': 200,
        'trim_user': 1,
        'since_id': 1,
    }

    # Test against None so the branch agrees with the assert above; a plain
    # truthiness test would send user_id=None for an empty screen name.
    if screen_name is not None:
        kw['screen_name'] = screen_name
    else:
        kw['user_id'] = user_id

    max_pages = 16
    results = []

    tweets = _fetch_statuses(weibo_api, kw)
    results += tweets
    sys.stderr.write('Fetched %itweets\n' % len(tweets))

    page_num = 1
    if max_results == kw['count']:
        page_num = max_pages  # Prevent loop entry

    while page_num < max_pages and len(tweets) > 0 and len(results) < max_results:
        # Ask only for statuses strictly older than the oldest one seen in
        # the previous page, so pages do not overlap.
        kw['max_id'] = min(tweet['id'] for tweet in tweets) - 1
        tweets = _fetch_statuses(weibo_api, kw)
        results += tweets
        sys.stderr.write('Fetched %itweets\n' % (len(tweets),))
        page_num += 1

    sys.stderr.write('Done fetching tweets\n')
    return results[:max_results]


def _fetch_statuses(weibo_api, kw):
    """Request one timeline page and return its list of statuses.

    Returns an empty list when ``make_weibo_request`` yields nothing or
    the ``'statuses'`` entry is ``None``. The original code's comment
    mentions a 401 (Not Authorized) response as the case to bail out on;
    presumably that is when the helper returns ``None`` - verify against
    ``make_weibo_request``.
    """
    response = make_weibo_request(weibo_api.statuses.user_timeline.get, **kw)
    if response is None:
        # Subscripting None would raise TypeError before any check could run.
        return []
    statuses = response['statuses']
    return [] if statuses is None else statuses

Result:

callback_url: https://api.weibo.com/oauth2/authorize?redirect_uri=http%3A//apps.weibo.com/guaguastd&response_type=code&client_id=2925245021
return_redirect_uri: http://weibo.com/login.php?url=http%3A%2F%2Fapps.weibo.com%2Fguaguastd%3Fcode%3Dbfbcd4b974f9f4fc9ba31f4f7097d7a1
code: ['bfbcd4b974f9f4fc9ba31f4f7097d7a1']
Fetched 5tweets
Fetched 0tweets
Done fetching tweets
[
{
"darwin_tags": [],
"reposts_count": 0,
"source_type": 1,
"uid": 3286717353,
"favorited": false,
"text": "\u65b0\u6d6a\u5fae\u535a\u6570\u636e\u6316\u6398\u98df\u8c31 http://t.cn/RZyisHd",
"created_at": "Wed Dec 31 07:00:28 +0800 2014",
"truncated": false,
"visible": {
"type": 0,
"list_id": 0
},
"idstr": "3793735240326731",
"mid": "3793735240326731",
"source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>",
"attitudes_count": 0,
"pic_urls": [],
"in_reply_to_screen_name": "",
"in_reply_to_user_id": "",
"in_reply_to_status_id": "",
"comments_count": 0,
"geo": null,
"id": 3793735240326731,
"mlevel": 0
},
{
"darwin_tags": [],
"reposts_count": 0,
"source_type": 1,
"uid": 3286717353,
"favorited": false,
"text": "Python \u63d0\u53d6\u65b0\u6d6a\u516c\u5171\u5fae\u535a\u4e2d\u8f6c\u8f7d\u6b21\u6570\u6700\u591a\u7684\u5fae\u535a\u8f6c\u8f7d\u6570\uff0c\u7528\u6237\u540d\uff0c\u5185\u5bb9 http://t.cn/RvFMuci",
"created_at": "Fri Jul 11 09:15:04 +0800 2014",
"truncated": false,
"visible": {
"type": 0,
"list_id": 0
},
"idstr": "3731076012679100",
"mid": "3731076012679100",
"source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>",
"attitudes_count": 0,
"pic_urls": [],
"in_reply_to_screen_name": "",
"in_reply_to_user_id": "",
"in_reply_to_status_id": "",
"comments_count": 0,
"geo": null,
"id": 3731076012679100,
"mlevel": 0
},
{
"darwin_tags": [],
"reposts_count": 0,
"source_type": 1,
"uid": 3286717353,
"favorited": false,
"text": "Python \u5bf9\u65b0\u6d6a\u5fae\u535a\u7684\u5143\u7d20 (Word, Screen Name)\u7684\u8bcd\u6c47\u591a\u6837\u6027\u5206\u6790 http://t.cn/Rvk5vp3",
"created_at": "Thu Jul 10 07:18:21 +0800 2014",
"truncated": false,
"visible": {
"type": 0,
"list_id": 0
},
"idstr": "3730684251828947",
"mid": "3730684251828947",
"source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>",
"attitudes_count": 0,
"pic_urls": [],
"in_reply_to_screen_name": "",
"in_reply_to_user_id": "",
"in_reply_to_status_id": "",
"comments_count": 0,
"geo": null,
"id": 3730684251828947,
"mlevel": 0
},
{
"darwin_tags": [],
"reposts_count": 0,
"source_type": 1,
"uid": 3286717353,
"favorited": false,
"text": "Python \u5bf9Twitter\u4e2d\u6307\u5b9a\u8bdd\u9898\u7684\u88ab\u8f6c\u8f7dTweet\u6570\u91cf\u7684\u9891\u8c31\u5206\u6790 http://t.cn/RvkGP6L",
"created_at": "Thu Jul 10 06:03:48 +0800 2014",
"truncated": false,
"visible": {
"type": 0,
"list_id": 0
},
"idstr": "3730665490725827",
"mid": "3730665490725827",
"source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>",
"attitudes_count": 0,
"pic_urls": [],
"in_reply_to_screen_name": "",
"in_reply_to_user_id": "",
"in_reply_to_status_id": "",
"comments_count": 0,
"geo": null,
"id": 3730665490725827,
"mlevel": 0
},
{
"darwin_tags": [],
"reposts_count": 3,
"source_type": 1,
"uid": 3286717353,
"favorited": false,
"text": "Hi",
"created_at": "Wed Mar 12 16:42:28 +0800 2014",
"truncated": false,
"visible": {
"type": 0,
"list_id": 0
},
"idstr": "3687339672990740",
"mid": "3687339672990740",
"source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>",
"attitudes_count": 0,
"pic_urls": [],
"in_reply_to_screen_name": "",
"in_reply_to_user_id": "",
"in_reply_to_status_id": "",
"comments_count": 3,
"geo": null,
"id": 3687339672990740,
"mlevel": 0
}
]
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: