新浪微博数据挖掘食谱之十三: 微博篇 (批量获取用户的微博)
2015-01-09 06:05
579 查看
#!/usr/bin/python # -*- coding: utf-8 -*- ''' Created on 2015-1-9 @author: beyondzhou @name: harvest_users_weibo.py ''' # Harvest users weibo def harvest_users_weibo(): # import import json from login import weibo_login from statuses import harvest_user_timeline # Access to sina api weibo_api = weibo_login() screen_name = 'beyondzhou8' tweets = harvest_user_timeline(weibo_api, screen_name=screen_name) print json.dumps(tweets, indent=1) if __name__ == '__main__': harvest_users_weibo()
# Harvest user weibo
def harvest_user_timeline(weibo_api, screen_name=None, user_id=None,
                          max_results=2000):
    """Page backwards through a user's Weibo timeline.

    Exactly one of screen_name / user_id must be supplied to identify the
    user. Pages of up to 200 statuses are fetched via make_weibo_request
    (defined elsewhere in this project) until max_pages pages, an empty
    page, or max_results statuses have been collected.

    Args:
        weibo_api: authenticated Weibo API client from weibo_login().
        screen_name: user's screen name (mutually exclusive with user_id).
        user_id: numeric user id (mutually exclusive with screen_name).
        max_results: cap on the number of statuses returned.

    Returns:
        A list of status dicts, at most max_results long.

    Raises:
        ValueError: if neither or both of screen_name / user_id are given.
    """
    # Explicit raise instead of the original assert: assert is stripped
    # when Python runs with -O, silently skipping the validation.
    if (screen_name is None) == (user_id is None):
        raise ValueError("Must have screen_name or user_id, but not both")

    kw = {  # Keyword args for the Weibo API call
        'count': 200,
        'trim_user': 1,
        'since_id': 1,
        }

    if screen_name:
        kw['screen_name'] = screen_name
    else:
        kw['user_id'] = user_id

    max_pages = 16
    results = []

    tweets = make_weibo_request(weibo_api.statuses.user_timeline.get,
                                **kw)['statuses']

    if tweets is None:  # 401 (Not Authorized) - Need to bail out on loop entry
        tweets = []

    results += tweets

    # Fixed format string: original 'Fetched %itweets' printed "Fetched 5tweets"
    print >> sys.stderr, 'Fetched %i tweets' % len(tweets)

    page_num = 1

    if max_results == kw['count']:
        page_num = max_pages  # Prevent loop entry

    while page_num < max_pages and len(tweets) > 0 and \
            len(results) < max_results:
        # Page backwards: ask only for statuses strictly older than the
        # oldest one seen so far.
        kw['max_id'] = min(tweet['id'] for tweet in tweets) - 1

        tweets = make_weibo_request(weibo_api.statuses.user_timeline.get,
                                    **kw)['statuses']
        results += tweets

        print >> sys.stderr, 'Fetched %i tweets' % (len(tweets),)

        page_num += 1

    print >> sys.stderr, 'Done fetching tweets'

    return results[:max_results]
Result:
callback_url: https://api.weibo.com/oauth2/authorize?redirect_uri=http%3A//apps.weibo.com/guaguastd&response_type=code&client_id=2925245021 return_redirect_uri: http://weibo.com/login.php?url=http%3A%2F%2Fapps.weibo.com%2Fguaguastd%3Fcode%3Dbfbcd4b974f9f4fc9ba31f4f7097d7a1 code: ['bfbcd4b974f9f4fc9ba31f4f7097d7a1'] Fetched 5tweets Fetched 0tweets Done fetching tweets [ { "darwin_tags": [], "reposts_count": 0, "source_type": 1, "uid": 3286717353, "favorited": false, "text": "\u65b0\u6d6a\u5fae\u535a\u6570\u636e\u6316\u6398\u98df\u8c31 http://t.cn/RZyisHd", "created_at": "Wed Dec 31 07:00:28 +0800 2014", "truncated": false, "visible": { "type": 0, "list_id": 0 }, "idstr": "3793735240326731", "mid": "3793735240326731", "source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>", "attitudes_count": 0, "pic_urls": [], "in_reply_to_screen_name": "", "in_reply_to_user_id": "", "in_reply_to_status_id": "", "comments_count": 0, "geo": null, "id": 3793735240326731, "mlevel": 0 }, { "darwin_tags": [], "reposts_count": 0, "source_type": 1, "uid": 3286717353, "favorited": false, "text": "Python \u63d0\u53d6\u65b0\u6d6a\u516c\u5171\u5fae\u535a\u4e2d\u8f6c\u8f7d\u6b21\u6570\u6700\u591a\u7684\u5fae\u535a\u8f6c\u8f7d\u6570\uff0c\u7528\u6237\u540d\uff0c\u5185\u5bb9 http://t.cn/RvFMuci", "created_at": "Fri Jul 11 09:15:04 +0800 2014", "truncated": false, "visible": { "type": 0, "list_id": 0 }, "idstr": "3731076012679100", "mid": "3731076012679100", "source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>", "attitudes_count": 0, "pic_urls": [], "in_reply_to_screen_name": "", "in_reply_to_user_id": "", "in_reply_to_status_id": "", "comments_count": 0, "geo": null, "id": 3731076012679100, "mlevel": 0 }, { "darwin_tags": [], "reposts_count": 0, "source_type": 1, "uid": 3286717353, "favorited": false, "text": "Python \u5bf9\u65b0\u6d6a\u5fae\u535a\u7684\u5143\u7d20 (Word, Screen Name)\u7684\u8bcd\u6c47\u591a\u6837\u6027\u5206\u6790 
http://t.cn/Rvk5vp3", "created_at": "Thu Jul 10 07:18:21 +0800 2014", "truncated": false, "visible": { "type": 0, "list_id": 0 }, "idstr": "3730684251828947", "mid": "3730684251828947", "source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>", "attitudes_count": 0, "pic_urls": [], "in_reply_to_screen_name": "", "in_reply_to_user_id": "", "in_reply_to_status_id": "", "comments_count": 0, "geo": null, "id": 3730684251828947, "mlevel": 0 }, { "darwin_tags": [], "reposts_count": 0, "source_type": 1, "uid": 3286717353, "favorited": false, "text": "Python \u5bf9Twitter\u4e2d\u6307\u5b9a\u8bdd\u9898\u7684\u88ab\u8f6c\u8f7dTweet\u6570\u91cf\u7684\u9891\u8c31\u5206\u6790 http://t.cn/RvkGP6L", "created_at": "Thu Jul 10 06:03:48 +0800 2014", "truncated": false, "visible": { "type": 0, "list_id": 0 }, "idstr": "3730665490725827", "mid": "3730665490725827", "source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>", "attitudes_count": 0, "pic_urls": [], "in_reply_to_screen_name": "", "in_reply_to_user_id": "", "in_reply_to_status_id": "", "comments_count": 0, "geo": null, "id": 3730665490725827, "mlevel": 0 }, { "darwin_tags": [], "reposts_count": 3, "source_type": 1, "uid": 3286717353, "favorited": false, "text": "Hi", "created_at": "Wed Mar 12 16:42:28 +0800 2014", "truncated": false, "visible": { "type": 0, "list_id": 0 }, "idstr": "3687339672990740", "mid": "3687339672990740", "source": "<a href=\"http://weibo.com/\" rel=\"nofollow\">\u5fae\u535a weibo.com</a>", "attitudes_count": 0, "pic_urls": [], "in_reply_to_screen_name": "", "in_reply_to_user_id": "", "in_reply_to_status_id": "", "comments_count": 3, "geo": null, "id": 3687339672990740, "mlevel": 0 } ]
相关文章推荐
- 新浪微博数据挖掘食谱之十一: 用户篇 (批量获取用户信息)
- 新浪微博数据挖掘食谱之十二: 用户篇 (批量获取用户的粉丝数和朋友数)
- 新浪微博数据挖掘食谱之九: 用户篇 (获取转发微博的用户名)
- 新浪微博数据挖掘食谱之十六: 微博篇 (词汇差异性,词汇均值)
- 新浪微博数据挖掘食谱之六: 元素篇 (提取微博元素)
- 新浪微博数据挖掘食谱之十五: 爬虫篇 (抓取用户的朋友)
- 新浪微博数据挖掘食谱之八: 查询篇 (查询最流行的微博元素)
- 新浪微博数据挖掘食谱之十: 元素篇 (提取转发微博的元素)
- 新浪微博数据挖掘食谱之十四: 用户篇 (分析用户的粉丝和朋友)
- 33day 新浪微博(获取用户微博数据)
- 新浪微博数据挖掘食谱之七: 查询篇 (查询最流行的微博)
- 新浪微博如何挖掘大数据资源为用户带来新价值
- 新浪微博数据挖掘食谱之四: 保存篇 (json text格式)
- 获取微博用户数据
- Python 获取新浪微博指定用户的微博列表
- c#网页方式获取新浪微博的微博数据
- 新浪微博如何挖掘大数据资源为用户带来新价值
- 无法获取到新浪微博中指定用户的第一条微博??
- 新浪微博如何挖掘大数据资源为用户带来新价值
- 【分享】新浪微博用户信息及微博数据集