您的位置:首页 > 编程语言 > Python开发

Python 对Twitter tweet的元素 (Word, Screen Name, Hash Tag)的词汇多样性分析

2014-07-03 06:28 483 查看
CODE:

#!/usr/bin/python
# -*- coding: utf-8 -*-

'''
Created on 2014-7-3
@author: guaguastd
@name: tweet_lexical_diversity.py
'''

if __name__ == '__main__':
    # Interactive driver: repeatedly ask for a search query, fetch matching
    # tweets, and print the lexical diversity of their words, screen names
    # and hashtags, followed by the average words per status text.
    #
    # NOTE: written for Python 2 (raw_input). The parenthesised,
    # single-argument print calls below behave the same on Python 2 and 3.

    # Project-local helpers. For login, see
    # http://blog.csdn.net/guaguastd/article/details/31706155
    from login import twitter_login
    from tweet import extract_tweet_entities
    from search import search_for_tweet
    from lexical_diversity import lexical_diversity, average_words

    # get the twitter access api
    twitter_api = twitter_login()

    while True:
        query = raw_input('\nInput the query (eg. #MentionSomeoneImportantForYou, exit to quit): ')

        # The literal word "exit" ends the session.
        if query == 'exit':
            print('Successfully exit!')
            break

        statuses = search_for_tweet(twitter_api, query)
        status_texts, screen_names, hashtags, words = extract_tweet_entities(statuses)

        for token in (words, screen_names, hashtags):
            # Wrap in a 1-tuple so %-formatting treats the whole collection
            # as a single value even if it happens to be a tuple.
            print('\rLexical diversity of %s: ' % (token,))
            print(lexical_diversity(token))

        print('\rAverage words of %s: ' % (status_texts,))
        print(average_words(status_texts))


RESULT:

Input the query (eg. #MentionSomeoneImportantForYou, exit to quit): #MentionSomeoneImportantForYou
Length of statuses 30

Lexical diversity of [u'RT', u'@xmlovex:', u'#MentionSomeoneImportantForYou', u'@purpledrauhl_23', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@MissRosaa_', u'#MentionSomeoneImportantForYou', u'@justinbieber', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@_K_L_O_"', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@_K_L_O_', u'\u201c@0hDearPriscii:', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii"', u'aww', u'ily\U0001f618\U0001f46f\u201dily2\u2764\ufe0f', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii"', u'aww', u'ily\U0001f618\U0001f46f', u'#MentionSomeoneImportantForYou', u'@', u'my', u'brotherrrr', u'http://t.co/LprqvaLvyu', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@BeyonceTapia', u'\U0001f498', u'RT', u'@thuggie_salma:', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@thuggie_salma"', u'baeee', u'\U0001f618\U0001f60f\U0001f62d', u'#MentionSomeoneImportantForYou', u'@BeyonceTapia', u'\U0001f498', u'"@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@thuggie_salma"', u'baeee', u'\U0001f618\U0001f60f\U0001f62d', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@thuggie_salma', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@NotNormal_Javi', u'#MentionSomeoneImportantForYou', u'@NotNormal_Javi', u'#MentionSomeoneImportantForYou', u'@thuggie_salma', u'RT', u'@KillahPimpp:', u'#MentionSomeoneImportantForYou', u'@EbbsContreras', u'RT', u'@sashaalexxa_:', u'#MentionSomeoneImportantForYou', u'@', u'#MentionSomeoneImportantForYou', u'@EbbsContreras', u'RT', u'@NotNormal_Javi:', u'#MentionSomeoneImportantForYou', u'cheeseburgers', u'\U0001f354\U0001f354', u'#MentionSomeoneImportantForYou', u'@TaeTae2Beast', u'#MentionSomeoneImportantForYou', u'@', u'#MentionSomeoneImportantForYou', u'@Brendaaa23', u'#MentionSomeoneImportantForYou', 
u'cheeseburgers', u'\U0001f354\U0001f354', u'#MentionSomeoneImportantForYou', u'@_K_L_O_', u'#MentionSomeoneImportantForYou', u'@MissRosaa_', u'#MentionSomeoneImportantForYou', u'@0hDearPriscii', u'@LoveASharie', u'@DJZeeti', u'Speechless', u'beauty', u'and', u'Pretty', u'smile', u'.#WomanCrushWednesday', u'#MentionSomeoneImportantForYou', u'#TeamSharie', u'@louiswonderwall', u'my', u'babeeeee\U0001f60d\U0001f60d\U0001f60d\U0001f60d\U0001f60d', u'#MentionSomeoneImportantForYou']:
0.407079646018

Lexical diversity of [u'xmlovex', u'KillahPimpp', u'MissRosaa_', u'justinbieber', u'KillahPimpp', u'_K_L_O_', u'KillahPimpp', u'_K_L_O_', u'0hDearPriscii', u'KillahPimpp', u'0hDearPriscii', u'KillahPimpp', u'0hDearPriscii', u'KillahPimpp', u'0hDearPriscii', u'KillahPimpp', u'BeyonceTapia', u'thuggie_salma', u'KillahPimpp', u'thuggie_salma', u'BeyonceTapia', u'KillahPimpp', u'thuggie_salma', u'KillahPimpp', u'thuggie_salma', u'KillahPimpp', u'NotNormal_Javi', u'NotNormal_Javi', u'thuggie_salma', u'KillahPimpp', u'EbbsContreras', u'sashaalexxa_', u'EbbsContreras', u'NotNormal_Javi', u'TaeTae2Beast', u'Brendaaa23', u'_K_L_O_', u'MissRosaa_', u'0hDearPriscii', u'LoveASharie', u'DJZeeti', u'louiswonderwall']:
0.380952380952

Lexical diversity of [u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'MentionSomeoneImportantForYou', u'WomanCrushWednesday', u'MentionSomeoneImportantForYou', u'TeamSharie', u'MentionSomeoneImportantForYou']:
0.09375

Average words of [u'RT @xmlovex: #MentionSomeoneImportantForYou @purpledrauhl_23', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @MissRosaa_', u'#MentionSomeoneImportantForYou @justinbieber', u'"@KillahPimpp: #MentionSomeoneImportantForYou @_K_L_O_"', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @_K_L_O_', u'\u201c@0hDearPriscii: "@KillahPimpp: #MentionSomeoneImportantForYou @0hDearPriscii" aww ily\U0001f618\U0001f46f\u201dily2\u2764\ufe0f', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @0hDearPriscii', u'"@KillahPimpp: #MentionSomeoneImportantForYou @0hDearPriscii" aww ily\U0001f618\U0001f46f', u'#MentionSomeoneImportantForYou @ my brotherrrr http://t.co/LprqvaLvyu', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @BeyonceTapia \U0001f498', u'RT @thuggie_salma: "@KillahPimpp: #MentionSomeoneImportantForYou @thuggie_salma" baeee \U0001f618\U0001f60f\U0001f62d', u'#MentionSomeoneImportantForYou @BeyonceTapia \U0001f498', u'"@KillahPimpp: #MentionSomeoneImportantForYou @thuggie_salma" baeee \U0001f618\U0001f60f\U0001f62d', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @thuggie_salma', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @NotNormal_Javi', u'#MentionSomeoneImportantForYou @NotNormal_Javi', u'#MentionSomeoneImportantForYou @thuggie_salma', u'RT @KillahPimpp: #MentionSomeoneImportantForYou @EbbsContreras', u'RT @sashaalexxa_: #MentionSomeoneImportantForYou @', u'#MentionSomeoneImportantForYou @EbbsContreras', u'RT @NotNormal_Javi: #MentionSomeoneImportantForYou cheeseburgers \U0001f354\U0001f354', u'#MentionSomeoneImportantForYou @TaeTae2Beast', u'#MentionSomeoneImportantForYou @', u'#MentionSomeoneImportantForYou @Brendaaa23', u'#MentionSomeoneImportantForYou cheeseburgers \U0001f354\U0001f354', u'#MentionSomeoneImportantForYou @_K_L_O_', u'#MentionSomeoneImportantForYou @MissRosaa_', u'#MentionSomeoneImportantForYou @0hDearPriscii', u'@LoveASharie @DJZeeti Speechless beauty  and Pretty smile .#WomanCrushWednesday  
#MentionSomeoneImportantForYou  #TeamSharie', u'@louiswonderwall my babeeeee\U0001f60d\U0001f60d\U0001f60d\U0001f60d\U0001f60d #MentionSomeoneImportantForYou']:
3.76666666667

Input the query (eg. #MentionSomeoneImportantForYou, exit to quit):
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: