您的位置:首页 > 其它

pandas处理数据例子

2016-03-30 21:50 381 查看
API : pandas api

import  pandas as pd
import os
import numpy as np
# Callback for groupby(...).apply: processes one group at a time.
def add_prop(group):
    """Return a copy of ``group`` with a ``prop`` column added.

    ``prop`` holds each row's ``births`` divided by the sum of ``births``
    over the whole group.

    Args:
        group: DataFrame containing a numeric ``births`` column.

    Returns:
        A new DataFrame with the extra ``prop`` column; the frame that was
        passed in is left unmodified.
    """
    # Work on a copy: pandas documents that callbacks which mutate the object
    # handed to them by groupby-apply are not supported.
    result = group.copy()
    # Cast to float before dividing.
    births = result["births"].astype(float)
    result["prop"] = births / births.sum()
    return result
# Callback for groupby(...).apply: keep the rows with the most births.
def top1000(group, n=1000):
    """Return the ``n`` rows of ``group`` with the largest ``births``.

    Args:
        group: DataFrame containing a ``births`` column.
        n: Maximum number of rows to keep. Defaults to 1000, the value that
            used to be hard-coded.

    Returns:
        A DataFrame sorted by ``births`` in descending order and truncated
        to at most ``n`` rows.
    """
    ranked = group.sort_values(by="births", ascending=False)
    return ranked.head(n)
def _load_names(years, columns):
    """Read every available ``names/yob<year>.txt`` file into one DataFrame.

    Args:
        years: Iterable of years to look for.
        columns: Column names to assign to the header-less comma-separated
            files.

    Returns:
        A single DataFrame holding the rows of all files that were found,
        with an extra ``years`` column recording the source year.

    Raises:
        SystemExit: If none of the expected files exists.
    """
    pieces = []
    for year in years:
        path = "names/yob{year}.txt".format(year=year)
        if not os.path.exists(path):
            print("{path} does not exists".format(path=path))
            continue
        print("begin to read {path}".format(path=path))
        # Read the data for this year and tag every row with the year.
        frame = pd.read_table(path, names=columns, sep=",")
        frame["years"] = year
        pieces.append(frame)

    # pd.concat raises an opaque ValueError on an empty list; fail clearly.
    if not pieces:
        raise SystemExit("no names/yob<year>.txt files found")
    return pd.concat(pieces, ignore_index=True)


def main():
    """Load the yearly name files and print per-group summaries."""
    names = _load_names(range(1880, 2011), ["name", "sex", "births"])

    # Aggregate: total births per year, one column per sex.
    total_births = names.pivot_table(
        values="births", index="years", columns="sex", aggfunc="sum"
    )
    total_births.plot(title="births")

    # groupby + apply: add each name's share of births within (year, sex).
    # group_keys=False keeps the original row index, so "years" and "sex"
    # stay plain columns and can be grouped on again below.
    # NOTE(review): recent pandas releases may stop passing the grouping
    # columns to the apply callback -- confirm against the installed version.
    names = names.groupby(["years", "sex"], group_keys=False).apply(add_prop)
    print(names[:10])

    # Verify: the proportions of every (year, sex) group should sum to 1.
    print(np.allclose(names.groupby(["years", "sex"])["prop"].sum(), 1))

    # Top 1000 names for every (year, sex) group.  The result gets its own
    # name so the ``top1000`` helper function is not overwritten.
    top_by_group = names.groupby(["years", "sex"]).apply(top1000)
    # Boolean-mask selection with .loc replaces the removed .ix indexer.
    print(top_by_group.loc[top_by_group["years"] == 2000])


if __name__ == "__main__":
    main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: