pandas essential functions
2015-07-23 07:11
316 查看
In [8]: df[:2] Out[8]: A B C 2000-01-01 0.187483 -1.933946 0.377312 2000-01-02 0.734122 2.141616 -0.011225 In [9]: df.columns = [x.lower() for x in df.columns] In [10]: df Out[10]: a b c 2000-01-01 0.187483 -1.933946 0.377312 2000-01-02 0.734122 2.141616 -0.011225 2000-01-03 0.048869 -1.360687 -0.479010 2000-01-04 -0.859661 -0.231595 -0.527750 2000-01-05 -1.296337 0.150680 0.123836 2000-01-06 0.571764 1.555563 -0.823761 2000-01-07 0.535420 -1.032853 1.469725 2000-01-08 1.304124 1.449735 0.203109
In [157]: s Out[157]: a -1.010924 b -0.672504 c -1.139222 d 0.354653 e 0.563622 dtype: float64 In [158]: s.reindex(['e', 'b', 'f', 'd']) Out[158]: e 0.563622 b -0.672504 f NaN d 0.354653 dtype: float64
In [176]: rng = date_range('1/3/2000', periods=8) In [177]: ts = Series(randn(8), index=rng) In [178]: ts2 = ts[[0, 3, 6]] In [179]: ts Out[179]: 2000-01-03 0.480993 2000-01-04 0.604244 2000-01-05 -0.487265 2000-01-06 1.990533 2000-01-07 0.327007 2000-01-08 1.053639 2000-01-09 -2.927808 2000-01-10 0.082065 Freq: D, dtype: float64 In [180]: ts2 Out[180]: 2000-01-03 0.480993 2000-01-06 1.990533 2000-01-09 -2.927808 dtype: float64 In [181]: ts2.reindex(ts.index) Out[181]: 2000-01-03 0.480993 2000-01-04 NaN 2000-01-05 NaN 2000-01-06 1.990533 2000-01-07 NaN 2000-01-08 NaN 2000-01-09 -2.927808 2000-01-10 NaN Freq: D, dtype: float64 In [182]: ts2.reindex(ts.index, method='ffill') Out[182]: 2000-01-03 0.480993 2000-01-04 0.480993 2000-01-05 0.480993 2000-01-06 1.990533 2000-01-07 1.990533 2000-01-08 1.990533 2000-01-09 -2.927808 2000-01-10 -2.927808 Freq: D, dtype: float64 In [183]: ts2.reindex(ts.index, method='bfill') Out[183]: 2000-01-03 0.480993 2000-01-04 1.990533 2000-01-05 1.990533 2000-01-06 1.990533 2000-01-07 -2.927808 2000-01-08 -2.927808 2000-01-09 -2.927808 2000-01-10 NaN Freq: D, dtype: float64 In [184]: ts2.reindex(ts.index).fillna(method='ffill') Out[184]: 2000-01-03 0.480993 2000-01-04 0.480993 2000-01-05 0.480993 2000-01-06 1.990533 2000-01-07 1.990533 2000-01-08 1.990533 2000-01-09 -2.927808 2000-01-10 -2.927808 Freq: D, dtype: float64
In [185]: df Out[185]: one three two a -0.626544 NaN -0.351587 b -0.138894 -0.177289 1.136249 c 0.011617 0.462215 -0.448789 d NaN 1.124472 -1.101558 In [186]: df.drop(['a', 'd'], axis=0) Out[186]: one three two b -0.138894 -0.177289 1.136249 c 0.011617 0.462215 -0.448789 In [187]: df.drop(['one'], axis=1) Out[187]: three two a NaN -0.351587 b -0.177289 1.136249 c 0.462215 -0.448789 d 1.124472 -1.101558 In [188]: df.reindex(df.index - ['a', 'd']) Out[188]: one three two b -0.138894 -0.177289 1.136249 c 0.011617 0.462215 -0.448789
In [191]: df.rename(columns={'one' : 'foo', 'two' : 'bar'}, .....: index={'a' : 'apple', 'b' : 'banana', 'd' : 'durian'}) .....: Out[191]: foo three bar apple -0.626544 NaN -0.351587 banana -0.138894 -0.177289 1.136249 c 0.011617 0.462215 -0.448789 durian NaN 1.124472 -1.101558
In [205]: s = Series(date_range('20130101 09:10:12',periods=4)) In [206]: s Out[206]: 0 2013-01-01 09:10:12 1 2013-01-02 09:10:12 2 2013-01-03 09:10:12 3 2013-01-04 09:10:12 dtype: datetime64[ns] In [207]: s.dt.hour Out[207]: 0 9 1 9 2 9 3 9 dtype: int64 In [208]: s.dt.second Out[208]: 0 12 1 12 2 12 3 12 dtype: int64 In [209]: s.dt.day Out[209]: 0 1 1 2 2 3 3 4 dtype: int64
In [226]: unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'], .....: columns=['three', 'two', 'one']) .....: In [227]: unsorted_df.sort_index() Out[227]: three two one a NaN -0.351587 -0.626544 b -0.177289 1.136249 -0.138894 c 0.462215 -0.448789 0.011617 d 1.124472 -1.101558 NaN In [228]: unsorted_df.sort_index(ascending=False) Out[228]: three two one d 1.124472 -1.101558 NaN c 0.462215 -0.448789 0.011617 b -0.177289 1.136249 -0.138894 a NaN -0.351587 -0.626544 In [230]: df1 = DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]}) In [231]: df1.sort_index(by='two') Out[231]: one three two 0 2 5 1 2 1 3 2 1 1 4 3 3 1 2 4 In [232]: df1[['one', 'two', 'three']].sort_index(by=['one','two']) Out[232]: one two three 2 1 2 3 1 1 3 4 3 1 4 2 0 2 1 5
In [248]: df1.columns = MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')]) In [249]: df1.sort_index(by=('a','two')) Out[249]: a b one two three 3 1 2 4 2 1 3 2 1 1 4 3
In [271]: df3 Out[271]: A B C 0 1.090748 -1.508174 0 1 1.734810 -0.502623 0 2 0.110879 0.529008 0 3 -3.629600 0.590536 1 4 0.675238 0.296947 0 5 -0.327398 0.007045 255 6 2.025163 0.707877 1 7 -1.998126 0.950661 0 In [272]: df3.dtypes Out[272]: A float32 B float64 C float64 dtype: object # conversion of dtypes In [273]: df3.astype('float32').dtypes Out[273]: A float32 B float32 C float32 dtype: object In [277]: df3['D'] = df3['D'].astype('float16') In [278]: df3['E'] = df3['E'].astype('int32') In [279]: df3.dtypes Out[279]: A float32 B float64 C float64 D float16 E int32 dtype: object
相关文章推荐
- Leetcode NO.111 Minimum Depth of Binary Tree
- Java 中最常见的 5 个错误
- 滴滴快车奖励政策,高峰奖励,翻倍奖励,按成交率,指派单数分级(持续更新...)
- 7.23翻倍奖励——滴滴快车单(成交率≥80%,≥10指派单)
- 2015年07月21日第13天笔记
- 7.23翻倍奖励——滴滴快车单(成交率≥60%,≥5指派单)
- 后台系统重构碰到的问题
- 7.23翻倍奖励——滴滴快车单(成交率≥50%,≥1指派单)
- C Objcet类
- programming-challenges A multiplication game (110505) 题解
- Git 使用中的问题
- Scala中隐式转换内幕操作规则揭秘
- 多线程(线程的概述,创建线程,控制线程,线程同步,线程池)
- 如何打印运行时加载类的包名(物理包名,不是路径)
- 优步司机如何联系客服?uber客服渠道,Uber优步司机客服渠道
- LeetCode Happy Number
- LeetCode Happy Number
- spring官网下载jar包
- UberX及以上级别车奖励政策(优步北京第四组)
- UberX及以上级别车奖励政策(优步北京第二、三组)