您的位置:首页 > 其它

pandas essential functions

2015-07-23 07:11 316 查看
In [8]: df[:2]
Out[8]:
A B C
2000-01-01 0.187483 -1.933946 0.377312
2000-01-02 0.734122 2.141616 -0.011225
In [9]: df.columns = [x.lower() for x in df.columns]
In [10]: df
Out[10]:
a        b         c
2000-01-01 0.187483 -1.933946 0.377312
2000-01-02 0.734122 2.141616 -0.011225
2000-01-03 0.048869 -1.360687 -0.479010
2000-01-04 -0.859661 -0.231595 -0.527750
2000-01-05 -1.296337 0.150680 0.123836
2000-01-06 0.571764 1.555563 -0.823761
2000-01-07 0.535420 -1.032853 1.469725
2000-01-08 1.304124 1.449735 0.203109


In [157]: s
Out[157]:
a -1.010924
b -0.672504
c -1.139222
d 0.354653
e 0.563622
dtype: float64
In [158]: s.reindex(['e', 'b', 'f', 'd'])
Out[158]:
e 0.563622
b -0.672504
f NaN
d 0.354653
dtype: float64


In [176]: rng = date_range('1/3/2000', periods=8)
In [177]: ts = Series(randn(8), index=rng)
In [178]: ts2 = ts[[0, 3, 6]]
In [179]: ts
Out[179]:
2000-01-03 0.480993
2000-01-04 0.604244
2000-01-05 -0.487265
2000-01-06 1.990533
2000-01-07 0.327007
2000-01-08 1.053639
2000-01-09 -2.927808
2000-01-10 0.082065
Freq: D, dtype: float64
In [180]: ts2
Out[180]:
2000-01-03 0.480993
2000-01-06 1.990533
2000-01-09 -2.927808
dtype: float64
In [181]: ts2.reindex(ts.index)
Out[181]:
2000-01-03 0.480993
2000-01-04 NaN
2000-01-05 NaN
2000-01-06 1.990533
2000-01-07 NaN
2000-01-08 NaN
2000-01-09 -2.927808
2000-01-10 NaN
Freq: D, dtype: float64
In [182]: ts2.reindex(ts.index, method='ffill')
Out[182]:
2000-01-03 0.480993
2000-01-04 0.480993
2000-01-05 0.480993
2000-01-06 1.990533
2000-01-07 1.990533
2000-01-08 1.990533
2000-01-09 -2.927808
2000-01-10 -2.927808
Freq: D, dtype: float64
In [183]: ts2.reindex(ts.index, method='bfill')
Out[183]:
2000-01-03 0.480993
2000-01-04 1.990533
2000-01-05 1.990533
2000-01-06 1.990533
2000-01-07 -2.927808
2000-01-08 -2.927808
2000-01-09 -2.927808
2000-01-10 NaN
Freq: D, dtype: float64
In [184]: ts2.reindex(ts.index).fillna(method='ffill')
Out[184]:
2000-01-03 0.480993
2000-01-04 0.480993
2000-01-05 0.480993
2000-01-06 1.990533
2000-01-07 1.990533
2000-01-08 1.990533
2000-01-09 -2.927808
2000-01-10 -2.927808
Freq: D, dtype: float64


In [185]: df
Out[185]:
one three two
a -0.626544 NaN -0.351587
b -0.138894 -0.177289 1.136249
c 0.011617 0.462215 -0.448789
d NaN 1.124472 -1.101558
In [186]: df.drop(['a', 'd'], axis=0)
Out[186]:
one       three     two
b -0.138894 -0.177289 1.136249
c 0.011617  0.462215  -0.448789
In [187]: df.drop(['one'], axis=1)
Out[187]:
three    two
a NaN      -0.351587
b -0.177289 1.136249
c 0.462215 -0.448789
d 1.124472 -1.101558
In [188]: df.reindex(df.index - ['a', 'd'])
Out[188]:
one       three     two
b -0.138894 -0.177289 1.136249
c 0.011617  0.462215  -0.448789


In [191]: df.rename(columns={'one' : 'foo', 'two' : 'bar'},
.....: index={'a' : 'apple', 'b' : 'banana', 'd' : 'durian'})
.....:
Out[191]:
foo       three     bar
apple  -0.626544 NaN       -0.351587
banana -0.138894 -0.177289 1.136249
c      0.011617  0.462215  -0.448789
durian NaN       1.124472  -1.101558


In [205]: s = Series(date_range('20130101 09:10:12',periods=4))
In [206]: s
Out[206]:
0 2013-01-01 09:10:12
1 2013-01-02 09:10:12
2 2013-01-03 09:10:12
3 2013-01-04 09:10:12
dtype: datetime64[ns]
In [207]: s.dt.hour
Out[207]:
0 9
1 9
2 9
3 9
dtype: int64
In [208]: s.dt.second
Out[208]:
0 12
1 12
2 12
3 12
dtype: int64
In [209]: s.dt.day
Out[209]:
0 1
1 2
2 3
3 4
dtype: int64


In [226]: unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'],
.....: columns=['three', 'two', 'one'])
.....:
In [227]: unsorted_df.sort_index()
Out[227]:
three     two       one
a NaN       -0.351587 -0.626544
b -0.177289 1.136249  -0.138894
c 0.462215  -0.448789 0.011617
d 1.124472  -1.101558 NaN
In [228]: unsorted_df.sort_index(ascending=False)
Out[228]:
three     two       one
d 1.124472  -1.101558 NaN
c 0.462215  -0.448789 0.011617
b -0.177289 1.136249  -0.138894
a NaN       -0.351587 -0.626544
In [230]: df1 = DataFrame({'one':[2,1,1,1],'two':[1,3,2,4],'three':[5,4,3,2]})
In [231]: df1.sort_index(by='two')
Out[231]:
one three two
0 2   5     1
2 1   3     2
1 1   4     3
3 1   2     4
In [232]: df1[['one', 'two', 'three']].sort_index(by=['one','two'])
Out[232]:
one two three
2 1   2   3
1 1   3   4
3 1   4   2
0 2   1   5


In [248]: df1.columns = MultiIndex.from_tuples([('a','one'),('a','two'),('b','three')])
In [249]: df1.sort_index(by=('a','two'))
Out[249]:
a       b
one two three
3 1   2   4
2 1   3   2
1 1   4   3
0 2   5   1


In [271]: df3
Out[271]:
A         B         C
0 1.090748  -1.508174 0
1 1.734810  -0.502623 0
2 0.110879  0.529008  0
3 -3.629600 0.590536  1
4 0.675238  0.296947  0
5 -0.327398 0.007045  255
6 2.025163  0.707877  1
7 -1.998126 0.950661  0
In [272]: df3.dtypes
Out[272]:
A float32
B float64
C float64
dtype: object
# conversion of dtypes
In [273]: df3.astype('float32').dtypes
Out[273]:
A float32
B float32
C float32
dtype: object
In [277]: df3['D'] = df3['D'].astype('float16')
In [278]: df3['E'] = df3['E'].astype('int32')
In [279]: df3.dtypes
Out[279]:
A float32
B float64
C float64
D float16
E int32
dtype: object
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: