您的位置:首页 > 其它

轴向连接和层次化索引

2018-01-27 14:47 489 查看
# encoding=utf-8
import numpy as np
import pandas as pd

# Axis-wise concatenation
# 1. NumPy: join two copies of a 3x4 array side by side (axis=1 -> 3x8).
# print() is called with one pre-formatted string so the line parses on both
# Python 2 and Python 3 and yields the same text as the old print statement.
arr = np.arange(12).reshape((3, 4))
print('arr:=\n{}'.format(arr))
arr_concatenate_arr = np.concatenate([arr, arr], axis=1)
# Show the joined array; it used to be computed and then silently dropped.
print('arr_concatenate_arr:=\n{}'.format(arr_concatenate_arr))
# 2. pandas: concatenating Series whose indexes do not overlap.
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])
# axis=1 lines the Series up as columns over the union of their labels, so
# every label a Series lacks becomes NaN in that column.
pd_s123 = pd.concat([s1, s2, s3], axis=1)
print('pd_s123:=\n{}'.format(pd_s123))

# s4 reuses the labels 'a' and 'b' of s1 and adds 'f' and 'g' from s3.
s4 = pd.concat([s1 * 5, s3])
print('s4:=\n{}'.format(s4))

# Column-wise join of partially overlapping indexes: s1 has no 'f'/'g' rows.
s1_concat_s4 = pd.concat([s1, s4], axis=1)
print('s1_concat_s4:=\n{}'.format(s1_concat_s4))

# 3. keys= adds an outer index level that records which input each value
# came from, giving a hierarchically indexed Series.
result = pd.concat([s1, s2, s3], keys=['one', 'two', 'three'])
print('result:=\n{}'.format(result))
# unstack() moves the inner level into columns: one row per key.
unstackresult = result.unstack()
print('unstackresult:=\n{}'.format(unstackresult))

# 4. axis=0 is the default of concat, so this rebuilds the same hierarchical
# Series with the axis spelled out.
result = pd.concat([s1, s2, s3], axis=0, keys=['one', 'two', 'three'])
print('result:=\n{}'.format(result))
# Column-wise concatenation of two DataFrames under a hierarchical header.
df1 = pd.DataFrame(
    np.arange(6).reshape(3, 2),
    index=['a', 'b', 'c'],
    columns=['one', 'two'],
)
df2 = pd.DataFrame(
    5 + np.arange(4).reshape(2, 2),
    index=['a', 'c'],
    columns=['three', 'four'],
)

print('df1:=\n{}'.format(df1))
print('df2:=\n{}'.format(df2))

# keys= labels each input frame in a new outer column level and names=
# names the two column levels. Passing a dict such as
# {'level1': df1, 'level2': df2} instead of a list makes concat use the
# dict keys as the keys= values.
# df2 has no row 'b', so its columns are NaN on that row.
df1_concat_df2 = pd.concat(
    [df1, df2],
    axis=1,
    keys=['level1', 'level2'],
    names=['upper', 'lower'],
)
print('df1_concat_df2:=\n{}'.format(df1_concat_df2))

# 5. Row-wise concatenation of frames whose row labels carry no meaning.
df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
print('df1:=\n{}'.format(df1))
print('df2:=\n{}'.format(df2))
# ignore_index=True throws the original row labels away and renumbers the
# result from 0; df2 has no column 'c', so its rows hold NaN there.
df1_concat_df2 = pd.concat([df1, df2], ignore_index=True)
print('df1_concat_df2:=\n{}'.format(df1_concat_df2))
# ignore_index=False keeps each frame's own labels, so 0 and 1 appear twice.
df1_concat_df2 = pd.concat([df1, df2], ignore_index=False)
print('df1_concat_df2:=\n{}'.format(df1_concat_df2))
# Combining overlapping data
# 1. Two Series over the same labels, each with gaps the other can fill.
a = pd.Series(
    [np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],
    index=['f', 'e', 'd', 'c', 'b', 'a'],
)
b = pd.Series(
    np.arange(len(a), dtype=np.float64),
    index=['f', 'e', 'd', 'c', 'b', 'a'],
)
# Blank the last element by position. Plain b[-1] on a string-labelled
# Series relies on a deprecated positional fallback; once integer keys are
# treated as labels it would append a new row labelled -1 instead.
b.iloc[-1] = np.nan
print('a:=\n{}'.format(a))
print('b:=\n{}'.format(b))
# Array-level alternative: np.where(pd.isnull(a), b, a) takes b wherever a
# is NaN and a elsewhere, returning a plain ndarray.
# 2. Patching missing data
# combine_first keeps the caller's values and fills its holes (and any
# labels it lacks) from the argument, aligning on the index.
b_combine_first_a = b.iloc[:-2].combine_first(a.iloc[2:])
print('b[:-2].combine_first(a[2:]):=\n{}'.format(b_combine_first_a))
# 3. combine_first on DataFrames works column by column.
df1 = pd.DataFrame(
    {
        'a': [1., np.nan, 5., np.nan],
        'b': [np.nan, 2., np.nan, 6.],
        'c': range(2, 18, 4),
    }
)
df2 = pd.DataFrame(
    {
        'a': [5., 4., np.nan, 3., 7.],
        'b': [np.nan, 3., 4., 6., 8.],
    }
)
# Keep and print the patched frame; as a bare expression statement the
# result was discarded and the script showed nothing for this step.
# df2 has one more row than df1 and no column 'c', so the extra row is taken
# from df2 and its 'c' cell stays NaN.
df1_combine_first_df2 = df1.combine_first(df2)
print('df1.combine_first(df2):=\n{}'.format(df1_combine_first_df2))

# Reshaping with hierarchical indexing
#   stack:   pivots the columns of the data into rows
#   unstack: pivots the rows of the data into columns
data = pd.DataFrame(
    np.arange(6).reshape((2, 3)),
    index=pd.Index(['Ohio', 'Colorado'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number'),
)
print('data:=\n{}'.format(data))
# stack() folds the 'number' columns into an inner index level, giving a
# Series indexed by (state, number).
result = data.stack()
print('result:=\n{}'.format(result))
# unstack() pivots the innermost level by default; a level can also be
# chosen by position (0) or by name ('state').
print('result.unstack():=\n{}'.format(result.unstack()))
print('result.unstack(0):=\n{}'.format(result.unstack(0)))
# This label now ends with ':=\n' like every other label in the script, so
# the frame starts on its own line.
print("result.unstack('state'):=\n{}".format(result.unstack('state')))

# 2. Round trip through unstack/stack when some cells are missing.
s1 = pd.Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])
s2 = pd.Series([4, 5, 6], index=['c', 'd', 'e'])
data2 = pd.concat([s1, s2], keys=['one', 'two'])
print('data2:=\n{}'.format(data2))
data2_unstacked = data2.unstack()
print('data2.unstack():=\n{}'.format(data2_unstacked))

# Stack once, keeping the missing cells. The stack implementation selected
# by future_stack=True never drops NaN and rejects the dropna argument;
# pandas releases without that keyword raise TypeError and need the legacy
# dropna=False spelling instead.
try:
    data2_stacked_keep_na = data2_unstacked.stack(future_stack=True)
except TypeError:
    data2_stacked_keep_na = data2_unstacked.stack(dropna=False)

# The legacy stack() dropped NaN rows by default; dropping them explicitly
# gives the same filtered output on every pandas version.
print('data2.unstack().stack():=\n{}'.format(data2_stacked_keep_na.dropna()))
# Do not drop the missing values
print('data2.unstack().stack(dropna=False):=\n{}'.format(data2_stacked_keep_na))
# 3. stack/unstack on a DataFrame whose column axis is a named level.
# `result` is the (state, number)-indexed Series produced by data.stack()
# earlier in this script.
print('result:=\n{}'.format(result))
df = pd.DataFrame(
    {
        'left': result,
        'right': result + 5,
    },
    columns=pd.Index(['left', 'right'], name='side'),
)
print('df:=\n{}'.format(df))
# Unstacking 'state' moves it under 'side' in the column header.
df_unstacked_state = df.unstack('state')
print("df.unstack('state'):=\n{}".format(df_unstacked_state))
# Stacking 'side' then moves that level from the columns into the row index.
print("df.unstack('state').stack('side'):=\n{}".format(
    df_unstacked_state.stack('side')))

print('test')
内容来自用户分享和网络整理,不保证内容的准确性;如有侵权内容,可联系管理员处理。(点击这里给我发消息)
标签: