您的位置:首页 > 编程语言 > Python开发

pandas基础-Python3

2017-01-02 23:14 357 查看
未完

Examples:

example 1:

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

# Build a one-column DataFrame indexed by seven consecutive days and print
# its basic attributes: index, columns, raw values, summary statistics and
# a few row selections.

import pandas as pd

d = pd.date_range('20170101', periods=7)
aList = list(range(1, 8))

# The single column is labelled with one space; the index itself is named.
df = pd.DataFrame(aList, index=d, columns=[' '])
df.index.name = 'value'

print('----------df.index---------')
print(df.index)

print('---------df.columns---------')
print(df.columns)

print('----------df.values---------')
print(df.values)

print('----------df.describe--------')
# describe has to be *called*: printing the bare attribute only shows a
# "<bound method ...>" repr (any output recorded from the uncalled version
# shows that repr) instead of the count/mean/std/quantile table.
summary = df.describe()
print(summary)

print('----------information details--------')
print(df.head(2))  # first n rows
print(df.tail(3))  # last n rows
print(df[3:5])  # df[a:b] slices by position: 1-based rows a+1 through b


运行结果如下:

----------df.index---------
DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
'2017-01-05', '2017-01-06', '2017-01-07'],
dtype='datetime64[ns]', name='value', freq='D')
---------df.columns---------
Index([' '], dtype='object')
----------df.values---------
[[1]
[2]
[3]
[4]
[5]
[6]
[7]]
----------df.describe--------
<bound method NDFrame.describe of
value
2017-01-01  1
2017-01-02  2
2017-01-03  3
2017-01-04  4
2017-01-05  5
2017-01-06  6
2017-01-07  7>
----------information details--------

value
2017-01-01  1
2017-01-02  2

value
2017-01-05  5
2017-01-06  6
2017-01-07  7

value
2017-01-04  4
2017-01-05  5


example 2:

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

# Build DataFrames from dicts of lists and select rows / sub-tables by label
# (.loc) and by position (.iloc).  The removed `.ix` indexer is not used, so
# the script runs on current pandas releases.

from pandas import Series, DataFrame
import pandas as pd

data = {'state': ['Ohino', 'Ohino', 'Ohino', 'Nevada', 'Nevada'],
        'year': [2000, 2001, 2002, 2001, 2002],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}

# 'name' is not a key of `data`, so that column is created filled with NaN.
df = DataFrame(data, index=list(range(1, 6)),
               columns=['year', 'state', 'pop', 'name'])
print(df)

print('\n', '---------------')
# The index holds the integers 1..5, so 3 is a row *label* here.
third_row = list(df.loc[3])
print(third_row)

print('\n', '---------------')
print(list(df['year']))

aList = ['1', '2', '3', '4']
bList = ['aa', 'bb', 'cb', 'dd']
cList = ['lemon', 'apple', 'orange', 'banana']

d = {'num': aList, 'char': bList, 'fruit': cList}

# Fix the column order explicitly: without it the order depends on the
# Python / pandas version, and the positional column slice below would
# select different columns.
df1 = DataFrame(d, index=['a', 'b', 'c', 'd'],
                columns=['char', 'fruit', 'num'])
# df2 = DataFrame(bList)
print('\n', '---------------')
print(df1)
#print(df1.num)

print('\n', '---------------')
row_b = df1.loc['b']  # the row whose index label is 'b'
print(row_b)

print('\n', '---------------')
# Positional slice: first two rows, columns at positions 1 and 2.
subset = df1.iloc[:2, 1:3]
print(subset)


运行结果如下:

year   state  pop name
1  2000   Ohino  1.5  NaN
2  2001   Ohino  1.7  NaN
3  2002   Ohino  3.6  NaN
4  2001  Nevada  2.4  NaN
5  2002  Nevada  2.9  NaN

---------------
[2002, 'Ohino', 3.6000000000000001, nan]

---------------
[2000, 2001, 2002, 2001, 2002]

---------------
char   fruit num
a   aa   lemon   1
b   bb   apple   2
c   cb  orange   3
d   dd  banana   4

---------------
char        bb
fruit    apple
num          2
Name: b, dtype: object

---------------
fruit num
a  lemon   1
b  apple   2


example 3 (数据选择 - DataFrame.loc 和 DataFrame.iloc):

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

# Download a few days of daily quotes for one ticker, load them into a
# DataFrame and demonstrate selection with loc / iloc / boolean masks.

import calendar
from datetime import date, timedelta

# NOTE(review): matplotlib.finance appears to be gone from current matplotlib
# releases and the call below needs network access -- confirm availability.
from matplotlib.finance import quotes_historical_yahoo_ochl
import pandas as pd


def _shift_months(day, months):
    """Return `day` moved by `months` calendar months, clamping the day."""
    year, month0 = divmod(day.year * 12 + day.month - 1 + months, 12)
    last_day = calendar.monthrange(year, month0 + 1)[1]
    return date(year, month0 + 1, min(day.day, last_day))


today = date.today()

# Window from one day before to three days after "four years ago plus eleven
# months".  Real date arithmetic is used because adjusting the tuple fields
# directly (month + 11, day - 1, day + 3) only produces a valid date when the
# script runs in January and away from the ends of the month.
anchor = _shift_months(today, -4 * 12 + 11)
window_start = anchor - timedelta(days=1)
window_end = anchor + timedelta(days=3)
start = (window_start.year, window_start.month, window_start.day)
end = (window_end.year, window_end.month, window_end.day)
quotes = quotes_historical_yahoo_ochl('AMX', start, end)
# each item in quotes is a tuple

fields = ['date', 'open', 'close', 'high', 'low', 'volume']

# Copy every quote into a mutable list and replace its first field (passed
# through date.fromordinal) with a datetime.date.
quotes1 = []
for t in quotes:
    row = list(t)
    row[0] = date.fromordinal(int(row[0]))
    quotes1.append(row)

df = pd.DataFrame(quotes1, index=range(1, len(quotes1)+1), columns=fields)
# df = pd.DataFrame(quotes1, index=['a','b','c','d','e'], columns=fields)
# df = df.drop(['date'], axis=1)

print(df)

print(df['close'].mean())  # mean of a single column
# print(dict(df.mean())['close'])  # mean of a single column

print(df.sort_values(['open'], ascending=True))  # sort; ascending (True) is the default
print(df[df.open >= 21].date)

# the index holds integers
print(df.loc[2:5, 'date':'close'])
print(df.loc[[2, 5], ['open', 'close']])
# loc selects rows and columns by label: either a contiguous label range
# or an explicit list of individual rows / columns
print(df.iloc[1:6, 0:4])  # iloc selects by positional slice

# the index holds string labels
# print(df.loc['a':'d', 'date':'close'])
# print(df.loc[['b','e'],['open','close']])
# loc selects rows and columns by label: either a contiguous label range
# or an explicit list of individual rows / columns

# select rows with a boolean condition
print(df[(df.index >= 4) & (df.open >= 21)])

# DataFrame means.  numeric_only=True skips the object-typed 'date' column,
# which cannot be averaged.
print(df.mean(numeric_only=True))  # per-column mean (the default axis)
print(df.mean(axis=1, numeric_only=True))  # axis=1 gives the per-row mean

# Disabled multi-ticker example, kept as a bare string literal (never run).
'''
# 获取多只股票的信息
d1 = (today.year-1, today.month+11, today.day)

aList = ['BABA', 'KO', 'AMX'] # List of the stock code of companys

for i in aList:
q1 = quotes_historical_yahoo_ochl(i, d1, today)
df1 = pd.DataFrame(q1)
print(df1)
'''


运行结果如下:

date       open      close       high        low     volume
1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
20.80974025
20.80974025
date       open      close       high        low     volume
3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
4    2013-12-06
Name: date, dtype: object

runfile('E:/Python/Anaco/test_yahoo.py', wdir='E:/Python/Anaco')
date       open      close       high        low     volume
1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
20.80974025
date       open      close       high        low     volume
3  2013-12-05  20.518079  20.545857  21.231027  20.379193  7225600.0
2  2013-12-04  20.971773  20.934738  21.064364  20.703261  5174400.0
1  2013-12-03  20.999551  21.156955  21.184731  20.795851  5152600.0
4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
4    2013-12-06
Name: date, dtype: object
date       open      close
2  2013-12-04  20.971773  20.934738
3  2013-12-05  20.518079  20.545857
4  2013-12-06  21.166215  20.601411
open      close
2  20.971773  20.934738
5        NaN        NaN
date       open      close       high
2  2013-12-04  20.971773  20.934738  21.064364
3  2013-12-05  20.518079  20.545857  21.231027
4  2013-12-06  21.166215  20.601411  21.295841
date       open      close       high        low     volume
4  2013-12-06  21.166215  20.601411  21.295841  20.536598  9989500.0
open      2.091390e+01
close     2.080974e+01
high      2.119399e+01
low       2.060373e+01
volume    6.885525e+06
dtype: float64
1    1.030537e+06
2    1.034897e+06
3    1.445137e+06
4    1.997917e+06
dtype: float64


View Code

example 4: 求微软公司(MSFT)2015年每月股票收盘价的平均值。

# Code based on Python 3.x
# _*_ coding: utf-8 _*_
# __Author: "LEMON"

# Compute the mean closing price of Microsoft (MSFT) for each month of 2015.

#Method 1 (update)

# NOTE(review): matplotlib.finance appears to be gone from current matplotlib
# releases and the download needs network access -- confirm availability.
from matplotlib.finance import quotes_historical_yahoo_ochl
from datetime import date
import pandas as pd
from datetime import datetime

today = date.today()
fields = ['date', 'open', 'close', 'high', 'low', 'volume']

# Fetch the last three years of quotes.
# NOTE(review): 2015 is only covered while "today" is within three years of
# it -- confirm the intended window.
start = (today.year - 3, today.month, today.day)
end = today
quotes = quotes_historical_yahoo_ochl('MSFT', start, end)
# each item in quotes is a tuple

df = pd.DataFrame(quotes, index=range(1, len(quotes) + 1), columns=fields)

# One 'YYYY/MM' label per row, derived from the 'date' column via
# date.fromordinal.  The intermediate values are deliberately not bound to
# the name `list`, which would shadow the builtin.
list1 = [date.fromordinal(int(x)).strftime('%Y/%m') for x in df.date.tolist()]

# print(list1)
df1 = df.set_index([list1]).drop('date', axis=1)
# use the month labels as the index and drop the 'date' column

df2 = df1['2015/01':'2015/12']  # keep only the 2015 rows
print(df2.groupby(df2.index).close.mean())
# group the rows by index (month label) and average the 'close' price

# -----------------------------------------------------
# #Method 1 (old)
#
# from matplotlib.finance import quotes_historical_yahoo_ochl
# from datetime import date
# import pandas as pd
# from datetime import datetime
#
#
# today = date.today()
# fields = ['date', 'open', 'close', 'high', 'low', 'volume']
#
# start2 = (today.year - 3, today.month, today.day)
# end2 = today
# quotes2 = quotes_historical_yahoo_ochl('MSFT', start2, end2)
# # each items in quotes is type of "tuple"
#
# quotes3 = []
# for t in quotes2:
#     t1 = list(t)
#     quotes3.append(t1)
# # each items in quotes1 is type of "list"
#
# for i in range(0, len(quotes3)):
#     quotes3[i][0] = date.fromordinal(int(quotes3[i][0]))
#     # date format is changed
#
# df2 = pd.DataFrame(quotes3, index=range(1, len(quotes3) + 1), columns=fields)
#
# df2['date'] = pd.to_datetime(df2['date'], format='%Y-%m-%d')  # 转化成pandas的日期格式
# # print(df2)
#
# start2015 = datetime(2015,1,1)
# end2015 = datetime(2015,12,31)
# # start2015 = datetime.strptime('2015-1-1', '%Y-%m-%d')
# # # 将'2015-1-1'字符串设置为时间格式
# # end2015 = datetime.strptime('2015-12-31', '%Y-%m-%d')
# # # 将'2015-12-31'字符串设置为时间格式
#
# df1 = df2[(start2015 <= df2.date) & (df2.date <= end2015)]
# # 通过时间条件来选择2015年的记录
#
# permonth1 = df1.date.dt.to_period('M')  #data per month
# g_month1 = df1.groupby(permonth1)
# g_closequotes = g_month1['close']
#
# s_month = g_closequotes.mean()  # s_month is Series class
# s_month.index.name = 'date_index'
#
# print(s_month)
# -----------------------------------------------------

# =================================================================
# Method 2

# from matplotlib.finance import quotes_historical_yahoo_ochl
# from datetime import date
#
# import pandas as pd
# today = date.today()
# start = (today.year-3, today.month, today.day)
# quotesMS = quotes_historical_yahoo_ochl('MSFT', start, today)
# attributes=['date','open','close','high','low','volume']
# quotesdfMS = pd.DataFrame(quotesMS, columns= attributes)
#
#
#
# list = []
# for i in range(0, len(quotesMS)):
#     x = date.fromordinal(int(quotesMS[i][0]))
#     y = date.strftime(x, '%y/%m/%d')
#     list.append(y)
# quotesdfMS.index = list
# quotesdfMS = quotesdfMS.drop(['date'], axis = 1)
# list = []
# quotesdfMS15 = quotesdfMS['15/01/01':'15/12/31']
#
# print(quotesdfMS15)
#
# for i in range(0, len(quotesdfMS15)):
#     list.append(int(quotesdfMS15.index[i][3:5])) #get month just like '02'
# quotesdfMS15['month'] = list
# print(quotesdfMS15.groupby('month').mean().close)
# =================================================================


输出结果如下:

2015/01    43.124433
2015/02    40.956772
2015/03    40.203918
2015/04    41.477685
2015/05    45.472291
2015/06    44.145879
2015/07    43.807541
2015/08    43.838895
2015/09    42.114155
2015/10    47.082882
2015/11    52.252878
2015/12    53.916431
Name: close, dtype: float64
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: