第十章 时间序列
2017-06-28 09:54
218 查看
from datetime import datetime
now=datetime.now()
now
Out[3]: datetime.datetime(2017, 6, 28, 9, 15, 30, 916000)
now.year,now.month,now.day
Out[4]: (2017, 6, 28)
delta=datetime(2017,6,28)-datetime(2008,6,24,8,15)
delta
Out[6]: datetime.timedelta(3290, 56700)
delta.days
Out[8]: 3290
delta.seconds
Out[9]: 56700
from datetime import timedelta
delta=datetime(2017,9,2)-datetime(2017,6,28,9,26)
delta
Out[12]: datetime.timedelta(65, 52440)
delta=datetime(2017,9,2)-datetime(2017,6,28,19,26)
delta
Out[14]: datetime.timedelta(65, 16440)
#给datetime对象加上(或减去)一个或多个timedelta
from datetime import timedelta
start=datetime(2017,6,28)
start+timedelta(65)
Out[18]: datetime.datetime(2017, 9, 1, 0, 0)
#字符串与datetime的相互转换
stamp=datetime(2017,6,28)
str(stamp)
Out[20]: '2017-06-28 00:00:00'
stamp.strftime('%Y-%m-%d')
Out[21]: '2017-06-28'
value='2017-06-28'
datetime.strptime(value,'%Y-%m-%d')
Out[24]: datetime.datetime(2017, 6, 28, 0, 0)
datestrs=['7/6/2011','8/6/2011']
[datetime.strptime(x,'%m/%d/%Y') for x in datestrs]
Out[29]: [datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]
from dateutil.parser import parse
parse('2017-06-28')
Out[31]: datetime.datetime(2017, 6, 28, 0, 0)
parse('Jun 28,2017 9:45 PM')
Out[32]: datetime.datetime(2017, 6, 28, 21, 45)
#国际通用格式:日在月前面。传入dayfirst=True即可
parse('6/12/2011',dayfirst=True)
Out[33]: datetime.datetime(2011, 12, 6, 0, 0)
datestrs
Out[34]: ['7/6/2011', '8/6/2011']
import pandas as pd
pd.to_datetime(datestrs)
Out[36]: DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)
idx=pd.to_datetime(datestrs+[None])
idx
Out[38]: DatetimeIndex(['2011-07-06', '2011-08-06', 'NaT'], dtype='datetime64[ns]', freq=None)
idx[2]
Out[39]: NaT
#NaT(Not a Time)是pandas中时间戳数据的缺失值(NA值)
pd.isnull(idx)
Out[40]: array([False, False, True], dtype=bool)
#时间序列基础
from datetime import datetime
import numpy as np
from pandas import Series
dates=[datetime(2011,1,2),datetime(2011,1,5),datetime(2011,1,7),
datetime(2011,1,8),datetime(2011,1,10),datetime(2011,1,12)]
ts=Series(np.random.randn(6),index=dates)
ts
Out[47]:
2011-01-02 1.582904
2011-01-05 -1.439438
2011-01-07 -0.411849
2011-01-08 0.747215
2011-01-10 0.384336
2011-01-12 0.432144
dtype: float64
type(ts)
Out[48]: pandas.core.series.Series
ts.index
Out[49]:
DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
'2011-01-10', '2011-01-12'],
dtype='datetime64[ns]', freq=None)
ts+ts[::2]
Out[50]:
2011-01-02 3.165807
2011-01-05 NaN
2011-01-07 -0.823697
2011-01-08 NaN
2011-01-10 0.768672
2011-01-12 NaN
dtype: float64
#DatetimeIndex中的各个标量值是pandas的Timestamp对象
stamp=ts.index[0]
stamp
Out[53]: Timestamp('2011-01-02 00:00:00')
#索引、选取、子集的构造
#带有DatetimeIndex的Series(旧版pandas中称为TimeSeries)在索引以及数据选取方面与普通Series用法相同
stamp=ts.index[2]
ts[stamp]
Out[60]: -0.41184861375211623
#传入一个可被解释为日期的字符串
ts['1/10/2011']
Out[62]: 0.38433616480100646
#对于较长的时间序列,只需传入“年”或“年月”即可轻松选取数据的切片
longer_ts=Series(np.random.randn(1000),index=pd.date_range('1/1/2000',periods=1000))
longer_ts
Out[65]:
2000-01-01 -0.358478
2000-01-02 -0.004255
2000-01-03 -0.051692
2000-01-04 0.704217
2000-01-05 0.489014
2000-01-06 -0.905860
2000-01-07 0.523610
2000-01-08 0.075787
2000-01-09 0.530821
2000-01-10 0.909307
2000-01-11 -0.173890
2000-01-12 0.159520
2000-01-13 -1.144813
2000-01-14 0.549058
2000-01-15 -0.474099
2000-01-16 -1.272199
2000-01-17 0.818596
2000-01-18 -1.209547
2000-01-19 1.403011
2000-01-20 1.430430
2000-01-21 1.203109
2000-01-22 0.830908
2000-01-23 0.100247
2000-01-24 0.466166
2000-01-25 0.548837
2000-01-26 0.328641
2000-01-27 0.485374
2000-01-28 -0.829345
2000-01-29 -0.730873
2000-01-30 -0.196522
...
2002-08-28 1.224319
2002-08-29 -0.250930
2002-08-30 -0.543701
2002-08-31 -0.072269
2002-09-01 -0.122738
2002-09-02 0.917622
2002-09-03 0.814192
2002-09-04 -0.130414
2002-09-05 -0.368046
2002-09-06 0.552689
2002-09-07 2.261430
2002-09-08 -1.050885
2002-09-09 -0.015522
2002-09-10 0.560897
2002-09-11 -0.069163
2002-09-12 -0.432103
2002-09-13 -0.847568
2002-09-14 1.127829
2002-09-15 0.795124
2002-09-16 -1.620212
2002-09-17 0.307566
2002-09-18 0.159686
2002-09-19 0.491870
2002-09-20 -0.760113
2002-09-21 -1.108078
2002-09-22 -1.078166
2002-09-23 1.054080
2002-09-24 1.758295
2002-09-25 0.283322
2002-09-26 0.416153
Freq: D, dtype: float64
#通过日期进行切片的方式与普通Series的切片相同(对不规则的时间序列同样有效)
ts[datetime(2011,1,7):]
2011-01-07 -0.411849
2011-01-08 0.747215
2011-01-10 0.384336
2011-01-12 0.432144
dtype: float64
#也可用不存在于该时间序列中的时间戳对其进行切片(即范围查询)
ts['1/6/2011':'1/11/2011']
Out[69]:
2011-01-07 -0.411849
2011-01-08 0.747215
2011-01-10 0.384336
dtype: float64
ts.truncate(after='1/9/2011')
Out[70]:
2011-01-02 1.582904
2011-01-05 -1.439438
2011-01-07 -0.411849
2011-01-08 0.747215
dtype: float64
from pandas import DataFrame
dates=pd.date_range('1/1/2000',periods=100,freq='W-WED')
long_df=DataFrame(np.random.randn(100,4),index=dates,columns=['Colorado','Texas','New York','Ohio'])
long_df.ix['6-2001']
Colorado Texas New York Ohio
2001-06-06 0.663364 -0.225784 -0.397083 0.270297
2001-06-13 0.978811 1.239237 1.860109 0.322908
2001-06-20 -1.187403 1.011756 0.817634 0.503117
2001-06-27 -0.110130 0.367503 1.627412 0.758577
now=datetime.now()
now
Out[3]: datetime.datetime(2017, 6, 28, 9, 15, 30, 916000)
now.year,now.month,now.day
Out[4]: (2017, 6, 28)
delta=datetime(2017,6,28)-datetime(2008,6,24,8,15)
delta
Out[6]: datetime.timedelta(3290, 56700)
delta.days
Out[8]: 3290
delta.seconds
Out[9]: 56700
from datetime import timedelta
delta=datetime(2017,9,2)-datetime(2017,6,28,9,26)
delta
Out[12]: datetime.timedelta(65, 52440)
delta=datetime(2017,9,2)-datetime(2017,6,28,19,26)
delta
Out[14]: datetime.timedelta(65, 16440)
#给datetime对象加上(或减去)一个或多个timedelta
from datetime import timedelta
start=datetime(2017,6,28)
start+timedelta(65)
Out[18]: datetime.datetime(2017, 9, 1, 0, 0)
#字符串与datetime的相互转换
stamp=datetime(2017,6,28)
str(stamp)
Out[20]: '2017-06-28 00:00:00'
stamp.strftime('%Y-%m-%d')
Out[21]: '2017-06-28'
value='2017-06-28'
datetime.strptime(value,'%Y-%m-%d')
Out[24]: datetime.datetime(2017, 6, 28, 0, 0)
datestrs=['7/6/2011','8/6/2011']
[datetime.strptime(x,'%m/%d/%Y') for x in datestrs]
Out[29]: [datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]
from dateutil.parser import parse
parse('2017-06-28')
Out[31]: datetime.datetime(2017, 6, 28, 0, 0)
parse('Jun 28,2017 9:45 PM')
Out[32]: datetime.datetime(2017, 6, 28, 21, 45)
#国际通用格式:日在月前面。传入dayfirst=True即可
parse('6/12/2011',dayfirst=True)
Out[33]: datetime.datetime(2011, 12, 6, 0, 0)
datestrs
Out[34]: ['7/6/2011', '8/6/2011']
import pandas as pd
pd.to_datetime(datestrs)
Out[36]: DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)
idx=pd.to_datetime(datestrs+[None])
idx
Out[38]: DatetimeIndex(['2011-07-06', '2011-08-06', 'NaT'], dtype='datetime64[ns]', freq=None)
idx[2]
Out[39]: NaT
#NaT(Not a Time)是pandas中时间戳数据的缺失值(NA值)
pd.isnull(idx)
Out[40]: array([False, False, True], dtype=bool)
#时间序列基础
from datetime import datetime
import numpy as np
from pandas import Series
dates=[datetime(2011,1,2),datetime(2011,1,5),datetime(2011,1,7),
datetime(2011,1,8),datetime(2011,1,10),datetime(2011,1,12)]
ts=Series(np.random.randn(6),index=dates)
ts
Out[47]:
2011-01-02 1.582904
2011-01-05 -1.439438
2011-01-07 -0.411849
2011-01-08 0.747215
2011-01-10 0.384336
2011-01-12 0.432144
dtype: float64
type(ts)
Out[48]: pandas.core.series.Series
ts.index
Out[49]:
DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
'2011-01-10', '2011-01-12'],
dtype='datetime64[ns]', freq=None)
ts+ts[::2]
Out[50]:
2011-01-02 3.165807
2011-01-05 NaN
2011-01-07 -0.823697
2011-01-08 NaN
2011-01-10 0.768672
2011-01-12 NaN
dtype: float64
#DatetimeIndex中的各个标量值是pandas的Timestamp对象
stamp=ts.index[0]
stamp
Out[53]: Timestamp('2011-01-02 00:00:00')
#索引、选取、子集的构造
#带有DatetimeIndex的Series(旧版pandas中称为TimeSeries)在索引以及数据选取方面与普通Series用法相同
stamp=ts.index[2]
ts[stamp]
Out[60]: -0.41184861375211623
#传入一个可被解释为日期的字符串
ts['1/10/2011']
Out[62]: 0.38433616480100646
#对于较长的时间序列,只需传入“年”或“年月”即可轻松选取数据的切片
longer_ts=Series(np.random.randn(1000),index=pd.date_range('1/1/2000',periods=1000))
longer_ts
Out[65]:
2000-01-01 -0.358478
2000-01-02 -0.004255
2000-01-03 -0.051692
2000-01-04 0.704217
2000-01-05 0.489014
2000-01-06 -0.905860
2000-01-07 0.523610
2000-01-08 0.075787
2000-01-09 0.530821
2000-01-10 0.909307
2000-01-11 -0.173890
2000-01-12 0.159520
2000-01-13 -1.144813
2000-01-14 0.549058
2000-01-15 -0.474099
2000-01-16 -1.272199
2000-01-17 0.818596
2000-01-18 -1.209547
2000-01-19 1.403011
2000-01-20 1.430430
2000-01-21 1.203109
2000-01-22 0.830908
2000-01-23 0.100247
2000-01-24 0.466166
2000-01-25 0.548837
2000-01-26 0.328641
2000-01-27 0.485374
2000-01-28 -0.829345
2000-01-29 -0.730873
2000-01-30 -0.196522
...
2002-08-28 1.224319
2002-08-29 -0.250930
2002-08-30 -0.543701
2002-08-31 -0.072269
2002-09-01 -0.122738
2002-09-02 0.917622
2002-09-03 0.814192
2002-09-04 -0.130414
2002-09-05 -0.368046
2002-09-06 0.552689
2002-09-07 2.261430
2002-09-08 -1.050885
2002-09-09 -0.015522
2002-09-10 0.560897
2002-09-11 -0.069163
2002-09-12 -0.432103
2002-09-13 -0.847568
2002-09-14 1.127829
2002-09-15 0.795124
2002-09-16 -1.620212
2002-09-17 0.307566
2002-09-18 0.159686
2002-09-19 0.491870
2002-09-20 -0.760113
2002-09-21 -1.108078
2002-09-22 -1.078166
2002-09-23 1.054080
2002-09-24 1.758295
2002-09-25 0.283322
2002-09-26 0.416153
Freq: D, dtype: float64
#通过日期进行切片的方式与普通Series的切片相同(对不规则的时间序列同样有效)
ts[datetime(2011,1,7):]
2011-01-07 -0.411849
2011-01-08 0.747215
2011-01-10 0.384336
2011-01-12 0.432144
dtype: float64
#也可用不存在于该时间序列中的时间戳对其进行切片(即范围查询)
ts['1/6/2011':'1/11/2011']
Out[69]:
2011-01-07 -0.411849
2011-01-08 0.747215
2011-01-10 0.384336
dtype: float64
ts.truncate(after='1/9/2011')
Out[70]:
2011-01-02 1.582904
2011-01-05 -1.439438
2011-01-07 -0.411849
2011-01-08 0.747215
dtype: float64
from pandas import DataFrame
dates=pd.date_range('1/1/2000',periods=100,freq='W-WED')
long_df=DataFrame(np.random.randn(100,4),index=dates,columns=['Colorado','Texas','New York','Ohio'])
long_df.ix['6-2001']
Colorado Texas New York Ohio
2001-06-06 0.663364 -0.225784 -0.397083 0.270297
2001-06-13 0.978811 1.239237 1.860109 0.322908
2001-06-20 -1.187403 1.011756 0.817634 0.503117
2001-06-27 -0.110130 0.367503 1.627412 0.758577
相关文章推荐
- 《利用python进行数据分析》读书笔记--第十章 时间序列(二)
- 第十章 时间序列(中)
- 《利用python进行数据分析》第十章 时间序列(一)
- 《利用python进行数据分析》读书笔记--第十章 时间序列(一)
- 《利用python进行数据分析》读书笔记--第十章 时间序列(三)
- 【HDU】5256 系列转换(上涨时间最长的序列修饰)
- R语言时间序列函数整理
- 海量时间序列数据的实时查询系统(Druid系统)概述
- 基于R语言的时间序列的预测
- 确定性时间序列
- 【论文研读】事件与时间序列的关联
- 面试题:n个整数的序列,其中一个整数重复次数超过一半,在O(n)时间内找出该整数
- Python 收集Twitter时间序列数据
- 使用时间序列分解模型预测商品销量
- 时间序列分析及应用 R语言 读书笔记 02
- CNTK API文档翻译(11)——使用LSTM预测时间序列数据(物联网数据)
- 设计一个O(n2)时间的算法,找出由n个数组成的序列的最长单调递增子序列。
- R学习日记——分解时间序列(季节性数据)
- 时间序列(五)股票分析
- 人工智能:python 实现 第十一章,使用Pandas处理时间序列数据