您的位置:首页 > 其它

时间序列

2017-06-22 23:58 253 查看

import pandas as pd, numpy as np
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from matplotlib.ticker import MaxNLocator
def acf_pacf(series, lags=40, title=None, figsize=(10,6)):
    #ACF and PACF plots
    #
求自相关和偏自相关函数
    # series
输入的时间序列
    # lags
自相关和偏自相关函数的滞后取值范围
    fig = plt.figure(figsize=figsize)
    ax1 = fig.add_subplot(211)
    ax1.set_xlabel('lags')
    ax1.set_ylabel('Autocorrelation')
    # x坐标为整数
    ax1.xaxis.set_major_locator(MaxNLocator(integer=True))
    plot_acf(series.dropna(), lags=lags, ax=ax1, title='ACF of %s'%title)
    ax2 = fig.add_subplot(212)
    ax2.set_xlabel('lags')
    ax2.set_ylabel('Partial Autocorrelation')
    ax2.xaxis.set_major_locator(MaxNLocator(integer=True))
    plot_pacf(series.dropna(), lags=lags, ax=ax2, title='PACF of %s'%title)
    fig.tight_layout()
    
def test_stationarity(timeseries, window=10):
    #
求移动平均和移动标准差
    # window为选取的时间窗口个数
    rolmean = timeseries.rolling(window=window, center=False).mean()
    rolstd = timeseries.rolling(window=window, center=False).std()
 
    #
将以周重取样后的时间序列、移动平均和移动标准差制成折线图
#    orig = plt.plot(timeseries, color='blue', label='Original')
#    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
#    std = plt.plot(rolstd, color='black', label='Rolling Std')
#    plt.legend(loc='best')
#    plt.title('Rolling Mean & Standard Deviation')
#    plt.show(block=False)
 
    #
用Augmented Dickey-Fuller检验测试时间序列稳定性:
    print('Results of Augmented Dickey-Fuller Test:')
    #
使用减小AIC的办法估算ADF测试所需的滞后数
    dftest = adfuller(timeseries, autolag='AIC')
    #
将ADF测试结果、显著性概率、所用的滞后数和所用的观测数打印出来
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value',

                                             'Num Lags Used',

                                             'Num Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
#    print(dfoutput)    
    
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.stats.diagnostic import acorr_ljungbox
def decompose_plot(series, title=''):
    decomposition = seasonal_decompose(series)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid
    fig = decomposition.plot()
    fig.set_size_inches(10,6)
    fig.suptitle(title)
    fig.tight_layout()
    fig2 = acf_pacf(residual, title='Residuals', figsize=(10,4))
 
#
读取CSV
df = pd.read_csv('D:/jingjihu/portland-oregon-average-monthly-.csv')
#print(df.head(), '\nindex type:\n', type(df.index))
 
#
按'%Y-%m'格式转换CSV的日期,
为了方便处理,
将时间段转为timestamp
df['Month'] = pd.to_datetime(df['Month'], format='%Y-%m')
 
#
索引并resample为月
indexed_df = df.set_index('Month')
ts = indexed_df['riders']
ts = ts.resample('M').sum()
print(ts.head(), '\nindex type:\n', type(ts.index))
 
ts.plot(title='Monthly Num. of Ridership')
test_stationarity(ts)
decomposition = seasonal_decompose(ts)  
fig = plt.figure()  
fig = decomposition.plot()  
fig.set_size_inches(10, 6)
acf_pacf(decomposition.resid, title='Residuals')
ts_sdiff = ts - ts.shift(12)
print(ts_sdiff.shape)
test_stationarity(ts_sdiff.dropna())
 
decomposition = seasonal_decompose(ts_sdiff.dropna(), freq=12)  
fig = plt.figure()  
fig = decomposition.plot()  
fig.set_size_inches(10, 6)
acf_pacf(decomposition.resid, title='Residuals')
ts_diff_of_sdiff = ts_sdiff - ts_sdiff.shift(1)
test_stationarity(ts_diff_of_sdiff.dropna())
decomposition = seasonal_decompose(ts_diff_of_sdiff.dropna(), freq=12)  
fig = plt.figure()  
fig = decomposition.plot()  
fig.set_size_inches(10, 6)
acf_pacf(decomposition.resid, title='Residuals')
ts_log = np.log(ts)
ts_log_sdiff = ts_log - ts_log.shift(12)
ts_diff_of_log_sdiff = ts_log_sdiff - ts_log_sdiff.shift(1)
test_stationarity(ts_diff_of_log_sdiff.dropna())
decomposition = seasonal_decompose(ts_diff_of_log_sdiff.dropna(), freq=12)  
fig = plt.figure()  
fig = decomposition.plot()  
fig.set_size_inches(10, 6)
acf_pacf(decomposition.resid, title='Residuals')
from statsmodels.tsa.statespace.sarimax import SARIMAX
# SARIMA的参数: order = p, d, q, seasonal_order=P, D, Q, season的周期=12
model = SARIMAX(ts, order=(0,1,0),seasonal_order=(0,1,1,12))
#
已拟合的模型
fitted = model.fit()  
# ARIMA的结果
print(fitted.summary())
 
 比较拟合的数据和原始数据,
#ts.plot()
fitted.fittedvalues.plot(color='red')
 
#
拟合的数据和原始数据的残差
residuals = pd.DataFrame(fitted.resid)a
 
acf_pacf(residuals, title='Residuals', figsize=(10,4))
 
#
计算拟残差平方和(RSS)
plt.title('RSS: %.4f'% sum((fitted.fittedvalues-ts)**2))
 
#
残差的核密度估计
residuals.plot(kind='kde')
print('\n\nResiduals Summary:\n', residuals.describe())
#
plt.show()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: