# Tags (from the source blog page): create, "that is" (就是), ext, graph, "line chart" (折线), isnull, ilo, als, panda
%load_ext autoreload %autoreload 2 %matplotlib inline %config InlineBackend.figure_format=‘retina‘ from __future__ import absolute_import, division, print_function import sys import os import pandas as pd import numpy as np # TSA from Statsmodels import statsmodels.api as sm import statsmodels.formula.api as smf import statsmodels.tsa.api as smt # Display and Plotting import matplotlib.pylab as plt import seaborn as sns pd.set_option(‘display.float_format‘, lambda x: ‘%.5f‘ % x) # pandas np.set_printoptions(precision=5, suppress=True) # numpy pd.set_option(‘display.max_columns‘, 100) pd.set_option(‘display.max_rows‘, 100) # seaborn plotting style sns.set(style=‘ticks‘, context=‘poster‘) filename_ts = ‘aa.csv‘ ts_df = pd.read_csv(filename_ts, index_col=0, parse_dates=[0]) n_sample = ts_df.shape[0] print(ts_df.shape) print(ts_df.head()) # Create a training sample and testing sample before analyzing the series # n_train=int(0.95*n_sample)+1 n_train=int(1*n_sample)+1-6 n_forecast=n_sample-n_train #ts_df # print(ts_df.iloc[n_train][‘Close‘]) ts_train = ts_df.iloc[:n_train][‘Close‘] #从本地读取数据 本文用的是本地数据,未用接口数据 # stock = pd.read_csv(‘aa.csv‘, index_col=0, parse_dates=[0]) # stock.head(10) #下采样 日频数据太多 # stock_week = stock[‘Close‘].resample(‘W-MON‘).mean() #训练数据 # stock_train = stock_week[‘2006‘:‘2020‘] # print(stock_train) # ts_test = ts_df.iloc[n_train:][‘Close‘] # print(ts_train.shape) # print(ts_test.shape) print("Training Series:", "\n", ts_train.tail(), "\n") # print("Testing Series:", "\n", ts_test.head()) print(111) # print(ts_train) def tsplot(y, lags=None, title=‘‘, figsize=(14, 8)): fig = plt.figure(figsize=figsize) layout = (2, 2) ts_ax = plt.subplot2grid(layout, (0, 0)) hist_ax = plt.subplot2grid(layout, (0, 1)) acf_ax = plt.subplot2grid(layout, (1, 0)) pacf_ax = plt.subplot2grid(layout, (1, 1)) y.plot(ax=ts_ax) # 折线图 ts_ax.set_title(title) y.plot(ax=hist_ax, kind=‘hist‘, bins=25) #直方图 hist_ax.set_title(‘Histogram‘) smt.graphics.plot_acf(y, lags=lags, ax=acf_ax) 
# ACF自相关系数 smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax) # 偏自相关系数 [ax.set_xlim(0) for ax in [acf_ax, pacf_ax]] sns.despine() fig.tight_layout() return ts_ax, acf_ax, pacf_ax tsplot(ts_train, title=‘A Given Training Series‘, lags=20); # Fit the model import warnings # 忽略警告 warnings.filterwarnings(‘ignore‘) arima200 = sm.tsa.SARIMAX(ts_train, order=(1,0,0)) # ARIMA季节性模型,至于p,d,q需要按照下面的方法选择 model_results = arima200.fit() # 此处运用BIC(贝叶斯信息准则)进行模型参数选择 # 另外还可以利用AIC(赤池信息准则),视具体情况而定 import itertools p_min = 0 d_min = 0 q_min = 0 p_max = 4 d_max = 0 q_max = 4 # Initialize a DataFrame to store the results results_bic = pd.DataFrame(index=[‘AR{}‘.format(i) for i in range(p_min,p_max+1)], columns=[‘MA{}‘.format(i) for i in range(q_min,q_max+1)]) for p,d,q in itertools.product(range(p_min,p_max+1), range(d_min,d_max+1), range(q_min,q_max+1)): if p==0 and d==0 and q==0: results_bic.loc[‘AR{}‘.format(p), ‘MA{}‘.format(q)] = np.nan continue try: model = sm.tsa.SARIMAX(ts_train, order=(p, d, q), #enforce_stationarity=False, #enforce_invertibility=False, ) results = model.fit() #此处的result包含了很多信息,具体如果用到需要自己去查询 # http://www.statsmodels.org/stable/tsa.html # print("results.bic",results.bic) # print("results.aic",results.aic) results_bic.loc[‘AR{}‘.format(p), ‘MA{}‘.format(q)] = results.bic except: continue print(p,d,q) results_bic = results_bic[results_bic.columns].astype(float) fig, ax = plt.subplots(figsize=(10, 8)) ax = sns.heatmap(results_bic, mask=results_bic.isnull(), ax=ax, annot=True, fmt=‘.2f‘, ); ax.set_title(‘BIC‘); # Alternative model selection method, limited to only searching AR and MA parameters train_results = sm.tsa.arma_order_select_ic(ts_train, ic=[‘aic‘, ‘bic‘], trend=‘nc‘, max_ar=4, max_ma=4) print(‘AIC‘, train_results.aic_min_order) print(‘BIC‘, train_results.bic_min_order) #残差分析 正态分布 QQ图线性 model_results.plot_diagnostics(figsize=(16, 12)); from statsmodels.tsa.arima_model import ARIMA # print(ts_train) # print(1111) # print(ts_test) model = 
ARIMA(ts_train,order=(7,1,0)) model = ARIMA(ts_restored,order=(7,1,0)) #第一种情况,导入ARIMA模型 model = ARIMA(data,order=(7,1,0)) #第二种情况,导入ARIMA模型 result = model.fit(disp=-1) print(result.summary()) result.conf_int()#模型诊断,可以发现所有的系数置信区间都不为0;即在5%的置信水平下,所有的系数都是显著的,即模型通过检验。 #最后画出时序图 # fig, ax = plt.subplots(figsize=(12, 10)) # # pred = result.predict(‘20140609‘, ‘20160701‘,dynamic=True, typ=‘levels‘) # ax = ts_train.loc[‘2020‘:].plot(ax=ax) #注意起点是从1901开始 # fig = result.plot_predict(20,500) #因为前面是90个数,所以加上预测的10个就是100 # plt.show() #数据预测并画图 # print(stock_train) # model = ARIMA(stock_train, order=(7, 1, 0)) # model_pred = model.fit()
# Tags (from the source blog page): create, "that is" (就是), ext, graph, "line chart" (折线), isnull, ilo, als, panda
# Original article: https://www.cnblogs.com/hzthyj/p/14724218.html