본문 바로가기
  • 紹睿: 자유롭고 더불어 사는 가치있는 삶
Study/파이썬으로 데이터 주무르기

[시계열 데이터 분석] numpy를 이용한 시계열 데이터 분석

by 징여 2018. 7. 18.
반응형
 

numpy를 이용한 시계열 데이터 분석

In [5]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas_datareader import data
from fbprophet import Prophet
from datetime import datetime
 

폰트 설정

In [6]:
import platform

from matplotlib import font_manager, rc

# Choose a font family that contains Hangul glyphs for the current OS.
# 'AppleGothic' only ships with macOS, so hard-coding it breaks Korean labels
# elsewhere. NOTE(review): the 'NanumGothic' fallback assumes that font is
# installed on Linux/other systems -- confirm for your environment.
_KOREAN_FONT_BY_OS = {'Darwin': 'AppleGothic', 'Windows': 'Malgun Gothic'}
rc('font', family=_KOREAN_FONT_BY_OS.get(platform.system(), 'NanumGothic'))

# Draw the minus sign as an ASCII hyphen so negative tick labels do not render
# as a missing glyph under the Korean font.
plt.rcParams['axes.unicode_minus'] = False
In [37]:
# Load the traffic log. Column names are supplied explicitly ('date', 'hit'),
# the first column becomes the index, and thousands=',' is passed so that
# comma-grouped numbers are parsed as numeric values.
pinkwink_web = pd.read_csv(
    './data/07. PinkWink Web Traffic.csv',
    encoding='utf-8',
    thousands=',',
    names=['date', 'hit'],
    index_col=0,
)
# Discard rows that have no hit count.
pinkwink_web = pinkwink_web.dropna(subset=['hit'])
pinkwink_web.head(3)
Out[37]:
  hit
date  
16. 7. 1. 766.0
16. 7. 2. 377.0
16. 7. 3. 427.0
In [38]:
# Line plot of the hit counts on a wide (12 x 4) figure with grid lines enabled.
pinkwink_web['hit'].plot(figsize=(12, 4), grid=True)
Out[38]:
<matplotlib.axes._subplots.AxesSubplot at 0x117a161d0>
 
In [13]:
# Hit counts as a plain NumPy array, paired with a 0-based integer axis
# (one step per row) that stands in for the dates.
traffic = pinkwink_web['hit'].values
time = np.arange(len(traffic))
# 1000 evenly spaced sample points over the observed range, used to draw
# the fitted curves smoothly.
fx = np.linspace(0, time[-1], num=1000)
In [19]:
def error(f, x, y):
    """Return the root-mean-square error of model ``f`` on the samples (x, y).

    ``f`` is called once on the whole ``x`` array; its output is compared
    element-wise with ``y``.
    """
    residuals = f(x) - y
    mean_squared = np.mean(residuals ** 2)
    return np.sqrt(mean_squared)
In [31]:
# Least-squares polynomial fits of traffic against the integer time axis,
# at degrees 1, 2, 3, 5 and 15. The coefficient arrays keep their original names.
fp1, f2p, f3p, f5p, f15p = (
    np.polyfit(time, traffic, degree) for degree in (1, 2, 3, 5, 15)
)
# Wrap each coefficient array in a callable polynomial.
f1, f2, f3, f5, f15 = map(np.poly1d, (fp1, f2p, f3p, f5p, f15p))

# Print each model's error on the data it was fitted to, lowest degree first.
for fitted in (f1, f2, f3, f5, f15):
    print(error(fitted, time, traffic))
 
430.8597308110963
430.62841018946955
429.5328046676293
389.8483612021826
330.4777307248983
In [32]:
plt.figure(figsize=(10, 6))
plt.scatter(time, traffic, s=10)

# Overlay every fitted polynomial, evaluated on the dense fx grid.
for curve_label, curve in (('f1', f1), ('f2', f2), ('f3', f3), ('f5', f5), ('f15', f15)):
    plt.plot(fx, curve(fx), lw=4, label=curve_label)

plt.grid(True, linestyle='-', color='0.75')
plt.legend(loc=2)  # loc=2 places the f1, f2, ... legend in the upper-left corner
plt.show()
 
 

Prophet 모듈을 이용한 예측(forecast)

In [39]:
# Build the two-column frame handed to Prophet: 'ds' holds the index labels
# parsed as dates (e.g. "16. 7. 1." -> 2016-07-01) and 'y' holds the hit
# counts. The frame gets a fresh 0-based integer index.
df = pd.DataFrame({
    'ds': pd.to_datetime(pinkwink_web.index, format="%y. %m. %d."),
    'y': pinkwink_web['hit'].values,
})
df.head(3)
Out[39]:
  ds y
0 2016-07-01 766.0
1 2016-07-02 377.0
2 2016-07-03 427.0
In [40]:
# Create a Prophet model with the yearly seasonality component switched on,
# then fit it to the ds/y frame.
m = Prophet(yearly_seasonality=True)
m.fit(df)
Out[40]:
<fbprophet.forecaster.Prophet at 0x117a06c50>
In [41]:
# Build the frame of dates to predict on, extending 60 periods beyond the data
# the model was fitted on, and peek at the last rows.
future = m.make_future_dataframe(periods=60)
future.tail(3)
Out[41]:
  ds
422 2017-08-27
423 2017-08-28
424 2017-08-29
In [43]:
# Predict for every date in `future`, then show the estimate (yhat) together
# with its lower/upper bound columns for the last three rows.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(3)
Out[43]:
  ds yhat yhat_lower yhat_upper
422 2017-08-27 641.503999 435.113740 849.683688
423 2017-08-28 1194.071095 979.746565 1394.709571
424 2017-08-29 1229.894880 1008.136578 1424.921642
In [44]:
# Draw Prophet's plot of the forecast frame.
m.plot(forecast)
Out[44]:
 
In [46]:
# Draw the forecast broken down into its individual components.
m.plot_components(forecast)
Out[46]:
 

 

반응형

댓글