#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""The ARIMA model (stand for Auto Regressive Integrated Moving Average) is a classical statistical model for time series data
Its name reflects its three main components:
- AR, Auto Regressive, means the variable of interest (time series) is regressed on its own lagged values
- MA, Moving Average, means the regression error is a linear combination of error terms whose values occurred contemporaneously and at various times in the past
- I, Integrated, means data values have been replaced with the difference between their values and the previous value
We use the implementation in statsmodels <https://www.statsmodels.org/stable/index.html> and wrap its API to follow the Kats development style.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import logging
from typing import List, Dict, Optional, Callable, Any
import kats.models.model as m
import numpy as np
import pandas as pd
from kats.consts import Params, TimeSeriesData
from kats.utils.parameter_tuning_utils import get_default_arima_parameter_search_space
from statsmodels.tsa.arima_model import ARIMA
class ARIMAParams(Params):
    """Parameter class for ARIMA model

    This is the parameter class for ARIMA model, it contains all necessary
    parameters from the following ARIMA implementation:
    https://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_model.ARIMA.html

    Attributes:
        p: An integer for trend autoregressive (AR) order
        d: An integer for trend difference order
        q: An integer for trend moving average (MA) order
        exog: Optional; An array of exogenous regressors
        dates: Optional; pandas-compatible datetime object
        freq: Optional; frequency of a given time series
    """

    # NOTE(review): only p, d and q are slotted; exog/dates/freq are assigned
    # as ordinary attributes, which presumably relies on the `Params` base
    # class providing an instance __dict__ -- confirm before extending.
    __slots__ = ["p", "d", "q"]

    def __init__(self, p: int, d: int, q: int, **kwargs) -> None:
        """Store the ARIMA order and the optional keyword settings.

        Args:
            p: Autoregressive (AR) order.
            d: Difference order.
            q: Moving average (MA) order.
            **kwargs: Optional settings; only the keys "exog", "dates" and
                "freq" are read, each defaulting to None when absent.
        """
        super().__init__()
        self.p = p
        self.d = d
        self.q = q
        self.exog = kwargs.get("exog", None)
        self.dates = kwargs.get("dates", None)
        self.freq = kwargs.get("freq", None)
        # Lazy %-style arguments: the message is only rendered when DEBUG
        # logging is actually enabled.
        logging.debug(
            "Initialized ARIMAParams with parameters. "
            "p:%s, d:%s, q:%s, kwargs:%s",
            p,
            d,
            q,
            kwargs,
        )

    def validate_params(self) -> None:
        """Placeholder for parameter validation; only logs that it is not implemented."""
        logging.info("Method validate_params() is not implemented.")
class ARIMAModel(m.Model):
    """Model class for ARIMA model

    Wraps the statsmodels ``ARIMA`` estimator: ``fit()`` builds and fits the
    estimator on ``data``, ``predict()`` produces a forecast data frame with
    confidence bounds.

    Attributes:
        data: :class:`kats.consts.TimeSeriesData`, the input historical time series data from TimeSeriesData
        params: The ARIMA model parameters from ARIMAParams
        model: The fitted result object returned by ``ARIMA.fit``; None until ``fit()`` is called
        exog: Exogenous regressors used for the last ``predict()`` call
        alpha: Significance level used for the last ``predict()`` call
        freq: Frequency used to build the forecast dates in the last ``predict()`` call
        include_history: Whether the last ``predict()`` call included in-sample forecasts
        fcst_df: The data frame returned by the last ``predict()`` call
        y_fcst: Point forecasts from the last ``predict()`` call
        y_fcst_lower: Lower confidence bounds from the last ``predict()`` call
        y_fcst_upper: Upper confidence bounds from the last ``predict()`` call
    """

    def __init__(self, data: TimeSeriesData, params: ARIMAParams) -> None:
        """Validate the input series and initialise the forecast state.

        Args:
            data: The historical time series; ``data.value`` must be a pd.Series.
            params: The ARIMA parameters.

        Raises:
            ValueError: If ``data.value`` is not a pd.Series (i.e. the series
                is not univariate).
        """
        super().__init__(data, params)
        if not isinstance(self.data.value, pd.Series):
            msg = "Only support univariate time series, but get {type}.".format(
                type=type(self.data.value)
            )
            logging.error(msg)
            raise ValueError(msg)
        # Each attribute is initialised exactly once; all of them are
        # overwritten by fit() / predict().
        self.exog = None
        self.alpha = 0.05
        self.freq = None
        self.model = None
        self.include_history = False
        self.fcst_df = None
        self.y_fcst = None
        self.y_fcst_lower = None
        self.y_fcst_upper = None

    def fit(
        self,
        start_params: Optional[np.ndarray] = None,
        transparams: bool = True,
        method: str = "css-mle",
        trend: str = "c",
        solver: str = "lbfgs",
        maxiter: int = 500,
        full_output: bool = True,
        disp: int = 5,
        callback: Optional[Callable] = None,
        start_ar_lags: Optional[int] = None,
        **kwargs,
    ) -> None:
        """Fit ARIMA model with given parameters

        For more details on each parameter please refer to the following doc:
        https://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_model.ARIMA.fit.html#statsmodels.tsa.arima_model.ARIMA.fit

        Args:
            start_params: Optional; An array_like object for the initial guess of the solution for the loglikelihood maximization
            transparams: Optional; A boolean to specify whether or not to transform the parameters to ensure stationarity. Default is True
            method: A string that specifies the loglikelihood to maximize. Can be 'css-mle', 'mle' and 'css'. Default is 'css-mle'
            trend: A string that specifies whether to include a constant in the trend or not. Can be 'c' and 'nc'. Default is 'c'
            solver: Optional; A string that specifies the solver to be used. Can be 'lbfgs', 'bfgs', 'newton', 'nm', 'cg', 'ncg' and 'powell'. Default is 'lbfgs'
            maxiter: Optional; An integer for the maximum number of function iterations. Default is 500
            full_output: Optional; A boolean to specify whether to show all output from the solver in the results. Default is True
            disp: Optional; An integer to control the frequency of the output during the iterations. Default is 5
            callback: Optional; A callable object to be called after each iteration. Default is None
            start_ar_lags: Optional; An integer to specify the AR lag parameter to fit the start_params. Default is None
            **kwargs: Accepted for interface compatibility but currently ignored;
                in particular a convergence tolerance (``tol``) is not forwarded
                to statsmodels.

        Returns:
            None
        """
        logging.debug("Call fit() method")
        # The fit settings are kept on the instance so they can be inspected
        # after fitting.
        # pyre-fixme[16]: `ARIMAModel` has no attribute `start_params`.
        self.start_params = start_params
        # pyre-fixme[16]: `ARIMAModel` has no attribute `transparams`.
        self.transparams = transparams
        # pyre-fixme[16]: `ARIMAModel` has no attribute `method`.
        self.method = method
        # pyre-fixme[16]: `ARIMAModel` has no attribute `trend`.
        self.trend = trend
        # pyre-fixme[16]: `ARIMAModel` has no attribute `solver`.
        self.solver = solver
        # pyre-fixme[16]: `ARIMAModel` has no attribute `maxiter`.
        self.maxiter = maxiter
        # pyre-fixme[16]: `ARIMAModel` has no attribute `full_output`.
        self.full_output = full_output
        # pyre-fixme[16]: `ARIMAModel` has no attribute `disp`.
        self.disp = disp
        # pyre-fixme[16]: `ARIMAModel` has no attribute `callback`.
        self.callback = callback
        # pyre-fixme[16]: `ARIMAModel` has no attribute `start_ar_lags`.
        self.start_ar_lags = start_ar_lags

        # NOTE(review): the dates come from the data itself; `params.dates`
        # is not consulted here -- confirm this is intentional.
        arima = ARIMA(
            self.data.value,
            order=(self.params.p, self.params.d, self.params.q),
            exog=self.params.exog,
            dates=self.data.time,
            freq=self.params.freq,
        )
        logging.info("Created arima model.")
        self.model = arima.fit(
            start_params=self.start_params,
            transparams=self.transparams,
            method=self.method,
            trend=self.trend,
            solver=self.solver,
            maxiter=self.maxiter,
            full_output=self.full_output,
            disp=self.disp,
            callback=self.callback,
            start_ar_lags=self.start_ar_lags,
        )
        logging.info("Fitted arima.")

    # pyre-fixme[14]: `predict` overrides method defined in `Model` inconsistently.
    def predict(
        self, steps: int, include_history: bool = False, **kwargs
    ) -> pd.DataFrame:
        """Predict with fitted ARIMA model

        Args:
            steps: An integer for forecast steps
            include_history: Optional; A boolean to specify whether to include historical data. Default is False.
            **kwargs: Optional settings. Recognised keys:
                exog: exogenous regressors for the forecast horizon. Default is None
                alpha: significance level of the confidence interval. Default is 0.05
                freq: frequency used to build the forecast dates. When absent
                    (or None) it is inferred from the time column of the data

        Returns:
            A pd.DataFrame that contains the forecast and confidence interval,
            with columns "time", "fcst", "fcst_lower" and "fcst_upper". When
            include_history is True the in-sample forecasts are prepended.

        Raises:
            ValueError: If the model has not been fitted yet, or if the
                in-sample forecasts for the historical data cannot be generated.
        """
        logging.debug(
            "Call predict() with parameters. steps:%s, kwargs:%s", steps, kwargs
        )
        # Fail early with a clear message instead of an AttributeError on None.
        if self.model is None:
            raise ValueError("Call fit() before predict().")

        self.include_history = include_history
        self.exog = kwargs.get("exog", None)
        self.alpha = kwargs.get("alpha", 0.05)
        # Only infer the frequency when the caller did not provide one.
        freq = kwargs.get("freq", None)
        if freq is None:
            freq = pd.infer_freq(self.data.time)
        self.freq = freq

        fcst = self.model.forecast(steps, exog=self.exog, alpha=self.alpha)
        logging.info("Generated forecast data from arima model.")
        logging.debug("Forecast data: %s", fcst)

        # forecast() returns (point forecast, standard error, confidence
        # interval); each row of the interval is (lower, upper).
        self.y_fcst = fcst[0]
        conf_int = np.asarray(fcst[2])
        self.y_fcst_lower = conf_int[:, 0]
        self.y_fcst_upper = conf_int[:, 1]

        last_date = self.data.time.max()
        dates = pd.date_range(start=last_date, periods=steps + 1, freq=self.freq)
        # Drop the last observed date and keep exactly `steps` future dates.
        # The truncation matters when `last_date` is not aligned with `freq`:
        # the range then starts after `last_date` and nothing is dropped.
        # pyre-fixme[16]: `ARIMAModel` has no attribute `dates`.
        self.dates = dates[dates != last_date][:steps]

        self.fcst_df = pd.DataFrame(
            {
                "time": self.dates,
                "fcst": self.y_fcst,
                "fcst_lower": self.y_fcst_lower,
                "fcst_upper": self.y_fcst_upper,
            }
        )
        if self.include_history:
            try:
                # In-sample predictions start at index `d`.
                hist_fcst = (
                    self.model.predict(self.params.d, len(self.data))
                    .reset_index()
                    .rename(columns={"index": "time", 0: "fcst"})
                )
                self.fcst_df = pd.concat([hist_fcst, self.fcst_df])
            except Exception as e:
                msg = f"Fail to generate in-sample forecasts for historical data with error message {e}."
                logging.error(msg)
                # Chain the original exception so its traceback is preserved.
                raise ValueError(msg) from e
        logging.debug("Return forecast data: %s", self.fcst_df)
        return self.fcst_df

    def plot(self) -> None:
        """Plot forecast results from the ARIMA model

        Delegates to ``m.Model.plot`` with the input data and the most recent
        forecast data frame.
        """
        m.Model.plot(self.data, self.fcst_df)

    def __str__(self) -> str:
        """Return the model name."""
        return "ARIMA"

    @staticmethod
    def get_parameter_search_space() -> List[Dict[str, Any]]:
        """Get default ARIMA parameter search space.

        Args:
            None

        Returns:
            A list of dictionaries describing the default ARIMA parameter
            search space
        """
        return get_default_arima_parameter_search_space()