############# Time Series: Seasonalities & Forecasting (Code #17) #############
############# REVIEW: Time Series - ARMA: Identification & Estimation (Code #16) #############

##### Reading Shiller data set and defining variables
Sh_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv", head=TRUE, sep=",")
names(Sh_da)
x_P <- Sh_da$P
x_D <- Sh_da$D
x_i <- Sh_da$Long_i
T <- length(x_P)
lr_p <- log(x_P[-1]/x_P[-T])          # log changes in stock prices
lr_d <- log(x_D[-1]/x_D[-T])          # log changes in dividends
lr_p_ww2 <- lr_p[924:(T-1)]           # post-WW2 subsample
lr_d_ww2 <- lr_d[924:(T-1)]

## Identification - Difference: Changes in Stock Prices & Changes in Stock Dividends
acf_p <- acf(lr_p)
pacf_p <- pacf(lr_p)
acf_d <- acf(lr_d)
pacf_d <- pacf(lr_d)
acf_p2 <- acf(lr_p_ww2)
pacf_p2 <- pacf(lr_p_ww2)
acf_d2 <- acf(lr_d_ww2)
pacf_d2 <- pacf(lr_d_ww2)

## Automatic Identification using MINIC
library(forecast)                     # Need to install the forecast package first
auto_p <- auto.arima(lr_p, ic="bic", trace=TRUE)
auto_p_ww2 <- auto.arima(lr_p_ww2, ic="bic", trace=TRUE)

### Estimation & Diagnostic Testing: Change in Stock Prices post-WW2
arima(lr_p_ww2, order=c(0,0,1))       # Fit an MA(1) with the arima function
autoplot(auto_p_ww2)
checkresiduals(auto_p_ww2)

auto_d_ww2 <- auto.arima(lr_d_ww2, ic="bic", trace=TRUE)

### Estimation & Diagnostic Testing: Change in Dividends post-WW2
arima(lr_d_ww2, order=c(1,0,0))       # Fit an AR(1)
autoplot(auto_d_ww2)
checkresiduals(auto_d_ww2)

### Automatic Identification, Stock Returns: CNP, XOM, PFE & Gold
SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv", head=TRUE, sep=",")
names(SFX_da)
summary(SFX_da)
x_date <- SFX_da$Date
x_cnp <- SFX_da$CNP
x_xom <- SFX_da$XOM
x_pfe <- SFX_da$PFE
x_gold <- SFX_da$Gold
x_RF <- SFX_da$RF                     # risk-free rate (column name assumed; check names(SFX_da))
T <- length(x_cnp)
T
lr_cnp <- log(x_cnp[-1]/x_cnp[-T])
lr_xom <- log(x_xom[-1]/x_xom[-T])
lr_pfe <- log(x_pfe[-1]/x_pfe[-T])
gold <- log(x_gold[-1]/x_gold[-T])
RF <- x_RF[-1]/100
cnp_x <- lr_cnp - RF                  # excess returns
xom_x <- lr_xom - RF
pfe_x <- lr_pfe - RF
auto.arima(cnp_x, ic="bic", trace=TRUE)
auto.arima(xom_x, ic="bic", trace=TRUE)
auto.arima(pfe_x, ic="bic", trace=TRUE)
auto.arima(gold, ic="aic", trace=TRUE)
plot(gold, type="l")

### Estimation & Diagnostic Testing: Change in Gold
auto_gold <- arima(gold, order=c(4,0,0))
autoplot(auto_gold)
checkresiduals(auto_gold)
auto_gold2 <- arima(gold, order=c(12,0,0))
autoplot(auto_gold2)
checkresiduals(auto_gold2)

#### Seasonal ARMA (Deterministic) & Forecasting (Lecture 9-c)
Car_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/TOTALNSA.csv", head=TRUE, sep=",")
names(Car_da)
x_car <- Car_da$TOTALNSA              # total U.S. vehicle sales (not seasonally adjusted)

### Stabilizing Transformations
library(tseries)
ts_car <- ts(x_car, start=c(1976,1), frequency=12)
plot.ts(ts_car, xlab="Time", ylab="cars", main="Total U.S. Vehicle Sales")
mean(x_car)
sd(x_car)

## Log Transformation
l_car <- log(ts_car)
plot.ts(l_car, xlab="Time", ylab="log cars", main="Total U.S. Vehicle Sales (logs)")
mean(l_car)
sd(l_car)

## Box-Cox Transformation
lambda <- 0.75
b_cox_car <- (ts_car^lambda - 1)/lambda
plot.ts(b_cox_car, xlab="Time", ylab="cars", main="Box-Cox Total U.S. Vehicle Sales")
mean(b_cox_car)
sd(b_cox_car)
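## (Added sketch) Rather than fixing lambda by hand, forecast::BoxCox.lambda() can
## estimate it from the data. A minimal example; it assumes ts_car as defined above
## and uses the package's default (Guerrero) method:
library(forecast)
lambda_hat <- BoxCox.lambda(ts_car)            # estimated Box-Cox lambda
lambda_hat
b_cox_car_auto <- BoxCox(ts_car, lambda_hat)   # apply the estimated transformation
plot.ts(b_cox_car_auto, xlab="Time", ylab="cars",
        main="Box-Cox (estimated lambda) Total U.S. Vehicle Sales")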
### Deterministic Seasonality (with Dummy Variables): Simulation
T_sim <- 500                          # number of simulated observations
y_sim <- matrix(0, T_sim, 1)          # vector to accumulate simulated data
Seas_12 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (T_sim/12 + 1))  # Create Dec dummy
u <- rnorm(T_sim, sd=0.75)            # Draw T_sim normally distributed errors
phi1 <- 0.2                           # Change to create different correlation patterns
k <- 12                               # Seasonal periodicity
a <- k + 1                            # Time index for observations
mu <- 0.2
mu_s <- 0.02
while (a <= T_sim) {
  y_sim[a] <- mu + phi1 * y_sim[a-1] + Seas_12[a] * mu_s * a + u[a]   # AR(1) plus deterministic seasonal trend
  a <- a + 1
}
y_seas <- y_sim[(k+1):T_sim]
plot(y_seas, type="l", main="Simulated Deterministic Seasonality")
acf(y_seas)
pacf(y_seas)

## Regression to detrend and remove deterministic seasonalities => Get filtered series (residuals)
T_sim1 <- length(y_seas)
trend <- c(1:T_sim)
trend_sim <- trend[(k+1):T_sim]
seas_d <- Seas_12[(k+1):T_sim]
sea_trend <- seas_d * trend_sim
fit_seas <- lm(y_seas ~ seas_d + trend_sim + sea_trend)
summary(fit_seas)
y_seas_filt <- fit_seas$residuals     # Filtered series
acf(y_seas_filt)
pacf(y_seas_filt)

## Fit ARMA model on filtered series and check whether a seasonal pattern remains
fit_y_seas_ar1 <- arima(y_seas_filt, order=c(1,0,0))
fit_y_seas_ar1
y_seas_filt_2 <- fit_y_seas_ar1$residuals   # Extract residuals (filtered again)
plot(y_seas_filt_2, type="l", main="AR(1) Residuals")
acf(y_seas_filt_2, main="ACF: AR(1) Residuals")
pacf(y_seas_filt_2, main="PACF: AR(1) Residuals")

## Real Estate Example: LA Home Prices
RE_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Real_Estate_2022.csv", head=TRUE, sep=",")
x_la <- RE_da$LA_c
zz <- x_la
T <- length(zz)
plot(x_la, type="l", main="Changes in Log Real Estate Prices in LA")
acf(x_la)
pacf(x_la)
# Monthly dummies (the first observation of the change series is treated as a February)
Feb1 <- rep(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))  # Create Feb dummy
Mar1 <- rep(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))  # Create Mar dummy
Apr1 <- rep(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))  # Create Apr dummy
May1 <- rep(c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))  # Create May dummy
Jun1 <- rep(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))  # Create Jun dummy
Jul1 <- rep(c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))  # Create Jul dummy
Aug1 <- rep(c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), (length(zz)/12+1))  # Create Aug dummy
Sep1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0), (length(zz)/12+1))  # Create Sep dummy
Oct1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), (length(zz)/12+1))  # Create Oct dummy
Nov1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (length(zz)/12+1))  # Create Nov dummy
Dec1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (length(zz)/12+1))  # Create Dec dummy
seas1 <- cbind(Feb1, Mar1, Apr1, May1, Jun1, Jul1, Aug1, Sep1, Oct1, Nov1, Dec1)
seas <- seas1[1:T,]
x_la_fit_sea <- lm(x_la ~ seas)       # Regress x_la against constant + seasonal dummies
summary(x_la_fit_sea)
x_la_filt <- x_la_fit_sea$residuals   # residuals, e_t = filtered x_la series
library(forecast)
fit_ar_la_filt <- auto.arima(x_la_filt)   # use auto.arima to look for a good model
fit_ar_la_filt
checkresiduals(fit_ar_la_filt)
autoplot(fit_ar_la_filt)
# (A more compact way to build the monthly dummies is sketched below.)
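## (Added sketch) The eleven monthly dummies above can also be generated with
## forecast::seasonaldummy(). A minimal equivalent; it assumes x_la is monthly and,
## since no calendar start date is supplied, treats the first observation as season 1:
library(forecast)
seas_auto <- seasonaldummy(ts(x_la, frequency=12))   # T x 11 matrix of monthly dummies
x_la_fit_sea2 <- lm(x_la ~ seas_auto)                # same idea as the hand-built regression
summary(x_la_fit_sea2)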
### Forecasting
library(tseries)
library(forecast)

## US Historical Stock Returns
Sh_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Shiller_2020data.csv", head=TRUE, sep=",")
names(Sh_da)
x_P <- Sh_da$P
x_D <- Sh_da$D
x_i <- Sh_da$Long_i
T <- length(x_P)
lr_p <- log(x_P[-1]/x_P[-T])
lr_d <- log(x_D[-1]/x_D[-T])
fit_p_ts <- arima(lr_p, order=c(0,0,1))   # Fit an MA(1) model
fcast_p <- forecast(fit_p_ts, h=4)        # Produce 4-step-ahead forecasts
fit_p_ts
fcast_p
plot(fcast_p)

## Oil: Log Changes
FMX_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/ppp_m.csv", head=TRUE, sep=",")
names(FMX_da)
x_S_usd <- FMX_da$TWEX_BROAD
x_oil <- FMX_da$Crude_WTI_Oil
T <- length(x_oil)
lr_oil <- log(x_oil[-1]/x_oil[-T])
fit_oil_ts <- arima(lr_oil, order=c(4,0,0))   # Fit an AR(4) model
fcast_oil <- forecast(fit_oil_ts, h=12)       # 12-step-ahead forecasts
fit_oil_ts
fcast_oil
plot(fcast_oil)

## Log Changes in Earnings
x_E <- Sh_da$E
T <- length(x_E)
lr_e <- log(x_E[-1]/x_E[-T])
acf(lr_e)
fit_e <- auto.arima(lr_e)
fit_e
fcast_e <- forecast(fit_e, h=20)              # h = number of step-ahead forecasts
fcast_e
plot(fcast_e, type="l", include=24, main="Changes in Earnings: Forecast 2023:Aug - 2025:Apr")
# We include the last 24 observations along with the forecast.

## CAR SALES: Log Transformation
l_car <- log(ts_car)
plot.ts(l_car, xlab="Time", ylab="log cars", main="Total U.S. Vehicle Sales (logs)")
mean(l_car)
sd(l_car)

### SES: Log Car Sales
mod_car <- HoltWinters(l_car, beta=FALSE, gamma=FALSE)   # Simple exponential smoothing (SES)
mod_car

### SES: Log Dividend Changes
mod1 <- HoltWinters(lr_d[1:1750], beta=FALSE, gamma=FALSE)
mod1
plot(mod1$fitted, main="Log Dividend Changes: SES")

## One-step forecast errors
T_last <- nrow(mod1$fitted)           # number of in-sample forecasts
h <- 25                               # forecast horizon
ses_f <- matrix(0, h, 1)              # Vector to collect forecasts
alpha <- 0.29                         # smoothing parameter (compare with the estimated mod1$alpha)
y <- lr_d
T <- length(lr_d)
sm <- matrix(0, T, 1)
T1 <- T - h + 1                       # Start of forecasts
a <- T1                               # index for while loop
sm[a-1] <- mod1$fitted[T_last]        # last in-sample forecast
while (a <= T) {
  sm[a] <- alpha * y[a-1] + (1 - alpha) * sm[a-1]   # SES recursion
  a <- a + 1
}
ses_f <- sm[T1:T]
ses_f
f_error_ses <- sm[T1:T] - y[T1:T]     # forecast errors
MSE_ses <- sum(f_error_ses^2)/h       # MSE
plot(ses_f, type="l", main="SES Forecasts: Changes in Dividends")
forecast(mod1, h=25, level=95)

### Holt-Winters
components_car <- decompose(l_car)
plot(components_car)
HW_car <- HoltWinters(l_car, seasonal="additive")
HW_car

# Custom Holt-Winters fit (specifying alpha, beta & gamma)
HW_car_2 <- HoltWinters(l_car, alpha=0.2, beta=0.1, gamma=0.1)
HW_car_2

# Visual evaluation of fit
plot(l_car, main="Log Car Sales", ylab="Log Car Sales")
lines(HW_car$fitted[,1], lty=2, col="blue")
lines(HW_car_2$fitted[,1], lty=2, col="red")

# Forecasting
HW_car.pred <- predict(HW_car, 24, prediction.interval=TRUE, level=0.95)
forecast(HW_car, h=24, level=95)

# Visual evaluation of fit and forecasts
plot(l_car, main="Forecasting Log Car Sales", ylab="Log Car Sales")
lines(HW_car$fitted[,1], lty=2, col="blue")       # History of series
lines(HW_car.pred[,1], col="red")                 # Predictions
lines(HW_car.pred[,2], lty=2, col="orange")       # Upper 95% C.I. band
lines(HW_car.pred[,3], lty=2, col="orange")       # Lower 95% C.I. band

# In-sample MSE (sum of squared one-step in-sample errors / number of fitted values)
HW_car_mse <- HW_car$SSE/nrow(HW_car$fitted)
HW_car_mse
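## (Added sketch) As an alternative to Holt-Winters, auto.arima() can fit a seasonal
## ARIMA directly on the monthly ts object. A minimal example; it assumes l_car as
## defined above (fitting may take a moment):
fit_sarima_car <- auto.arima(l_car, ic="bic")         # selects (p,d,q)(P,D,Q)[12] automatically
fit_sarima_car
fcast_sarima_car <- forecast(fit_sarima_car, h=24)    # 24-month-ahead forecasts
plot(fcast_sarima_car, main="Seasonal ARIMA Forecasts: Log Car Sales")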
# Multiplicative Model and Forecast
HW_car_m <- HoltWinters(l_car, seasonal="multiplicative")
HW_car_m
HW_car_m.pred <- predict(HW_car_m, 24, prediction.interval=TRUE, level=0.95)
forecast(HW_car_m, h=24, level=95)

# Visual evaluation of the prediction
plot(l_car, main="Forecasting Log Car Sales", ylab="Log Car Sales")
lines(HW_car_m$fitted[,1], lty=2, col="blue")     # History of series
lines(HW_car_m.pred[,1], col="red")               # Predictions
lines(HW_car_m.pred[,2], lty=2, col="orange")     # Upper 95% C.I. band
lines(HW_car_m.pred[,3], lty=2, col="orange")     # Lower 95% C.I. band

# In-sample MSE (sum of squared one-step in-sample errors / number of fitted values)
HW_car_m_mse <- HW_car_m$SSE/nrow(HW_car_m$fitted)
HW_car_m_mse

# Using the R package forecast
library(forecast)
HW_car_f <- forecast(HW_car, h=24, level=c(80,95))

# Visualize forecast
plot(HW_car_f, main="Forecasting Log Car Sales", ylab="Log Car Sales")
lines(HW_car_f$fitted, lty=2, col="purple")

# Evaluation of the HW model
HW_car_fe <- HW_car_f$residuals       # In-sample prediction errors
acf(HW_car_fe, lag.max=20, na.action=na.pass)
Box.test(HW_car_fe, lag=20, type="Ljung-Box")
hist(HW_car_fe)

# Testing accuracy of forecasts (very informal: dm.test expects forecast ERRORS;
# here it is applied to the two sets of point forecasts only as an illustration)
dm.test(HW_car.pred[,1], HW_car_m.pred[,1], power=2)
dm.test(HW_car.pred[,1], HW_car_m.pred[,1], power=1)
# (A holdout-based Diebold-Mariano comparison is sketched below.)
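## (Added sketch) dm.test() compares two series of forecast errors, not the point
## forecasts themselves. A minimal holdout version; it assumes the last 24 months of
## l_car are held out and both Holt-Winters variants are refit on the shorter sample:
n_car <- length(l_car)
l_car_train <- window(l_car, end=time(l_car)[n_car - 24])     # estimation sample
l_car_test  <- window(l_car, start=time(l_car)[n_car - 23])   # 24-month holdout
f_add  <- forecast(HoltWinters(l_car_train, seasonal="additive"), h=24)$mean
f_mult <- forecast(HoltWinters(l_car_train, seasonal="multiplicative"), h=24)$mean
e_add  <- l_car_test - f_add                                  # holdout forecast errors
e_mult <- l_car_test - f_mult
dm.test(e_add, e_mult, power=2)       # Diebold-Mariano test under squared-error loss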