############# Prediction and Forecasting (Code #10) ############# SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",head=TRUE,sep=",") x_ibm <- SFX_da$IBM x_xom <- SFX_da$XOM x_ge <- SFX_da$GE x_Mkt_RF<- SFX_da$Mkt_RF x_SMB <- SFX_da$SMB x_HML <- SFX_da$HML x_RF <- SFX_da$RF T <- length(x_ibm) lr_ibm <- log(x_ibm[-1]/x_ibm[-T]) Mkt_RF <- x_Mkt_RF[-1]/100 SMB <- x_SMB[-1]/100 HML <- x_HML[-1]/100 RF <- x_RF[-1]/100 ibm_x <- lr_ibm - RF y <- ibm_x; x1 <- Mkt_RF x2 <- SMB x3 <- HML T <- length(y) x0 <- matrix(1,T,1) x <- cbind(x0,Mkt_RF,SMB,HML) k <- ncol(x) fit_ibm_ff3 <- lm (y ~ x - 1) # Restricted Regression b_ibm <- fit_ibm_ff3$coefficients # regression coefficients, b e_ibm <- fit_ibm_ff3$residuals Sigma2 <- sum(e_ibm^2)/(T - k) # Estimate of sigma^2 ("irreducible") #### Given x_0, compute forecast, forecast error and C.I. x_0 <- rbind(1.0000, -0.0189, -0.0142, -0.0027) y_0 <- 0.1555214 y_f0 <- t(b_ibm)%*% x_0 # In sample prediction, given x0 y_f0 ef_0 <- y_f0 - y_0 # In sample error ef_0 Var_b <- vcov(fit_ibm_ff3) # Extract covariance from fit_ibm var_ef_0 <- t(x_0)%*% Var_b%*% x_0 + Sigma2 var_ef_0 sqrt(var_ef_0) # (1-alpha)% C.I. for prediction (alpha = .05) CI_lb <- y_f0 - 1.96 * sqrt(var_ef_0) CI_lb CI_ub <- y_f0 + 1.96 * sqrt(var_ef_0) CI_ub #### CV: Cross-Validation y <- ibm_x ff_cv_data <- data.frame(Mkt_RF, SMB, HML) ### CV: Cross-Validation K-fold Code Function CV <- function(dats, n.folds){ folds <- list() # flexible object for storing folds fold.size <- nrow(dats)/n.folds remain <- 1:nrow(dats) # all obs are in for (i in 1:n.folds){ select <- sample(remain, fold.size, replace = FALSE) #randomly sample fold_size from remaining obs) folds[[i]] <- select # store indices (write a special statement for last fold if ‘leftover points’) if (i == n.folds){ folds[[i]] <- remain } remain <- setdiff(remain, select) #update remaining indices to reflect what was taken out remain } results <- matrix(0,1,n.folds) # An empty matrix to accumulate K MSEs for (i in 1:n.folds){ # fold i indis <- folds[[i]] #unpack into a vector estim <- dats[-indis, ] #split into estimation (train) & validation (test) sets test <- dats[indis, ] lm.model <- lm(y[-indis] ~ ., data = estim) # OLS with estimation data pred <- predict(lm.model, newdata = test) # predicted values for fold not used MSE <- mean((y[indis] - pred)^2) # MSE (any other evaluation measure can be used) results[[i]]<- MSE # Accumulate MSE in vector } return(results) } CV_ff_5 <- CV(ff_cv_data, 5) mean(CV_ff_5) T_Loocv <- T - 1 LOOCV_ff <- CV(ff_cv_data, T_Loocv) mean(LOOCV_ff) #### Measures of Accuracy: MSE T0 <- 1 T1 <- 539 # End of Estimation Period (Dec 2017) T2 <- T1+1 # Start of Validation Period y1 <- y[T0:T1] x1 <- x[T0:T1,] fit_ibm_2 <- lm(y1~ x1 - 1) # Estimation Period Regression From T0 to T1 b1 <- fit_ibm_2$coefficients # Extract OLS coefficients from regression summary(fit_ibm_2) x_0 <- x[T2:T,] # Validation data y_0 <- y[T2:T] # Validation data y_f0 <- x_0%*% b_ibm # Forecast ef_0 <- y_f0 - y_0 # Forecasat error mse_ef_0 <- sum(ef_0^2)/nrow(x_0) # MSE mse_ef_0 mae_ef_0 <- sum(abs(ef_0))/nrow(x_0) # MAE mae_ef_0 plot(y_f0, type="l", col="red", main = "IBM: Actual vs. Forecast (2018-2023)", xlab = "Obs", ylab = "Forecast") lines(y_0, type = "l", col = "blue") legend("topleft", legend = c("Actual", "Forecast"), col = c("blue", "red"), lty = 1) #### Testing Accuracy: MGN T1 <- 539 # End of Estimation Period x_0f <- x[T1:(T-1),] # By assumption on the X, it starts at T1. y_0 <- y[T2:T] # T2 = T1 + 1 y_f0 <- x_0f %*% b1 # b1 coefficients from previous regresssion ef_0 <- y_f0 - y_0 # et (2) mse_ef_0 <- sum(ef_0^2)/nrow(x_0) mse_ef_0 # MSE(2) ef_rw_0 <- y[T1:(T-1)] - y_0 # et (1) mse_ef_rw_0 <- sum(ef_rw_0^2)/nrow(x_0) mse_ef_rw_0 # MSE(1). # Step 1. Define errors and z & x z_mgn <- ef_rw_0 + ef_0 x_mgn <- ef_rw_0 - ef_0 # Step 2. Regress x on z fit_mgn <- lm(z_mgn ~ x_mgn) summary(fit_mgn) # Step 3. t-test on beta coef(summary(fit_mgn))[, "t value"] #### Forecasting: Fundamental Approach FX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/FX_USA_JAP.csv", head=TRUE, sep=",") ### Steps 1 & @: Collect data, define variables and transform data. us_I <- FX_da$US_INF # Read US Inflation (IUS) data from file us_mg <- FX_da$US_M1_c # Read US Money growth (mUS) data from file us_i <- FX_da$US_I3M # Read US 3-mo Interest rate (iUS) data from file us_y <- FX_da$US_GDP_g # Read US GDP growth (yUS) data from file us_tb <- FX_da$US_CA_c # Read US Current account change (tbUS) data from file jp_I <- FX_da$JAP_INF # Read Japan Inflation (IUS) data from file jp_mg <- FX_da$JAP_MI_c # Read Japan Money growth (mJP) data from file jp_i <- FX_da$JAP_I3M # Read Japan 3-mo Interest rate (iJP) data from file jp_y <- FX_da$JAP_GDP_g # Read Japan GDP growth yJP) data from file jp_tb <- FX_da$JAP_CA_c # Read Japan Current account change (tbJP) data from file e_f <- FX_da$JPY.USD_c # Read changes in JPY/USD (e) inf_dif <- us_I - jp_I # Define inflation rate differential (inf_dif) int_dif <- us_i - jp_i y_dif <- us_y - jp_y tb_dif <- us_tb - jp_tb xx <- cbind(int_dif, y_dif) T <- length(e_f) T_est <- 161 # Define final observation for estimation period. e_f1 <- e_f[1:T_est] # Adjust sample size to T_est xx_1 <- xx[1:T_est,] # Adjust sample size to T_est ### Step 3. Estimate Model (only with estimation period, 1 to T_est) fit_ef <- lm(e_f1 ~ xx_1) summary(fit_ef) ### Step 4. Generate forecasts ## 4.1 Model forecast driving variables (X's) using info from estimation period only # AR(1) Model for int_dif int_dif_lag1 <- int_dif[1:T_est-1] # Lag (iUS,t - iJAP,t) int_dif_lag0 <- int_dif[2:T_est] # Adjust sample size (lost one observation above) fit_int <- lm(int_dif_lag0 ~ int_dif_lag1) # Fit AR(1) model summary(fit_int) # AR(1) Model for y_dif y_dif_lag1 <- y_dif[1:T_est-1] # Lag (yUS,t - yJAP,t) y_dif_lag0 <- y_dif[2:T_est] # Adjust sample size (lost one observation above) fit_y <- lm(y_dif_lag0 ~ y_dif_lag1) # Fit AR(1) model summary(fit_y) # AR(1) Model One-step=ahead forecasts (starting at T_val = T_est + 1) for driving variables T_val <- T_est+1 # start of Validation period xx_cons <- rep(1,T-T_val+1) # create the constant vector int_dif_0 <- cbind(xx_cons,xx[T_val:T,1]) %*% fit_int$coeff # 8 forecasts for (iUS,t - iJAP,t) y_dif_0 <- cbind(xx_cons,xx[T_val:T,2]) %*% fit_y$coeff # 8 forecasts for (yUS,t - yJAP,t) # Model One-step=ahead forecasts (starting at T_val = T_est + 1) for e_f (& MSE) e_Mod_0 <- cbind(xx_cons,int_dif_0,y_dif_0)%*%fit_ef$coeff # Model's forecast f_e_Mod <- e_f[T_val:T] - e_Mod_0 # Model's forecast error mse_e_f <- sum(f_e_Mod^2)/(T-T_val+1) # Model's MSE mse_e_f ## RW one-step forecast for e_f (& MSE) e_f_RW_0 <- rep(0,T-T_val+1) # RW forecast = 0 (always 0, for all t+T!) f_e_RW <- e_f[T_val:T] - e_f_RW_0 # RW's forecast error mse_e_RW <- sum(f_e_RW^2)/(T-T_val+1) # RW's MSE mse_e_RW ### Step 5. Evaluation of Forecasts z_mgn <- f_e_Mod + f_e_RW x_mgn <- f_e_Mod - f_e_RW fit_mgn <- lm(z_mgn ~ x_mgn) summary(fit_mgn) ### Step 6. Out-of-sample Forecasts # out-of-sample forecat for int_dif int_dif_p1 <- cbind(1,int_dif[T])%*%fit_int$coeff # int_dif_p1 = Et=2020:II[(iUS,t - iJAP)t+1=2020:III] int_dif_p1 # out-of-sample forecat for mg_dif mg_dif_p1 <- cbind(1,mg_dif[T])%*%fit_mg$coeff # mg_dif_p1 = Et=2020:II[(mUS,t - mJAP)t+1=2020:III] mg_dif_p1 # out-of-sample forecat for y_dif y_dif_p1 <- cbind(1,y_dif[T])%*%fit_y$coeff # y_dif_p1 = Et=2020:II[(yUS,t - yJAP)t+1=2020:III] y_dif_p1 S <- 0.009279 # Today's value of St=2020:II e_f_p1 <- cbind(1,int_dif_p1,mg_dif_p1,y_dif_p1) %*% fit_ef$coeff # Today's forecast for et=2020:III e_f_p1 S_p1 <- S * (1+e_f_p1/100) # Model's forecast for St+1=2020:III S_p1 ## Use the one-step-ahead forecasts to generate two-step-ahead forecasts. => Forecast Et=2020:II[St+1=2020:IV] (=S_p2 below) S1 <- S_p1 # Today's value of St+1=2020:III int_dif_p2 <- cbind(1,int_dif_p1)%*%fit_int$coeff # Today's forecast for (iUS - iJP)t+2 mg_dif_p2 <- cbind(1,mg_dif_p1)%*%fit_mg$coeff # Today's forecast for (mUS - mJP)t+2 y_dif_p2 <- cbind(1,y_dif_p1)%*%fit_y$coeff # Today's forecast for (yUS - yJP)t+2 e_f_p2 <- cbind(1,int_dif_p2,mg_dif_p2,y_dif_p2)%*%fit_ef$coeff # Today's forecast for et=2020:IV e_f_p2 S_p2 <- S1*(1+e_f_p2/100) # Today's forecast for St=2020:IV S_p2