#############  Prediction and Forecasting (Code #10) ############# 

SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",head=TRUE,sep=",")
x_ibm <- SFX_da$IBM
x_xom <- SFX_da$XOM
x_ge <- SFX_da$GE
x_Mkt_RF<- SFX_da$Mkt_RF
x_SMB <- SFX_da$SMB
x_HML <- SFX_da$HML
x_RF <- SFX_da$RF

T <- length(x_ibm)
lr_ibm <- log(x_ibm[-1]/x_ibm[-T])
Mkt_RF <- x_Mkt_RF[-1]/100
SMB <- x_SMB[-1]/100
HML <- x_HML[-1]/100
RF <- x_RF[-1]/100
ibm_x <- lr_ibm - RF

y <- ibm_x; 
x1 <-  Mkt_RF
x2 <-  SMB
x3 <- HML
T <- length(y)
x0 <- matrix(1,T,1)
x <- cbind(x0,Mkt_RF,SMB,HML)
k <- ncol(x)

fit_ibm_ff3 <- lm (y ~ x - 1)				# Restricted Regression
b_ibm <- fit_ibm_ff3$coefficients			# regression coefficients, b
e_ibm <- fit_ibm_ff3$residuals
Sigma2 <- sum(e_ibm^2)/(T - k)				# Estimate of sigma^2 ("irreducible")


#### Given x_0, compute forecast, forecast error and C.I. 
x_0 <- rbind(1.0000, -0.0189, -0.0142, -0.0027)
y_0 <- 0.1555214
y_f0 <-  t(b_ibm)%*% x_0				# In sample prediction, given x0 
y_f0

ef_0 <- y_f0 - y_0					# In sample error
ef_0

Var_b <- vcov(fit_ibm_ff3)				# Extract covariance from fit_ibm
var_ef_0 <- t(x_0)%*% Var_b%*% x_0 + Sigma2
var_ef_0
sqrt(var_ef_0)

# (1-alpha)% C.I. for prediction	(alpha = .05)
CI_lb <- y_f0 - 1.96 * sqrt(var_ef_0) 
CI_lb

CI_ub <- y_f0 + 1.96 * sqrt(var_ef_0)
CI_ub


#### CV: Cross-Validation 
y <- ibm_x

ff_cv_data <- data.frame(Mkt_RF, SMB, HML) 

### CV: Cross-Validation K-fold Code Function 
CV <- function(dats, n.folds){
  folds <- list() 		# flexible object for storing folds
  fold.size <- nrow(dats)/n.folds
  remain <- 1:nrow(dats) 	# all obs are in
  
  for (i in 1:n.folds){
    select <- sample(remain, fold.size, replace = FALSE)	#randomly sample fold_size from remaining obs)
    folds[[i]] <- select 		# store indices (write a special statement for last fold if ‘leftover points’)
    if (i == n.folds){
      folds[[i]] <- remain
    }
   
    remain <- setdiff(remain, select) 	#update remaining indices to reflect what was taken out
    remain
  }
  
results <- matrix(0,1,n.folds)			# An empty matrix to accumulate K MSEs

for (i in 1:n.folds){
  # fold i
  indis <- folds[[i]] 				#unpack into a vector
  estim <- dats[-indis, ] 			#split into estimation (train) & validation (test) sets
  test <- dats[indis, ]
  
  lm.model <- lm(y[-indis] ~ ., data = estim)		# OLS with estimation data
  pred <- predict(lm.model, newdata = test)		# predicted values for fold not used
  MSE <- mean((y[indis] - pred)^2)		# MSE (any other evaluation measure can be used)
  results[[i]]<- MSE				# Accumulate MSE in vector
}
return(results)
}

CV_ff_5 <- CV(ff_cv_data, 5)
mean(CV_ff_5)

T_Loocv <- T - 1
LOOCV_ff <- CV(ff_cv_data, T_Loocv)
mean(LOOCV_ff)


#### Measures of Accuracy: MSE
T0 <- 1
T1 <- 539						# End of Estimation Period (Dec 2017)
T2 <- T1+1						# Start of Validation Period
y1 <- y[T0:T1]
x1 <- x[T0:T1,]
fit_ibm_2 <- lm(y1~ x1 - 1)				# Estimation Period Regression From T0 to T1
b1 <- fit_ibm_2$coefficients				# Extract OLS coefficients from regression
summary(fit_ibm_2)

x_0 <- x[T2:T,]						# Validation data
y_0 <- y[T2:T]						# Validation data
y_f0 <-  x_0%*% b_ibm					# Forecast 
ef_0 <- y_f0 - y_0					# Forecasat error
mse_ef_0 <- sum(ef_0^2)/nrow(x_0)			# MSE
mse_ef_0 

mae_ef_0 <- sum(abs(ef_0))/nrow(x_0)			# MAE
mae_ef_0

plot(y_f0, type="l", col="red", main = "IBM: Actual vs. Forecast (2018-2023)",
     xlab = "Obs",  ylab = "Forecast")
lines(y_0, type = "l", col = "blue")
legend("topleft",  legend = c("Actual", "Forecast"),  col = c("blue", "red"),  lty = 1)


#### Testing Accuracy: MGN
T1 <- 539						# End of Estimation Period
x_0f <- x[T1:(T-1),]					# By assumption on the X, it starts at T1.
y_0 <- y[T2:T]	# T2 = T1 + 1
y_f0 <-  x_0f %*% b1					# b1 coefficients from previous regresssion
ef_0 <- y_f0 - y_0					# et (2) 
mse_ef_0 <- sum(ef_0^2)/nrow(x_0)
mse_ef_0						# MSE(2)

ef_rw_0 <- y[T1:(T-1)] - y_0				# et (1) 
mse_ef_rw_0 <- sum(ef_rw_0^2)/nrow(x_0)
mse_ef_rw_0						# MSE(1).

# Step 1. Define errors and z & x
z_mgn <- ef_rw_0 + ef_0
x_mgn <- ef_rw_0 - ef_0

# Step 2. Regress x on z
fit_mgn <- lm(z_mgn ~ x_mgn)
summary(fit_mgn)

# Step 3. t-test on beta
coef(summary(fit_mgn))[, "t value"]



#### Forecasting: Fundamental Approach
FX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/FX_USA_JAP.csv", head=TRUE, sep=",")

### Steps 1 & @: Collect data, define variables and transform data.
us_I <- FX_da$US_INF					# Read US Inflation (IUS) data from file
us_mg <- FX_da$US_M1_c					# Read US Money growth (mUS) data from file
us_i <- FX_da$US_I3M					# Read US 3-mo Interest rate (iUS) data from file
us_y <- FX_da$US_GDP_g					# Read US GDP growth (yUS) data from file
us_tb <- FX_da$US_CA_c					# Read US Current account change (tbUS) data from file
jp_I <- FX_da$JAP_INF					# Read Japan Inflation (IUS) data from file
jp_mg <- FX_da$JAP_MI_c					# Read Japan Money growth (mJP) data from file
jp_i <- FX_da$JAP_I3M					# Read Japan 3-mo Interest rate (iJP) data from file
jp_y <- FX_da$JAP_GDP_g					# Read Japan GDP growth yJP) data from file
jp_tb <- FX_da$JAP_CA_c					# Read Japan Current account change (tbJP) data from file
e_f <- FX_da$JPY.USD_c					# Read changes in JPY/USD (e)

inf_dif <- us_I - jp_I					# Define inflation rate differential (inf_dif)
int_dif <- us_i - jp_i
y_dif <- us_y - jp_y
tb_dif <- us_tb - jp_tb
xx <- cbind(int_dif, y_dif)
T <- length(e_f)
T_est <- 161						# Define final observation for estimation period. 
e_f1 <- e_f[1:T_est]					# Adjust sample size to T_est
xx_1 <- xx[1:T_est,]					# Adjust sample size to T_est

### Step 3. Estimate Model (only with estimation period, 1 to T_est)
fit_ef <- lm(e_f1 ~ xx_1)
summary(fit_ef)

### Step 4. Generate forecasts 

## 4.1 Model forecast driving variables (X's) using info from estimation period only

# AR(1) Model for int_dif
int_dif_lag1 <- int_dif[1:T_est-1]				# Lag (iUS,t - iJAP,t) 
int_dif_lag0 <- int_dif[2:T_est]				# Adjust sample size (lost one observation above)
fit_int <- lm(int_dif_lag0 ~ int_dif_lag1)			# Fit AR(1) model  
summary(fit_int)


# AR(1) Model for y_dif
y_dif_lag1 <- y_dif[1:T_est-1]					# Lag (yUS,t - yJAP,t) 
y_dif_lag0 <- y_dif[2:T_est]					# Adjust sample size (lost one observation above)
fit_y <- lm(y_dif_lag0 ~ y_dif_lag1)				# Fit AR(1) model  
summary(fit_y)

# AR(1) Model One-step=ahead forecasts (starting at T_val = T_est + 1) for driving variables
T_val <- T_est+1						# start of Validation period
xx_cons <- rep(1,T-T_val+1)					# create the constant vector
int_dif_0 <- cbind(xx_cons,xx[T_val:T,1]) %*% fit_int$coeff	# 8 forecasts for (iUS,t - iJAP,t) 
y_dif_0 <- cbind(xx_cons,xx[T_val:T,2]) %*% fit_y$coeff		# 8 forecasts for (yUS,t - yJAP,t) 

# Model One-step=ahead forecasts (starting at T_val = T_est + 1) for e_f (& MSE)
e_Mod_0 <- cbind(xx_cons,int_dif_0,y_dif_0)%*%fit_ef$coeff	# Model's forecast
f_e_Mod <- e_f[T_val:T] - e_Mod_0 				# Model's forecast error
mse_e_f <- sum(f_e_Mod^2)/(T-T_val+1)				# Model's MSE
mse_e_f 


## RW one-step forecast for e_f (& MSE)
e_f_RW_0 <- rep(0,T-T_val+1)					# RW forecast = 0 (always 0, for all t+T!)
f_e_RW <- e_f[T_val:T] - e_f_RW_0 				# RW's forecast error	
mse_e_RW <- sum(f_e_RW^2)/(T-T_val+1)				# RW's MSE
mse_e_RW

### Step 5. Evaluation of Forecasts
z_mgn <- f_e_Mod + f_e_RW 
x_mgn <- f_e_Mod - f_e_RW
fit_mgn <- lm(z_mgn ~ x_mgn)
summary(fit_mgn)


### Step 6. Out-of-sample Forecasts
# out-of-sample forecat for int_dif
int_dif_p1 <- cbind(1,int_dif[T])%*%fit_int$coeff		# int_dif_p1 = Et=2020:II[(iUS,t - iJAP)t+1=2020:III] 
int_dif_p1

# out-of-sample forecat for mg_dif
mg_dif_p1 <- cbind(1,mg_dif[T])%*%fit_mg$coeff	 		# mg_dif_p1 = Et=2020:II[(mUS,t - mJAP)t+1=2020:III] 
mg_dif_p1

# out-of-sample forecat for y_dif
y_dif_p1 <- cbind(1,y_dif[T])%*%fit_y$coeff			 # y_dif_p1 = Et=2020:II[(yUS,t - yJAP)t+1=2020:III] 
y_dif_p1

S <- 0.009279							# Today's value of St=2020:II
e_f_p1 <- cbind(1,int_dif_p1,mg_dif_p1,y_dif_p1) %*% fit_ef$coeff		# Today's forecast for et=2020:III
e_f_p1
S_p1 <- S * (1+e_f_p1/100)					# Model's forecast for St+1=2020:III 
S_p1 


## Use the one-step-ahead forecasts to generate two-step-ahead forecasts. => Forecast Et=2020:II[St+1=2020:IV] 	(=S_p2 below)
S1 <- S_p1 							# Today's value of St+1=2020:III
int_dif_p2 <- cbind(1,int_dif_p1)%*%fit_int$coeff		# Today's forecast for (iUS - iJP)t+2
mg_dif_p2 <- cbind(1,mg_dif_p1)%*%fit_mg$coeff			# Today's forecast for (mUS - mJP)t+2
y_dif_p2 <- cbind(1,y_dif_p1)%*%fit_y$coeff			# Today's forecast for (yUS - yJP)t+2
e_f_p2 <- cbind(1,int_dif_p2,mg_dif_p2,y_dif_p2)%*%fit_ef$coeff # Today's forecast for et=2020:IV
e_f_p2

S_p2 <- S1*(1+e_f_p2/100)					# Today's forecast for St=2020:IV
S_p2