# Packages ----
# lmtest supplies gqtest(), dwtest() and bgtest(), which are first called in
# Q2.1. Attach it here so those calls do not stop with
# "could not find function".
library(lmtest)

# Data ----
# US / Mexico macro series used in Q2.1 and Q2.2.
FMX_da <- read.csv(
  "https://www.bauer.uh.edu/rsusmel/4397/FX_USA_MX.csv",
  header = TRUE,  # spelled out in full (`head =` relied on partial matching)
  sep = ","
)
summary(FMX_da)

# Pull the individual series out of the data frame.
x_years <- FMX_da$Years
us_CPI <- FMX_da$US_CPI
us_M1 <- FMX_da$US_M1
us_i <- FMX_da$US_int
us_GDP <- FMX_da$US_GDP
us_CA <- FMX_da$US_CA
mx_CPI <- FMX_da$MX_CPI
mx_M1 <- FMX_da$MX_M1
mx_i <- FMX_da$MX_int
mx_GDP <- FMX_da$MX_GDP
mx_CA <- FMX_da$MX_CA
S_mx <- FMX_da$MXN_USD

# Number of observations in the raw series.
# NOTE: T shadows R's built-in shorthand for TRUE, so TRUE/FALSE must always
# be written out in full in this script.
T <- length(us_CPI)

# Log growth rates: x[-1] drops the first observation and x[-T] the last, so
# each ratio is x_t / x_{t-1} and every derived series has T - 1 observations.
mx_I <- log(mx_CPI[-1] / mx_CPI[-T])  # Mex inflation rate
mx_y <- log(mx_GDP[-1] / mx_GDP[-T])  # Mex income growth rate
e_mx <- log(S_mx[-1] / S_mx[-T])      # Change in FX rate (MXN/USD)

# Interest rates: divide by 100 (percent -> decimal) and drop one observation
# so the lengths match the growth-rate series above.
us_i_1 <- us_i[-1] / 100  # drops the first observation
mx_i_1 <- mx_i[-1] / 100  # drops the first observation
mx_i_0 <- mx_i[-T] / 100  # drops the last observation (one-period lag)

T_new <- length(mx_i_1)   # Sample size after losing one observation

###### Q2.1 ######

## 2.1.a - Report Regression
# Mex interest rate on US interest rate, FX change, Mex inflation and Mex
# income growth.
fit_i <- lm(mx_i_1 ~ us_i_1 + e_mx + mx_I + mx_y)
summary(fit_i)

## 2.1.b - Report & interpret R^2 & Beta_1
# R2 = 0.8307 (83% of the variability of Mexican interest rates is explained
#   by the variables in the model)
# Beta_1 = 0.68262 (a 1 percentage-point increase in the US interest rate is
#   associated with a 0.68 percentage-point increase in the Mex interest rate,
#   holding the other regressors constant)

## 2.1.c - Drivers of regression
# US interest rates & Mex inflation rates

## 2.1.d - Heteroscedasticity Tests: GQ
gqtest(fit_i)

## 2.1.e - Heteroscedasticity Tests: LM-BP
e_fit <- fit_i$residuals
e_fit2 <- e_fit^2  # Squared residuals: dependent variable of the aux regression
mx_I2 <- mx_I^2    # Potential driver of variance
e_mx2 <- e_mx^2    # Potential driver of variance

fit_i_BP <- lm(e_fit2 ~ us_i_1 + mx_I2 + e_mx2)  # Aux Reg
Re_e2 <- summary(fit_i_BP)$r.squared             # R^2 of the aux regression
Re_e2

# LM statistic = (number of observations in the aux regression) * R^2.
# The aux regression runs on the T - 1 usable observations, so the raw series
# length T must not be used here.
LM_BP_test <- Re_e2 * nobs(fit_i_BP)
LM_BP_test

# p-value: upper tail of a chi-squared with df = 3 (regressors in the aux
# regression). lower.tail = FALSE avoids the rounding loss of 1 - pchisq()
# when the p-value is very small.
p_val <- pchisq(LM_BP_test, df = 3, lower.tail = FALSE)
p_val

## 2.1.f - White SE
library(sandwich)
# Heteroscedasticity-consistent (HC0) covariance matrix of the OLS estimates;
# the t-ratios divide the OLS coefficients by the robust standard errors.
White <- vcovHC(fit_i, type = "HC0")
b_i_r <- coef(fit_i)
SE_White <- sqrt(diag(White))
t_White <- b_i_r / SE_White
SE_White
t_White

## 2.1.g - DW Test for AR(1)
dwtest(fit_i)

## 2.1.h - LM Test for AR(p)
# Breusch-Godfrey test with 4 lags.
bgtest(fit_i, order = 4)

## 2.1.i -  NW SE
# Newey-West covariance matrix with 12 lags, then the matching t-ratios.
Var_NW <- NeweyWest(fit_i, lag = 12)
b_i_r <- coef(fit_i)
SE_NW <- sqrt(diag(Var_NW))
t_b_NW <- b_i_r / SE_NW
t_b_NW


###### Q2.2 ######
## 2.2.a -  Estimation Period
y <- mx_i_1
xx_i <- cbind(us_i_1, e_mx, mx_I, mx_y)  # X matrix (no constant column)

T0 <- 1
T1 <- 170      # End of Estimation Period (2020.4)
T2 <- T1 + 1   # Start of Validation Period (2021.1)
y1 <- y[T0:T1]
x1 <- xx_i[T0:T1, ]
fit_i_est <- lm(y1 ~ x1)           # Estimation Period Regression (1971.2 - 2020.4)
b_est <- fit_i_est$coefficients    # OLS coefficients (intercept first)
b_est
summary(fit_i_est)

# RW Assumption For Independent Variables:
# the forecast of y at time t uses the regressors observed at t - 1, which is
# why the X rows run from T1 to T_new - 1 while y runs from T2 to T_new.
xx_cons <- rep(1, T_new - T2 + 1)  # Constant column for the validation forecasts
k_for <- length(xx_cons)           # Number of validation-period forecasts
# drop = FALSE keeps the slice a matrix even when the validation period has a
# single row; without it the slice collapses to a vector and cbind()/%*% fail.
y_mod_f0 <- cbind(xx_cons, xx_i[T1:(T_new - 1), , drop = FALSE]) %*% b_est  # Validation period forecasts
e_mod_f0 <- y[T2:T_new] - y_mod_f0  # Forecast error
mse_e_f0 <- sum(e_mod_f0^2) / k_for # MSE
mse_e_f0

## 2.2.b - Out of sample forecast for Mex interest rate in 2023.3
## (this heading was labelled "2.1.g" although it sits under Q2.2 - confirm
## the item letter against the assignment)
# One-step-ahead forecast: constant plus the last observed regressors.
y_mod_f_1 <- c(xx_cons[1], xx_i[T_new, ]) %*% b_est
y_mod_f_1


###### Q2.3 ######

RE_da <- read.csv(
  "https://www.bauer.uh.edu/rsusmel/4397/Real_Estate_2023.csv",
  header = TRUE,  # spelled out in full (`head =` relied on partial matching)
  sep = ","
)
summary(RE_da)

# Pull the individual series out of the data frame.
x_date <- RE_da$DATE
x_lv <- RE_da$LV_c
u_lv <- RE_da$LV_u
x_tech <- RE_da$TPI_FRBSF_c
Nind <- RE_da$Nind_c
ec_lv <- RE_da$LAS_EC
x_Mkt <- RE_da$Mkt_RF
x_SMB <- RE_da$SMB
x_HML <- RE_da$HML
x_RMW <- RE_da$RMW
x_CMA <- RE_da$CMA

# Factor series: percent -> decimal.
Mkt_RF <- x_Mkt / 100
SMB <- x_SMB / 100
HML <- x_HML / 100
RMW <- x_RMW / 100
CMA <- x_CMA / 100

zz <- x_lv
T <- length(x_lv)  # NOTE: T shadows the shorthand for TRUE; write TRUE in full

# 2008 Financial crisis dummy: 0 for the first 224 observations, 1 afterwards.
# The run of ones is sized from T so the dummy always matches the data length
# (it was hard-coded to 132, which only fits a file with exactly 356 rows).
Fin_c0 <- rep(0, 224)
Fin_c1 <- rep(1, T - 224)
Fin_c <- c(Fin_c0, Fin_c1)

# Monthly dummies. month_dummy(pos) repeats a 12-element 0/1 pattern whose
# single 1 sits at position `pos`, for enough cycles to cover the sample; the
# result is trimmed to T observations below.
# NOTE(review): position 1 is named "Feb", so the sample presumably starts in
# February - confirm against RE_da$DATE.
month_dummy <- function(pos) {
  rep(as.numeric(seq_len(12) == pos), length(zz) / 12 + 1)
}
Feb1 <- month_dummy(1)   # February dummy
Mar1 <- month_dummy(2)   # March dummy
Apr1 <- month_dummy(3)   # April dummy
May1 <- month_dummy(4)   # May dummy
Jun1 <- month_dummy(5)   # June dummy
Jul1 <- month_dummy(6)   # July dummy
Aug1 <- month_dummy(7)   # August dummy
Sep1 <- month_dummy(8)   # September dummy
Oct1 <- month_dummy(9)   # October dummy
Nov1 <- month_dummy(10)  # November dummy
Dec1 <- month_dummy(11)  # December dummy

# Trim every dummy to the sample length.
Feb <- Feb1[1:T]
Mar <- Mar1[1:T]
Apr <- Apr1[1:T]
May <- May1[1:T]
Jun <- Jun1[1:T]
Jul <- Jul1[1:T]
Aug <- Aug1[1:T]
Sep <- Sep1[1:T]
Oct <- Oct1[1:T]
Nov <- Nov1[1:T]
Dec <- Dec1[1:T]

# Seasonal dummies (the months left out of all three act as the base season).
Spring <- Mar + Apr + May
Summ <- Jun + Jul + Aug
Fall <- Sep + Oct + Nov

# Squared terms.
u_lv2 <- u_lv^2
x_tech2 <- x_tech^2
Nind2 <- Nind^2

# Interactions between the continuous regressors.
u_lv_Nind <- u_lv * Nind
u_lv_tech <- u_lv * x_tech

# Interactions with the seasonal dummies.
u_lv_Spring <- u_lv * Spring
u_lv_Summ <- u_lv * Summ
u_lv_Fall <- u_lv * Fall
Nind_Spring <- Nind * Spring
Nind_Summ <- Nind * Summ
Nind_Fall <- Nind * Fall
tech_Spring <- x_tech * Spring
tech_Summ <- x_tech * Summ
tech_Fall <- x_tech * Fall

# Interactions with the financial crisis dummy.
u_lv_Finc <- u_lv * Fin_c
Nind_Finc <- Nind * Fin_c
tech_Finc <- x_tech * Fin_c
Finc_Spring <- Fin_c * Spring
Finc_Summ <- Fin_c * Summ
Finc_Fall <- Fin_c * Fall


## 2.3.a - GUM and Reduced (Specific) Models

# General unrestricted model: levels, factor returns, squares, seasonal and
# crisis dummies, and all the interaction terms built above.
# (A stray duplicated "+" before Mkt_RF was removed; it parsed as a unary plus
# and did not change the fit, but it leaked into the printed call.)
fit_lv_gum <- lm(
  formula = x_lv ~ u_lv + x_tech + Nind + Mkt_RF + SMB + HML + RMW + CMA +
    u_lv2 + Nind2 + x_tech2 + Spring + Summ + Fall + Fin_c +
    u_lv_Nind + u_lv_tech + u_lv_Spring + u_lv_Summ + u_lv_Fall +
    Nind_Spring + Nind_Summ + Nind_Fall +
    tech_Spring + tech_Summ + tech_Fall +
    u_lv_Finc + Nind_Finc + tech_Finc +
    Finc_Spring + Finc_Summ + Finc_Fall
)

summary(fit_lv_gum)

## Keeping all variables with a p-value of 10% or less.
fit_lv_red <- lm(x_lv ~ u_lv2 + x_tech2 + Spring + Fin_c + tech_Summ + u_lv_Finc)
summary(fit_lv_red)

## 2.3.b -  Drivers (variables with significant t-stats at 5% level)
# Drivers: u_lv2, x_tech2, u_lv_Finc

## 2.3.c -  Financial Crisis?
# Fin_c is not significant in the reduced model. Spring and Summer (interacting
# with the Tech factor) are also not significant. No evidence of a lasting
# effect of the 2008 Financial Crisis or of seasonality on LAS home prices.

## 2.3.d - JB Test, using package tseries (function jarque.bera.test)
# NOTE(review): the residuals tested here come from the GUM, not from the
# reduced model - confirm this is what the question asks for.
e_lv <- fit_lv_gum$residuals

library(tseries)
jarque.bera.test(e_lv)

## 2.3.e - White and NW SE
# At the 5% level, we have evidence of both heteroscedasticity &
# autocorrelation. Then, use NW SE.
# NeweyWest() comes from the sandwich package attached in Q2.1.
Var_NW <- NeweyWest(fit_lv_red, lag = 12)
SE_NW <- sqrt(diag(Var_NW))
b_lv_red <- fit_lv_red$coefficients
t_b_NW <- b_lv_red / SE_NW
t_b_NW

# Once we take into account autocorrelation and heteroscedasticity, nothing is
# significant. Model not good for LAS.


###### Q2.4 ######
SFX_da <- read.csv(
  "http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",
  header = TRUE,  # spelled out in full (`head =` relied on partial matching)
  sep = ","
)

# Pull the individual series out of the data frame.
x_dat <- SFX_da$Date
x_dis <- SFX_da$DIS
x_Mkt_RF <- SFX_da$Mkt_RF
x_SMB <- SFX_da$SMB
x_HML <- SFX_da$HML
x_CMA <- SFX_da$CMA
x_RMW <- SFX_da$RMW
x_RF <- SFX_da$RF

# Sample size of the price series. This previously read length(x_slb), an
# object that is never created in this script, so the line stopped with
# "object 'x_slb' not found".
T <- length(x_dis)

# Log return: x[-1] drops the first observation and x[-T] the last, so the
# ratio is x_t / x_{t-1}.
lr_dis <- log(x_dis[-1] / x_dis[-T])

# Factors and risk-free rate: percent -> decimal, dropping the first
# observation so the lengths match the return series.
Mkt_RF <- x_Mkt_RF[-1] / 100
SMB <- x_SMB[-1] / 100
HML <- x_HML[-1] / 100
CMA <- x_CMA[-1] / 100
RMW <- x_RMW[-1] / 100
RF <- x_RF[-1] / 100

dis_x <- lr_dis - RF  # Excess return

## 4.a
fit1 <- lm(dis_x ~ Mkt_RF + SMB + HML)  # Model 1
summary(fit1)

fit2 <- lm(dis_x ~ Mkt_RF + CMA + RMW)  # Model 2
summary(fit2)


## 4.b
# J-test of the two non-nested models (jtest() is in lmtest).
library(lmtest)
jtest(fit1, fit2)



###### Q2.5 - THEORY REVIEW ######
# 2.5.a. False. Violations of Assumption (A3) make us use OLS with appropriate Standard Errors (White or NW)
# 2.5.b. False. White SE are used when only heteroscedasticity is present. For autocorrelated errors, we use NW.
# 2.5.c. True. (A3) does not affect unbiasedness (or consistency).
# 2.5.d. False. The J-test can reject both models
# 2.5.e. False. The Chow test for structural change is conditional on a given date. If the date is wrong, the Chow test is looking at the wrong time for structural change.