###### Exam 2 - Answers (Code) ######

###### Q1 ######
# Download the real-estate data set (CSV with a header row, comma separated)
# and print summary statistics for every column.
RE_da <- read.csv(
  "http://www.bauer.uh.edu/rsusmel/4397/Real_Estate_2023.csv",
  header = TRUE,  # spelled out: `head=` relied on partial argument matching
  sep = ","
)
summary(RE_da)

# Pull the columns used below into stand-alone vectors.
x_date <- RE_da$DATE
x_sd <- RE_da$SD_c      # dependent variable of the Q1 regressions
x_usa <- RE_da$USA_c
u_sd <- RE_da$SD_u
x_tech <- RE_da$Tech_c
Cind <- RE_da$Cind_c
Nind <- RE_da$Nind_c

# Factor columns as stored in the file ...
x_Mkt <- RE_da$Mkt_RF
x_SMB <- RE_da$SMB
x_HML <- RE_da$HML
x_RMW <- RE_da$RMW
x_CMA <- RE_da$CMA
x_RF <- RE_da$RF

# ... and rescaled by 1/100.
# NOTE(review): presumably converts percent to decimal units -- confirm.
Mkt_RF <- x_Mkt / 100
SMB <- x_SMB / 100
HML <- x_HML / 100
RMW <- x_RMW / 100
CMA <- x_CMA / 100
RF <- x_RF / 100

oil <- RE_da$Oil
gold <- RE_da$Gold

# ---- Regressor construction ----
# Working copy of the dependent variable and the sample size.
zz <- x_sd
T <- length(x_sd)

# Step dummy: 0 for the first T_sb observations, 1 for the remaining ones
# (used below as the 2008 financial-crisis dummy).
T_sb <- 224
T_end_reg2 <- T - T_sb
Fin_c0 <- rep(0, T_sb)
Fin_c1 <- rep(1, T_end_reg2)
Fin_c <- c(Fin_c0, Fin_c1)

# Period-12 indicator patterns. Row k of the 12 x 12 identity matrix has a
# single 1 in position k; repeating it length(zz)/12 + 1 times gives a dummy
# that is 1 on the k-th observation of every 12-observation cycle. Position 12
# gets no dummy (omitted base category).
# NOTE(review): which calendar month position 1 corresponds to depends on the
# month of the first observation -- confirm the Feb..Dec labels against DATE.
Feb1 <- rep(diag(12)[1, ], length(zz) / 12 + 1)   # position 1
Mar1 <- rep(diag(12)[2, ], length(zz) / 12 + 1)   # position 2
Apr1 <- rep(diag(12)[3, ], length(zz) / 12 + 1)   # position 3
May1 <- rep(diag(12)[4, ], length(zz) / 12 + 1)   # position 4
Jun1 <- rep(diag(12)[5, ], length(zz) / 12 + 1)   # position 5
Jul1 <- rep(diag(12)[6, ], length(zz) / 12 + 1)   # position 6
Aug1 <- rep(diag(12)[7, ], length(zz) / 12 + 1)   # position 7
Sep1 <- rep(diag(12)[8, ], length(zz) / 12 + 1)   # position 8
Oct1 <- rep(diag(12)[9, ], length(zz) / 12 + 1)   # position 9
Nov1 <- rep(diag(12)[10, ], length(zz) / 12 + 1)  # position 10
Dec1 <- rep(diag(12)[11, ], length(zz) / 12 + 1)  # position 11

# Trim every pattern to the sample length.
Feb <- Feb1[seq_len(T)]
Mar <- Mar1[seq_len(T)]
Apr <- Apr1[seq_len(T)]
May <- May1[seq_len(T)]
Jun <- Jun1[seq_len(T)]
Jul <- Jul1[seq_len(T)]
Aug <- Aug1[seq_len(T)]
Sep <- Sep1[seq_len(T)]
Oct <- Oct1[seq_len(T)]
Nov <- Nov1[seq_len(T)]
Dec <- Dec1[seq_len(T)]

# Three-month seasonal dummies (the fourth season is the base category).
Spring <- Mar + Apr + May
Summ <- Jun + Jul + Aug
Fall <- Sep + Oct + Nov

# Squared terms.
u_sd2 <- u_sd^2
x_usa2 <- x_usa^2
x_tech2 <- x_tech^2
Cind2 <- Cind^2

# Pairwise interactions between the continuous regressors.
u_sd_Cind <- Cind * u_sd
u_sd_tech <- x_tech * u_sd

# Interactions with the seasonal dummies.
u_sd_Spring <- Spring * u_sd
u_sd_Summ <- Summ * u_sd
u_sd_Fall <- Fall * u_sd
Cind_Spring <- Spring * Cind
Cind_Summ <- Summ * Cind
Cind_Fall <- Fall * Cind
tech_Spring <- Spring * x_tech
tech_Summ <- Summ * x_tech
tech_Fall <- Fall * x_tech

# Interactions with the crisis dummy.
u_sd_Finc <- Fin_c * u_sd
Cind_Finc <- Fin_c * Cind
tech_Finc <- Fin_c * x_tech
Finc_Spring <- Spring * Fin_c
Finc_Summ <- Summ * Fin_c
Finc_Fall <- Fall * Fin_c


## 1.a - GUM and Reduced (Specific) Models

# General unrestricted model (GUM): x_sd on all candidate regressors -- levels,
# factors, commodities, squares, seasonal and crisis dummies, and interactions.
# (A stray duplicated `+` before Mkt_RF was removed; the set of regressors is
# unchanged.)
fit_sd_gum <- lm(
  formula = x_sd ~ x_usa + u_sd + x_tech + Cind +
    Mkt_RF + SMB + HML + RMW + CMA + gold + oil +
    x_usa2 + u_sd2 + Cind2 + x_tech2 +
    Spring + Summ + Fall + Fin_c +
    u_sd_Cind + u_sd_tech +
    u_sd_Spring + u_sd_Summ + u_sd_Fall +
    Cind_Spring + Cind_Summ + Cind_Fall +
    tech_Spring + tech_Summ + tech_Fall +
    u_sd_Finc + Cind_Finc + tech_Finc +
    Finc_Spring + Finc_Summ + Finc_Fall
)

summary(fit_sd_gum)

# Reduced (specific) model: the subset of regressors kept from the GUM.
fit_sd_red <- lm(x_sd ~ x_usa + u_sd + x_usa2 + Fin_c + u_sd_Finc + Cind_Finc)
summary(fit_sd_red)

## 1.b - Drivers (variables with significant t-stats)
# Drivers: x_usa, u_sd, x_usa2, Financial crisis, and the interaction terms with Financial crisis: SD unemployment and California leading indicators.

## 1.c -  Financial Crisis? 
# Yes! Fin_c is very significant (and negative) in reduced model. Interaction between Fin_c and Unemployment in SD is very significant (& also negative) 

## 1.d - Evidence of Seasonality?
# None of the seasonal dummies shows up as significant, directly or interacting with other variables.

## 1.e - JB Test (normality of the residuals)
# NOTE(review): the residuals come from the GUM, while the tests in 1.f/1.g are
# run on the reduced model -- confirm this is intended.
e_sd <- fit_sd_gum$residuals

library(tseries)   # provides jarque.bera.test()
jarque.bera.test(e_sd)

## 1.f - Tests for autocorrelation (DW & BG) on the reduced model
# lmtest provides dwtest(), bgtest(), gqtest() and bptest(); it was never
# loaded, so these calls failed in a fresh R session.
library(lmtest)
dwtest(fit_sd_red)
bgtest(fit_sd_red, order = 4)   # Breusch-Godfrey with 4 lags

## 1.g - Tests for heteroscedasticity (GQ & BP) on the reduced model
gqtest(fit_sd_red)
bptest(fit_sd_red)

## 1.h - White and NW SE
# At 5% level, we have evidence of both heteroscedasticity & autocorrelation. Then, use NW SE.
# sandwich provides NeweyWest(); it was never loaded, so the call failed in a
# fresh R session.
library(sandwich)
Var_NW <- NeweyWest(fit_sd_red, lag = 12)   # NW covariance matrix, 12 lags
SE_NW <- sqrt(diag(Var_NW))                 # NW standard errors
b_sd_red <- fit_sd_red$coefficients
t_b_NW <- b_sd_red / SE_NW                  # t-statistics based on NW SE
t_b_NW

# Once we take into account autocorrelation and heteroscedasticity, only x_usa, u_sd, Finc_S [presumably Fin_c -- confirm] and u_sd interacting with Fin_c are significant at 5% (u_sd * Fin_c marginal)


## 1.i - LM test for seasonality in the residuals
# Auxiliary regression of the residuals e_sd on the 11 monthly dummies.
# LM statistic = (number of residuals) * R^2 of that regression, compared
# with a chi-squared distribution with 11 degrees of freedom.
lm_seas <- lm(
  e_sd ~ Feb + Mar + Apr + May + Jun + Jul + Aug + Sep + Oct + Nov + Dec
)
R2_r <- summary(lm_seas)[["r.squared"]]   # R^2 of the auxiliary regression
R2_r
LM_test <- length(e_sd) * R2_r
LM_test
qchisq(0.95, df = 11)                     # 5% critical value, chi-squared(11)

p_val <- 1 - pchisq(LM_test, df = 11)     # p-value of LM_test
p_val

# Evidence of seasonality on residuals. We likely need to reformulate model.


###### Q2 ######

## 2.a AR(1) for the dependent variable (y = x_sd)
y <- x_sd
T1 <- 372                      # last observation of the estimation period
y_0 <- y[2:T1]                 # y_t over the estimation period
y_1 <- y[seq_len(T1 - 1)]      # y_(t-1): the same series lagged one period
fit_y <- lm(y_0 ~ y_1)         # OLS fit of the AR(1) regression
b_y <- coef(fit_y)             # (intercept, AR coefficient)
summary(fit_y)

## 2.b AR(1) Forecast S_t+1
# NOTE(review): `S` (the series the forecasts are built on) is not defined
# anywhere in this script; it must already exist in the workspace before this
# section runs -- confirm which series it is.
T_for <- T - T1                                   # number of out-of-sample forecasts
xx_cons <- rep(1, T_for)                          # intercept column
T_val <- T1 + 1                                   # first validation observation
y_f0 <- cbind(xx_cons, y[T1:(T - 1)]) %*% b_y     # one-step forecasts of y from the estimation-period coefficients
S_ar1_f0 <- S[T1:(T - 1)] * (1 + y_f0)            # forecast for S_t, using validation data
e_ar1_f0 <- S[T_val:T] - S_ar1_f0                 # forecast error
# MSE: average over the T_for forecasts. The original divided by `k_for`,
# which is never defined in this script.
mse_e_ar1_f0 <- sum(e_ar1_f0^2) / T_for
mse_e_ar1_f0


## 2.c RW Forecast S_t+1
e_rw_f0 <- S[T_val:T] - S[T1:(T - 1)]             # error of the random-walk forecast (S_t+1 forecast = S_t)
mse_e_rw_f0 <- sum(e_rw_f0^2) / T_for             # same denominator as the AR(1) MSE
mse_e_rw_f0

## 2.d Testing equality of MSE: AR(1) model vs RW
# Regress the sum of the two forecast-error series on their difference and
# inspect the slope in the summary output.
x_mgn <- e_rw_f0 - e_ar1_f0    # difference of forecast errors (regressor)
z_mgn <- e_rw_f0 + e_ar1_f0    # sum of forecast errors (dependent variable)
fit_mgn <- lm(z_mgn ~ x_mgn)
summary(fit_mgn)


###### Q3 ######
# Sample autocorrelation function of the oil series (also draws the ACF plot).
oil_acf <- acf(oil)

# Approximate 95% band around zero for a single autocorrelation:
# 0 +/- 1.96 * SE, with SE = 1 / sqrt(number of observations).
T <- length(oil)
SE_acf <- 1 / sqrt(T)
CI_ub <- 1.96 * SE_acf
CI_lb <- -CI_ub

## 3.a.
# Any acf greater than 0.098 in absolute value is significant => only 3 (1st, 3rd & 4th)

## 3.b. - Ljung-Box statistic from the first three autocorrelations
# Every autocorrelation is parenthesised before squaring: in R `^` binds
# tighter than unary minus, so `-0.038^2` evaluates to -(0.038^2) and the
# lag-2 term would be subtracted instead of added.
LB <- T * (T + 2) *
  ((-0.249)^2 / (T - 1) + (-0.038)^2 / (T - 2) + (-0.106)^2 / (T - 3))
LB	# very significant at 5%; chi-squared[df=3] 5% critical value = 7.81

## 3.c. - AR(1) coefficient
# NOTE(review): the lag-1 autocorrelation used in 3.b is -0.249, while phi_1 is
# set to +0.249 here -- confirm the intended sign.
phi_1 <- 0.249  # |phi_1| < 1 => stationary AR(1) process

## 3.d. - Accumulated response over the first 3 periods
IRF_3 <- (phi_1 + phi_1^2 + phi_1^3)  # Interpretation: after 3 months the accumulated shock is IRF_3



###### Q4 - THEORY REVIEW ######
# 4.a. False. Under usual assumptions, if errors are heteroscedastic, OLS is unbiased, and consistent.
# 4.b. True. OLS and FGLS are both consistent. 
# 4.c. True. White SE, if there is heteroscedasticity, or just OLS SE, if there is no heteroscedasticity, will be fine.
# 4.d. False. The variance is time dependent; it will be explosive as T grows
# 4.e. False. MA processes are always stationary.