###############   Practice Exam # 2 - Code (Fall 2025)    ###############
RE_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Cars_Mid2.csv", head=TRUE, sep=",")
summary(RE_da)


x_date <- RE_da$DATE
x_cars <- RE_da$Cars
x_oil_p <- RE_da$US_Oil_prod 
x_i <- RE_da$i_10y
x_ip <- RE_da$US_IP
us_hp <- RE_da$US_HP 
x_oil <- RE_da$Oil_WTI 
x_cons <- RE_da$Cons_sent
x_tech <- RE_da$Tech
x_pop <- RE_da$US_pop
u_us <- RE_da$US_u
x_Mkt <- RE_da$Mkt_RF
x_SMB <- RE_da$SMB
x_HML <- RE_da$HML
x_RMW <- RE_da$RMW
x_CMA <- RE_da$CMA
x_RF <- RE_da$RF
Mkt_RF <- x_Mkt/100
SMB <- x_SMB/100
HML <- x_HML/100
RMW <- x_RMW/100
CMA <- x_CMA/100
RF <- x_RF/100

zz <- x_cars
T <- length(x_cars)
length(x_tech)
T_sb <- 248
T_f <- T - T_sb
Fin_c0 <- rep(0,T_sb)
Fin_c1 <- rep(1,T_f)
Fin_c <- c(Fin_c0,Fin_c1)						# Create 2008 Financial crisis dummy
Jan1 <- rep(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))	# Create January dummy
Feb1 <- rep(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))	# Create March dummy
Mar1 <- rep(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))	# Create April dummy
Apr1 <- rep(c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))	# Create May dummy
May1 <- rep(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))	# Create June dummy
Jun1 <- rep(c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), (length(zz)/12+1))	# Create Jul dummy
Jul1 <- rep(c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), (length(zz)/12+1))	# Create Aug dummy
Aug1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0), (length(zz)/12+1))	# Create Sep dummy
Sep1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), (length(zz)/12+1))	# Create Oct dummy
Oct1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (length(zz)/12+1))	# Create Oct dummy
Nov1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (length(zz)/12+1))	# Create Oct dummy
Jan <- Jan1[1:T]
Feb <- Feb1[1:T]
Mar <- Mar1[1:T]
Apr <- Apr1[1:T]
May <- May1[1:T]
Jun <- Jun1[1:T]
Jul <- Jul1[1:T]
Aug <- Aug1[1:T]
Sep <- Sep1[1:T]
Oct <- Oct1[1:T]
Nov <- Nov1[1:T]
Dec <- Dec1[1:T]
Spring <- Mar + Apr + May
Summ <- Jun +Jul + Aug
Fall <- Sep + Oct + Nov
us_hp <- us_hp^2
u_us2 <- u_us^2
tech2 <- x_tech^2
x_ip2 <- x_ip^2
x_oil_p2 <- x_oil^2
x_ppi_h2 <- x_ppi_h^2
us_hp_oil <- us_hp * x_oil
us_hp_oil_p <- us_hp * x_oil_p
us_hp_tech <- us_hp * x_tech
us_hp_Spring <- us_hp * Spring
us_hp_Summ <- us_hp * Summ
us_hp_Fall <- us_hp * Fall
us_hp_Finc <- us_hp * Fin_c
x_ip_oil <- x_ip * x_oil
x_ip_oil_p <- x_ip * x_oil_p
x_ip_Spring <- x_ip * Spring
x_ip_Summ <- x_ip * Summ
x_ip_Fall <- x_ip * Fall
u_us_oil <- u_us * x_oil
u_us_x_ip <- u_us * x_ip
u_us_tech <- u_us * x_tech
u_us_Spring <- u_us * Spring
u_us_Summ <- u_us * Summ
u_us_Fall <- u_us * Fall
tech_Spring <- x_tech*Spring
tech_Summ <- x_tech * Summ
tech_Fall <- x_tech * Fall
u_cars_Finc <- u_cars * Fin_c
x_ip_Finc <- x_ip * Fin_c
tech_Finc <- x_tech * Fin_c
Finc_Spring <- Fin_c*Spring
Finc_Summ <- Fin_c*Summ
Finc_Fall <- Fin_c*Fall



####  1.a - Regression and Testing
fit_cars <- lm(x_cars ~ us_hp + x_oil + x_i + u_us + x_ip + Fin_c + Summ ) 
summary(fit_cars)

## 1.b - Report & interpret R^2 & Beta_1
# R2 = 0.1177 (12% of the variability of mexican interest rates is explained by the variables in the model
# Beta_2 = 0.1627 (a 1% change in US interest rate increases Mex interest rates by 0.16%)

## 1.c - Drivers of regression 
# oil prices & US Industrial production

## 1.d - Heteroscedasticity Tests: GQ
gqtest(fit_cars)
# No evidence against H0 (no heteroscedasticity)		

## 1.e - Heteroscedasticity Tests: LM-BP
e_fit <- fit_cars$residuals
e_fit2 <- e_fit^2		# Potential driver of variance
x_ip2 <- x_ip^2		# Potential driver of variance
x_oil2 <- x_oil^2		# Potential driver of variance

bptest(formula = fit_cars, varformula = ~ x_i + x_ip2 + x_oil2)
# Strong evidence against H0 => reject no heteroscedasticity	

## 1.1.f - LM Test for AR(p)
bgtest(fit_cars, order=4)
dwtest(fit_cars)		# Extra test for AR(1) autocorrelation
# Strong evidence against H0 => reject no autocorrelation

## 1.g -  NW SE 
Var_NW <- NeweyWest(fit_cars, lag = 12)
coeftest(fit_i, vcov = Var_NW)
# No change in significant variables.

## 1.h -  Structural Change SE 
library(strucchange)
# Specific break = 08/2008
t_s <- 247			# 2008:August
sctest(fit_cars, type = "Chow", point = t_s)

# Anytime - Andrews (1993)
library(desk)
pie <- .15
T0 <- round(T * pie)
T1 <- round(T *(1-pie))
my.qlr <- qlr.test(fit_cars_red, from = T0, to = T1, sig.level = 0.05, details = TRUE)
my.qlr # Print test results

plot(my.qlr, col = "red", main = "QLR Test: GDP growth rate AR(1) Model - 1947-2024") # Plot test results
max(my.qlr$f.stats)
which.max(my.qlr$f.stats) + T0

my.qlr$breakpoint		# Extract breakpoint observation
x_date[my.qlr$breakpoint]	# Print date

qlr.cv(T, L = 2, sig.level = 0.05)
# No evidence of structural break.

########### QUESTION 2 ############
## 2.a - GUM and Reduced (Specific) Models

## 2.a.1
fit_cars_gum <- lm(formula = x_cars ~ x_oil_p + x_i + x_oil + x_pop + Tind + Mkt_RF + SMB + HML + RMW + CMA +  
                     us_hp + x_cons + x_tech + u_us2 +  x_ip2 + tech2 + x_oil_p2 +  Spring + Summ + Fall + Fin_c +  us_hp_oil + us_hp_oil_p + us_hp_tech + us_hp_Spring + us_hp_Summ + us_hp_Fall + 
                     us_hp_Finc +  x_ip_oil + x_ip_oil_p + x_ip_Spring + x_ip_Summ + x_ip_Fall + 
                     u_us_oil + u_us_Finc +  u_us_Summ + tech_Spring + tech_Summ + tech_Fall + Tind_Finc + 
                     tech_Finc + Finc_Spring + Finc_Summ + Finc_Fall)

summary(fit_cars_gum)

## 2.a.2.
fit_cars_red <- lm(x_cars ~ SMB  + x_ip2 + x_ip_Spring + u_us_oil)
summary(fit_cars_red)

# Drivers: SMB, ip^2, Oil production interacting with unemployment & Spring interacting with US IP

## 2.b -  Seasonality?
# No direct evidence, though Spring interacting term with IP is significant at the 5% level. Overall, we reject seasonality

## 2.c - Reduced Model: Heteroscedasticity?
e_cars <- fit_cars_red$residuals
e_cars2 <- e_cars^2
bptest(fit_cars, order=4)
box.text(e_cars)
# Strong evidence against H0 => reject no heteroscedasticity	

## 2d - Reduced Model: Autocorrelation?
e_cars <- fit_cars_red$residuals
bgtest(fit_cars, order=4)
# Strong evidence against H0 => reject no autocorrelation	

## 2.e - White and NW SE
## Implication: At 5% level, we have evidence of both heteroscedasticiy & autocorrelation. Then, use NW SE.
Var_NW <- NeweyWest(fit_cars_red, lag = 4)
coeftest(fit_cars_red, Var_NW)




########### QUESTION 3 ############
## 3.a -  Estimation Period
y <- x_cars
xx_i <- cbind(x_i, x_oil, x_ip)		# X matrix

T0 <- 1
T1 <- 432							# End of Estimation Period (2021.4)
T2 <- T1+1							# Start of Validation Period (2022.1)
y1 <- y[T0:T1]
x1 <- xx_i[T0:T1,]
fit_cars_est <- lm(y1 ~  x1)					# Estimation Period Regression (1971.2 - 2020.4)
b_est <- fit_cars_est$coefficients					# Extract OLS coefficients from regression
b_est								# OLS coefficients
summary(fit_cars_est)

## 3.b -  Forecating Validation Period with Model
# RW Assumption For Independent Variables
T_new <- T - T1
xx_cons <- rep(1,T_new)
k_for <- length(xx_cons)					# Create a constant for Validation forecasts
y_mod_f0 <- cbind(xx_cons,xx_i[T1:(T-1),]) %*% b_est		# Validation period data
e_mod_f0 <- y[T2:T] - y_mod_f0					# Forecasat error
mse_e_f0 <- sum(e_mod_f0^2)/k_for				# MSE
mse_e_f0


## 3.c -  RW assumption for cars
y_rw_f <- y[T1:(T-1)]
e_rw_f0 <- y[T2:T] - y_rw_f 
mse_rw_f0 <- sum(e_rw_f0 ^2)/k_for				# MSE
mse_rw_f0

## 3.d -   Testing Equality of MSE: Mod vs RW
z_mgn <-  e_mod_f0 + e_rw_f0
x_mgn <- e_mod_f0 - e_rw_f0
fit_mgn <- lm(z_mgn ~ x_mgn)	
summary(fit_mgn)						# Check t-stat

# RW's MSE is statistically different (better) than Model's MSE


########### QUESTION 4 ############
y <- x_cars

## 4.a - ACF/PACF
acf(y)	# MA(2)?
pacf(y)	# AR(2)?

## 4.b - Seasonality
T_y <- length(y)

Feb <-rep(c(1,0,0,0,0,0,0,0,0,0,0,0),T_y/12+1) # Create January dummy
Mar <-rep(c(0,1,0,0,0,0,0,0,0,0,0,0),T_y/12+1) # Create February dummy
Apr <-rep(c(0,0,1,0,0,0,0,0,0,0,0,0),T_y/12+1) 
May <-rep(c(0,0,0,1,0,0,0,0,0,0,0,0),T_y/12+1) 
Jun <-rep(c(0,0,0,0,1,0,0,0,0,0,0,0),T_y/12+1) 
Jul <-rep(c(0,0,0,0,0,1,0,0,0,0,0,0),T_y/12+1) 
Aug <-rep(c(0,0,0,0,0,0,1,0,0,0,0,0),T_y/12+1) 
Sep <-rep(c(0,0,0,0,0,0,0,1,0,0,0,0),T_y/12+1) 
Oct <-rep(c(0,0,0,0,0,0,0,0,1,0,0,0),T_y/12+1) 
Nov <-rep(c(0,0,0,0,0,0,0,0,0,1,0,0),T_y/12+1) 
Dec <-rep(c(0,0,0,0,0,0,0,0,0,0,1,0),T_y/12+1) 
seas1 <-cbind(Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, Nov, Dec)

seas <- seas1[1:T_y,]

fit_y_seas <- lm(y ~ seas)
summary(fit_y_seas)
# No evidence of seasonality

## 4.c - Automatic ARMA selection
library(forecast)
auto.arima(y)

library(weakARMA)
ar_y <- ARMA.selec(y,P=3,Q=3)
ar_y

library(timsac) 			# uses only AIC
autoarmafit(y, max.order = NULL)

fit_02 <- arima(y, order=c(0,0,2))
fit_02 

##  4.d. - Stationarity?	MA is always stationary
library(forecast)
plot(fit_02)

##  4.e - Residuals WN?
checkresiduals(fit_02)
# Not big departures from 95% C.I. Ljung-Box test cannot reject no autocorrelation



###### QUESTION 5 ######
Review Notes.