####### Code for HW 2 - Version: 2023

#### Question 1 ####

SH_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv", head=TRUE, sep=",")
x_date <- SH_da$Date
x_P <- SH_da$P 
x_D <- SH_da$D
x_E <- SH_da$E
T <- length(x_S)
cpi_us <- SH_da$CPI
i_us <- SH_da$Long_i

T <- length(x_P)
lr_p <- log(x_P[-1]/x_P[-T])
lr_d <- log(x_D[-1]/x_D[-T])
lr_e <- log(x_E[-1]/x_E[-T])
inf <- log(cpi_us[-1]/cpi_us[-T])
int <- (i_us[-1])/100

T <- length(lr_p)


## a. Report Regression
fit_mod <- lm (lr_p ~ lr_e + inf + int)
summary(fit_mod)

## b. Interpret R^2 (the model explains % of the variability of stock returns. Or more precisely, % of the variability of stock returns that is explained by the variability of earnings, inflation & interest rates)

## c. Interpret coefficient beta_1 (If earnings increase by 1%, stock returns increase by beta_1%

## d. Report F-stat in fit_mod output. Based on p-value reject reject H0.

## e. F test of Restrictions (beta_1 = beta_3 = 0)
e_u <- fit_mod$residuals
RSS_u <- sum(e_u^2)				# Unrestricted RSS

fit_r	<- lm (lr_p ~ inf )			# Restricted model
e_r <- fit_r$residuals
RSS_r <- sum(e_r^2)				# Restricted RSS

F_test_1 <- ((RSS_r - RSS_u)/2)/(RSS_u/(T-4))	# F-test
F_test_1
qf(.95, df1 = 2, df2= T-4)			# F-value from table (if larger than F_test_1 cannot reject H0)
p_val <- 1 - pf(F_test_1, df1 = 2, df2= T-4) 	# p-value of F-test
p_val

## You could have also used package car. 
library(car)
linearHypothesis(fit_mod, c("lr_e = 0","int = 0"), test="F") 	# F: exact test 

## f. Testing J=2 Hypothesis with R package car
library(car)
linearHypothesis(fit_mod, c("inf = 0.5","int = -0.1"), test="F") 	# F: exact test 

## Structural Change with Chow Test at T_SB = Oct 1973
x_date[1234]					# Check Tequila effect date
x_break <- 1233					# We lost one observation when taking log changes
y <- lr_p
x <- cbind(lr_e, inf, int)
T <- nrow(x)
k <- ncol(x)
T0 <- 1						# Initial observation of Regime 1
T1 <- x_break					# Structural break time (end of Regime 1)
T2 <-  T1 + 1					# Initial observatin of Regime 2

# Restricted RSS (From Whole sample)
x_ct <- x[T0:T,]
y_ct <- y[T0:T]

e_ct <- fit_mod$residuals
RSS_ct <- sum(e_ct^2) 				# RSS_R

# Unrestricted Estimation (Two periods)
x_ct_1 <- x[T0:T1,]
y_ct_1 <- y[T0:T1]
x_ct_2 <- x[T2:T,]
y_ct_2 <- y[T2:T]

k1 <- ncol(x_ct_1)
k2 <- ncol(x_ct_2)
fit_mod_1 <- lm(y_ct_1 ~ x_ct_1) 		#OLS regression from 1 to T1 (Regime 1)
e_ct_1 <- fit_mod_1$residuals
RSS_ct_1 <-  sum(e_ct_1^2)

fit_mod_2 <- lm(y_ct_2 ~ x_ct_2)		#OLS regression from T1+1 to T (Regime 2)
e_ct_2 <- fit_mod_2$residuals
RSS_ct_2 <-  sum(e_ct_2^2)

RSS_ct_tot <- RSS_ct_1 + RSS_ct_2 		# RSS_U

T_ct <- length(y_ct)
J <- k2 + k1 - k
F_test <- ((RSS_ct - RSS_ct_tot)/J)/(RSS_ct_tot/(T_ct - k1 - k2))	# F test
F_test

qf(.95, df1=J, df2=(T - k1 - k2)) 		# F_value from Table at (1-alpha)=0.95
p_val <- 1 - pf(F_test, df1=J, df2=(T_ct - k1 - k2)) 
p_val


#### Question 2 ####

fit_mod <- lm (lr_p ~ lr_e + inf + int)
sh_ols <- summary(fit_mod)$coef[,1]		#extracting OLS estimated parameters from lm (1st row in table)

dat_Sh <- data.frame(lr_p, lr_e, inf, int)	# create R dataframe to use in lmboot	
sim_size <- 1000				# R will mention that it's recommended to habe B=sim_size > T. You can set B = 2000	

library(lmboot)
sh_b <- paired.boot(lr_p ~ lr_e + inf + int, data=dat_Sh, B=sim_size)	#paired bootstrap in lm

## a. Mean values for b (bootstrap estimates of b)
mean(sh_b$bootEstParam[,1])		# print mean of bootstrap samples for constant
mean(sh_b$bootEstParam[,2]) 		# print mean of bootstrap samples for lr_e
mean(sh_b$bootEstParam[,3])		# print mean of bootstrap samples for inf
mean(sh_b$bootEstParam[,4]) 		# print mean of bootstrap samples for int

# Note: You can get from lmboot the OLS estimated parameters
sh_b$origEstParam			# OLS ("Original") estimated parameters

## a. bootstrap bias (OLS estimated parameter - Bootstrap estimated parameter)
sh_b$origEstParam[1] - mean(sh_b$bootEstParam[,1])
sh_b$origEstParam[2] - mean(sh_b$bootEstParam[,2])
sh_b$origEstParam[3] - mean(sh_b$bootEstParam[,3])
sh_b$origEstParam[4] - mean(sh_b$bootEstParam[,4])

## b. Use quantile function to get 95% CI for beta_2 (inf) parameter (percentile method)
quantile(sh_b$bootEstParam[,3], probs=c(.025, .975))


#### Question 3 ####
SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",head=TRUE,sep=",")
x_ge <- SFX_da$GE
x_xom <- SFX_da$XOM
x_Mkt_RF<- SFX_da$Mkt_RF
x_SMB <- SFX_da$SMB
x_HML <- SFX_da$HML
x_CMA <- SFX_da$CMA
x_RMW <- SFX_da$RMW
x_RF <- SFX_da$RF

T <- length(x_ibm)
lr_ge <- log(x_ge[-1]/x_ge[-T])
lr_xom <- log(x_xom[-1]/x_xom[-T])
x0 <- matrix(1,T-1,1)
Mkt_RF <- x_Mkt_RF[-1]/100
SMB <- x_SMB[-1]/100
HML <- x_HML[-1]/100
CMA <- x_CMA[-1]/100
RMW <- x_RMW[-1]/100
RF <- x_RF[-1]/100
ge_x <- lr_ge - RF						# IBM excess returns 

y <- ge_x
fit1 <- lm(y ~ Mkt_RF + SMB + HML)			# Model 1
summary(fit1)

fit2 <- lm(y ~ Mkt_RF + CMA + RMW) 			# Model 2
summary(fit2)

## J-test (using package lmtest)
library(lmtest)
jtest(fit1,fit2)

## Encompassing Model
fit_enc <- lm( y ~ Mkt_RF + SMB + HML + CMA + RMW)
summary(fit_enc)

# Encompassing test (using package lmtest)
encomptest(fit1,fit2)



#### Question 4 ####
RSS_R = 0.189
RSS1 = 0.079
RSS2 = 0.082
RSS_U <- RSS1+RSS2

T = 180
k_U = 4
k_R = 2
J <- k_U - k_R
F <- ((RSS_R - RSS_U)/J)/(RSS_U/(T-k_U))

qf(.95, df1=J, df2=(T - k_U)) 
p_val <- 1 - pf(F_test, df1=J, df2=(T - k_U)) 
p_val



#### Question 5 ####
Check Notes