####### Code for HW 2 - Version: 2024 #### Question 1 #### SH_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv", head=TRUE, sep=",") x_date <- SH_da$Date x_P <- SH_da$P x_D <- SH_da$D x_E <- SH_da$E T <- length(x_P) cpi_us <- SH_da$CPI i_us <- SH_da$Long_i T <- length(x_P) lr_p <- log(x_P[-1]/x_P[-T]) lr_d <- log(x_D[-1]/x_D[-T]) lr_e <- log(x_E[-1]/x_E[-T]) inf <- log(cpi_us[-1]/cpi_us[-T]) int <- (i_us[-1])/100 T <- length(lr_p) ## a. Report Regression fit_mod <- lm (lr_p ~ lr_e + inf + int) summary(fit_mod) ## b. Interpret R^2 (the model explains % of the variability of stock returns. Or more precisely, % of the variability of stock returns that is explained by the variability of earnings, inflation & interest rates) ## c. Interpret coefficient beta_1 (If earnings increase by 1%, stock returns increase by beta_1% ## d. Report F-stat in fit_mod output. Based on p-value reject reject H0. ## e. F test of Restrictions (beta_1 = beta_3 = 0) e_u <- fit_mod$residuals RSS_u <- sum(e_u^2) # Unrestricted RSS fit_r <- lm (lr_p ~ inf ) # Restricted model e_r <- fit_r$residuals RSS_r <- sum(e_r^2) # Restricted RSS F_test_1 <- ((RSS_r - RSS_u)/2)/(RSS_u/(T-4)) # F-test F_test_1 qf(.95, df1 = 2, df2= T-4) # F-value from table (if larger than F_test_1 cannot reject H0) p_val <- 1 - pf(F_test_1, df1 = 2, df2= T-4) # p-value of F-test p_val ## You could have also used package car. library(car) linearHypothesis(fit_mod, c("lr_e = 0","int = 0"), test="F") # F: exact test ## f. Testing J=2 Hypothesis with R package car library(car) linearHypothesis(fit_mod, c("inf = 0.5","int = -0.1"), test="F") # F€: exact test ## Structural Change with Chow Test at T_SB = Oct 1973 x_date[1234] # Check Tequila effect date x_break <- 1233 # We lost one observation when taking log changes y <- lr_p x <- cbind(lr_e, inf, int) T <- nrow(x) k <- ncol(x) + 1 T0 <- 1 # Initial observation of Regime 1 T1 <- x_break # Structural break time (end of Regime 1) T2 <- T1 + 1 # Initial observatin of Regime 2 # Restricted RSS (From Whole sample) x_ct <- x[T0:T,] y_ct <- y[T0:T] e_ct <- fit_mod$residuals RSS_ct <- sum(e_ct^2) # RSS_R # Unrestricted Estimation (Two periods) x_ct_1 <- x[T0:T1,] y_ct_1 <- y[T0:T1] x_ct_2 <- x[T2:T,] y_ct_2 <- y[T2:T] k1 <- ncol(x_ct_1) + 1 k2 <- ncol(x_ct_2) + 1 fit_mod_1 <- lm(y_ct_1 ~ x_ct_1) #OLS regression from 1 to T1 (Regime 1) e_ct_1 <- fit_mod_1$residuals RSS_ct_1 <- sum(e_ct_1^2) fit_mod_2 <- lm(y_ct_2 ~ x_ct_2) #OLS regression from T1+1 to T (Regime 2) e_ct_2 <- fit_mod_2$residuals RSS_ct_2 <- sum(e_ct_2^2) RSS_ct_tot <- RSS_ct_1 + RSS_ct_2 # RSS_U T_ct <- length(y_ct) J <- k2 + k1 - k F_test <- ((RSS_ct - RSS_ct_tot)/J)/(RSS_ct_tot/(T_ct - k1 - k2)) # F test F_test qf(.95, df1=J, df2=(T - k1 - k2)) # F_value from Table at (1-alpha)=0.95 p_val <- 1 - pf(F_test, df1=J, df2=(T_ct - k1 - k2)) p_val #### Question 2 #### fit_mod <- lm (lr_p ~ lr_e + inf + int) sh_ols <- summary(fit_mod)$coef[,1] #extracting OLS estimated parameters from lm (1st row in table) dat_Sh <- data.frame(lr_p, lr_e, inf, int) # create R dataframe to use in lmboot sim_size <- 1000 # R will mention that it's recommended to habe B=sim_size > T. You can set B = 2000 library(lmboot) sh_b <- paired.boot(lr_p ~ lr_e + inf + int, data=dat_Sh, B=sim_size) #paired bootstrap in lm ## a. Mean values for b (bootstrap estimates of b) mean(sh_b$bootEstParam[,1]) # print mean of bootstrap samples for constant mean(sh_b$bootEstParam[,2]) # print mean of bootstrap samples for lr_e mean(sh_b$bootEstParam[,3]) # print mean of bootstrap samples for inf mean(sh_b$bootEstParam[,4]) # print mean of bootstrap samples for int # Note: You can get from lmboot the OLS estimated parameters sh_b$origEstParam # OLS ("Original") estimated parameters ## a. bootstrap bias (OLS estimated parameter - Bootstrap estimated parameter) sh_b$origEstParam[1] - mean(sh_b$bootEstParam[,1]) sh_b$origEstParam[2] - mean(sh_b$bootEstParam[,2]) sh_b$origEstParam[3] - mean(sh_b$bootEstParam[,3]) sh_b$origEstParam[4] - mean(sh_b$bootEstParam[,4]) ## b. Use quantile function to get 95% CI for beta_2 (inf) parameter (percentile method) quantile(sh_b$bootEstParam[,3], probs=c(.025, .975)) #### Question 3 #### # Define standardized residuals x_resid <- residuals(fit_mod) x_stand_resid <- x_resid/sd(x_resid) # standardized residuals # Plot standardized residuals plot(x_stand_resid, typ ="l", col="blue", main ="IBM Standardized Residuals from 3 FF Factor Model", xlab="Date", ylab="IBM residuals") cooksd <- cooks.distance(fit_mod) # plot Cook's distance plot(cooksd, pch="*", cex=2, main="Influential Obs by Cooks distance") # add cutoff line abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line # add labels text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T), names(cooksd),""), col="red") # add labels library(olsrr) # need to install package olsrr sum(x_stand_resid > 2) # Rule of thumb count (5% count is OK) x_lev <- ols_leverage(fit_mod) # leverage residuals sum(x_lev > (2*k+2)/T) # Rule of thumb count (5% count is OK) sum(cooksd > 4/T) # Rule of thumb count (5% count is OK) sum(x_stand_resid > 2)/T # proportion of "outliers" sum(x_lev > (2*k+2)/T)/T # proportion of "outliers" sum(cooksd > 4/T)/T # proportion of "outliers" #### Question 5 #### Check Notes