########### PRACTICE FINAL EXAM #############
Fin_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Fin.csv", header = TRUE, sep = ",")
names(Fin_da)
x_date <- Fin_da$Date
x_cat <- Fin_da$CAT
x_Mkt_RF <- Fin_da$Mkt_RF
x_SMB <- Fin_da$SMB
x_HML <- Fin_da$HML
x_CMA <- Fin_da$CMA
x_RMW <- Fin_da$RMW
x_RF <- Fin_da$RF
x_Div <- Fin_da$Dividends
x_Earn <- Fin_da$Earnings

T <- length(x_cat)
lr_cat <- log(x_cat[-1]/x_cat[-T])    # log returns for CAT
lr_earn <- log(x_Earn[-1]/x_Earn[-T]) # log changes for Earnings
Mkt_RF <- x_Mkt_RF[-1]/100
SMB <- x_SMB[-1]/100
HML <- x_HML[-1]/100
CMA <- x_CMA[-1]/100
RMW <- x_RMW[-1]/100
RF <- x_RF[-1]/100
cat_x <- lr_cat - RF # CAT excess returns
T_new <- length(cat_x)

###### QUESTION 1 ######

## 1.a - Report regression
fit_cat_ff4 <- lm(cat_x ~ Mkt_RF + SMB + HML + RMW)
summary(fit_cat_ff4)

## 1.b - Interpret Beta_1 and Beta_2 coefficients

## 1.c - Drivers of CAT
# Mkt and HML are very significant (very low p-values). SMB and RMW are significant at the 10% level.

## 1.d - Interpret R^2 and the F-test of goodness of fit
# The variability of the FF factors explains 40.24% of the variability of CAT excess returns.
# The F-statistic shows a low p-value. It rejects H0 of no joint significance for the FF factors.

## 1.e - Test if Beta_1 = 1
out_ff4 <- summary(fit_cat_ff4)
b_ff4 <- fit_cat_ff4$coefficients
b_ff4_1 <- out_ff4$coefficients[, 1]   # OLS coefficients
b_ff4_sd1 <- out_ff4$coefficients[, 2] # OLS standard errors
t_beta_1 <- (b_ff4[2] - 1)/b_ff4_sd1[2]
t_beta_1
# |t_beta_1| < 1.96 => At the 5% level, we cannot reject H0: Beta_1 = 1.

## 1.f - Test if beta_SMB = beta_HML = 0.5
library(car)
linearHypothesis(fit_cat_ff4, c("SMB = 0.5", "HML = 0.5"), test = "F") # "F": exact test
# The p-value of the F-stat is close to 5%. Marginal decision. Strictly speaking, we cannot reject H0.

## 1.g - QLR test: Structural break - estimating the break date (with library desk)
library(desk)
e_cat <- fit_cat_ff4$residuals
T <- length(e_cat)
pie <- .15 # trimming fraction
T0 <- round(T * pie)
T1 <- round(T * (1 - pie))
my.qlr <- qlr.test(fit_cat_ff4, from = T0, to = T1, sig.level = 0.05, details = TRUE)
plot(my.qlr, col = "red", main = "QLR Test: 4-FF Factor Model") # plot test results
max(my.qlr$f.stats)
my.qlr$breakpoint              # extract breakpoint observation
Fin_da$Date[my.qlr$breakpoint] # print date
qlr.cv(T, L = 2, sig.level = 0.05)
# The QLR F-value is 5.08668. It is low compared to the critical value => We cannot reject H0.
# No evidence of a structural break.

###### QUESTION 2 ######

## 2.a - LM test for Mkt_RF^2 and SMB^2 as drivers of heteroscedasticity
Mkt_RF2 <- Mkt_RF^2
SMB2 <- SMB^2
resid_r <- fit_cat_ff4$residuals # extract residuals from the FF4 model
fit_lm <- lm(resid_r ~ Mkt_RF + SMB + HML + RMW + Mkt_RF2 + SMB2) # auxiliary regression
summary(fit_lm)
R2_r <- summary(fit_lm)$r.squared # extract R^2 from fit_lm
R2_r
LM_test <- R2_r * T_new
LM_test
qchisq(.95, df = 2) # chi-squared (df = 2) critical value at the 5% level
p_val <- 1 - pchisq(LM_test, df = 2) # p-value of LM_test
p_val
# The p-value of the LM test is 0.20 => We cannot reject H0. That is, no evidence that
# Mkt_RF^2 & SMB^2 are drivers of heteroscedasticity.

## 2.b - GQ and BP tests for heteroscedasticity
library(lmtest)
gqtest(fit_cat_ff4)
bptest(fit_cat_ff4)
# Big p-value for the GQ test, but a low p-value for the BP test => Evidence of heteroscedasticity.

## 2.c - DW test for first-order autocorrelation
dwtest(fit_cat_ff4)
# The p-value of the DW test is 0.1038 => Cannot reject H0. That is, no evidence of
# first-order autocorrelation in the model errors.

## 2.d - BG test for autocorrelation
bgtest(fit_cat_ff4, order = 4)
bgtest(fit_cat_ff4, order = 12)
# Big p-values for both tests. No evidence of 4th- or 12th-order autocorrelation.
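# A minimal sketch (not part of the original solution): given the BP-test evidence of
# heteroscedasticity in 2.b, the OLS t-stats can be re-checked with White
# heteroscedasticity-consistent (HC) standard errors from the sandwich package.
library(sandwich)
library(lmtest)
coeftest(fit_cat_ff4, vcov = vcovHC(fit_cat_ff4, type = "HC0")) # White SEs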
## 2.e #### Q & LB Autocorrelation Tests & Q* Automatic Lag Selection Test y <- e_cat Box.test(y, lag = 4, type="Box-Pierce") Box.test(y, lag = 12, type="Box-Pierce") Box.test(y, lag = 4, type="Ljung-Box") Box.test(y, lag = 12, type="Ljung-Box") library(vrtest) Auto.Q(y, 12) # Q* test automatic selection of p # No change for Mkt and HML; but, once, we take into account autocorrelation and heteroscedasticity SMB and RMW drop their significance. RMW not longer signficant at 10% level. ## 2.f NW SE # At 5% level, we have evidence of autocorrelation. Then, use NW SE. library(sandwich) Var_NW <- NeweyWest(fit_cat_ff4, lag = 12) SE_NW <- sqrt(diag(Var_NW)) t_b_NW <- b_ff4/SE_NW t_b_NW # Once, we take into account autocorrelation and heteroscedasticity SMB is no longer significant at the 5% level (but, significant at 10% level). ###### QUESTION 3 ###### x_date[433] ## 3.a - Estimation Period T0 <- 1 T1 <- 432 # End of Estimation Period (2016.12) T2 <- T1+1 # Start of Validation Period (2017.1) y <- cat_x xx_ff4 <- cbind(Mkt_RF, SMB, HML, RMW) y1 <- y[T0:T1] T2 <- T1+1 # Start of Validation Period (2017.1) x1 <- xx_ff4[T0:T1,] fit_cat_est <- lm(y1 ~ x1) # Estimation Period Regression (1971.2 - 2017.4) b_est <- fit_cat_est$coefficients # Extract OLS coefficients from regression summary(fit_cat_est) ## 3.b - Forecasting and MSE Under RW Assumption For Independent Variables xx_cons <- rep(1,T_new-T2+1) k_for <- length(xx_cons) # Create a constant for Validation forecasts y_mod_f0 <- cbind(xx_cons,xx_ff4[T1:(T_new-1),]) %*% b_est # Validation period data e_mod_f0 <- y[T2:T_new] - y_mod_f0 # Forecasat error mse_e_f0 <- sum(e_mod_f0^2)/k_for # Model's MSE mse_e_f0 ## 3.c - Forecasting and MSE Under RW Model e_rw_f0 <- y[T2:T_new] - y[T1:(T_new - 1)] # RW Forecasat error mse_rw_f0 <- sum(e_rw_f0^2)/k_for # RW's MSE mse_rw_f0 ## 3.d - Testing Equality of MSE: Mod vs RW z_mgn <- e_rw_f0 + e_mod_f0 # RW has higher MSE => becomes e(1) x_mgn <- e_rw_f0 - e_mod_f0 fit_mgn <- lm(z_mgn ~ x_mgn) summary(fit_mgn) # Check t-stat # Significant slope coefficient in MGN regression => Model's MSE is significantly different (& lower) than the RW. ###### QUESTION 4 ###### ## ARMA(1,3) y_t = 0.56 + .46 y_(t-1) + 0.5 ε_(t-1) - 0.4 ε_(t-2)- 0.2 ε_(t-3) + ε_t # Data y_t_1 <- 1.34 e_t_1 <- 0.17 e_t_2 <- -0.95 e_t_3 <- 0.77 ## 4.a - Obtain forecasts for the series y_t for times t, t+1, and t+2 using the estimated ARMA model. y_t <- 0.56 + 0.46 * y_t_1 + 0.5 * e_t_1 - 0.4 * e_t_2 - 0.2 * e_t_3 y_t_p_1 <- 0.56 + 0.46 * y_t - 0.4 * e_t_1 - 0.2 * e_t_2 y_t_p_2 <- 0.56 + 0.46 * y_t_p_1 - 0.2 * e_t_1 ## 4.b - MSE for ARMA(1,3) #Observed Data y_actual_t <- -0.21 y_actual_t_p_1 <- -0.42 y_actual_t_p_2 <- 3.01 MSE_arma <- 1/3 *((y_actual_t- y_t)^2 + (y_actual_t_p_1 - y_t_p_1)^2 + (y_actual_t_p_2 - y_t_p_2)^2) MSE_arma ## 4.c. Use a SES model to forecast the values for the series y_t for times t, t+1, and t+2 The estimated value of the smoothing constant, α, is 0.26, with the most recently available smoothed value, St-1 = 0.307. Obtain forecasts for the series yt for times t, t+1, and t+2 using this model. Compute the (out-of-sample) MSE. alpha <- 0.26 S_t_1 <- 0.607 S_t <- S_t_1 + alpha *(y_t_1 - S_t_1) S_t_p_1 <- S_t S_t_p_2 <- S_t MSE_ses <- 1/3 *((y_actual_t- S_t_p_2)^2 + (y_actual_t_p_1 - S_t_p_2)^2 + (y_actual_t_p_2 - S_t_p_2)^2) MSE_ses ## 4.d - Select the better forecasting model. # SES is selected. It has a lower MSE. 
###### QUESTION 5 ######

## 5.a - Report ACF & PACF
plot(lr_earn)
acf(lr_earn)
pacf(lr_earn)

## 5.b - Suggest a model
# AR(4)? AR(8)? Some seasonality at lag 13, which is really strange!... The financial
# crisis (with outliers) is likely affecting the series. Maybe better to use an ARMA(p,q)?
# Try auto.arima.
library(forecast)
auto.arima(lr_earn)
fit_arma43 <- arima(lr_earn, order = c(4, 0, 3))
fit_arma43

## 5.c - Stationarity?
autoplot(fit_arma43) # plot inverse AR & MA roots
# Inverse AR roots inside the unit circle => stationary.
# Inverse MA roots on the unit circle => close to non-invertible.

## 5.d - Autocorrelated residuals?
checkresiduals(fit_arma43)
# Yes, evidence of autocorrelated residuals. Residuals are not white noise.

## 5.e - LM test for seasonality in residuals
zz <- fit_arma43$residuals
Feb1 <- rep(c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12 + 1)) # create February dummy
Mar1 <- rep(c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12 + 1)) # create March dummy
Apr1 <- rep(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12 + 1)) # create April dummy
May1 <- rep(c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12 + 1)) # create May dummy
Jun1 <- rep(c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), (length(zz)/12 + 1)) # create June dummy
Jul1 <- rep(c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), (length(zz)/12 + 1)) # create July dummy
Aug1 <- rep(c(0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), (length(zz)/12 + 1)) # create August dummy
Sep1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0), (length(zz)/12 + 1)) # create September dummy
Oct1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0), (length(zz)/12 + 1)) # create October dummy
Nov1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (length(zz)/12 + 1)) # create November dummy
Dec1 <- rep(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (length(zz)/12 + 1)) # create December dummy
Feb <- Feb1[1:T_new]
Mar <- Mar1[1:T_new]
Apr <- Apr1[1:T_new]
May <- May1[1:T_new]
Jun <- Jun1[1:T_new]
Jul <- Jul1[1:T_new]
Aug <- Aug1[1:T_new]
Sep <- Sep1[1:T_new]
Oct <- Oct1[1:T_new]
Nov <- Nov1[1:T_new]
Dec <- Dec1[1:T_new]
e_arma43 <- fit_arma43$residuals
lm_seas <- lm(e_arma43 ~ Feb + Mar + Apr + May + Jun + Jul + Aug + Sep + Oct + Nov + Dec)
R2_r <- summary(lm_seas)$r.squared # extract R^2 from lm_seas
R2_r
LM_test <- R2_r * length(e_arma43)
LM_test
qchisq(.95, df = 11) # chi-squared (df = 11) critical value at the 5% level
p_val <- 1 - pchisq(LM_test, df = 11) # p-value of LM_test
p_val
# High p-value => No evidence of seasonality in the residuals. No need to reformulate the
# model for seasonality, but the model very likely needs a dummy for the 2008 financial crisis.

###### QUESTION 6 - THEORY REVIEW ######
## 6.a - Yes. See the example above of log changes for Earnings.
## 6.b - Yes. White SEs are used when only heteroscedasticity is present. For autocorrelated errors, we use NW SEs.
## 6.c - False. Not the same; but, in practice, both estimates should be "similar."
## 6.d - True. It relies on the empirical distribution, not on a normality assumption.
## 6.e - True. Second-order stationarity only requires constancy of the first two moments, with a covariance function that depends on k, not on t.
## 6.f - False. Stationarity depends on the AR(1) coefficient: Phi_1 has to be smaller than 1 in absolute value.
##       MA processes are always stationary.
## 6.g - True. The variance depends on time and, as time grows, it grows without bound.
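# A minimal illustration (not part of the exam): simulate a stationary AR(1)
# (|phi| < 1, per 6.f) next to a random walk (phi = 1, per 6.g). The AR(1)
# mean-reverts, while the random walk's variance grows with t.
set.seed(123)
T_sim <- 500
ar1_sim <- arima.sim(model = list(ar = 0.5), n = T_sim) # stationary AR(1) with phi = 0.5
rw_sim <- cumsum(rnorm(T_sim))                          # random walk: y_t = y_(t-1) + e_t
plot.ts(cbind(ar1_sim, rw_sim), main = "Stationary AR(1) vs Random Walk")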