############# Linear Model: Data Problemd, Bootstrap SE[b] and t[b] & Testing Joint Hypothesis (Code #6) ############# SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv",head=TRUE,sep=",") names(SFX_da) x_sp <- SFX_da$SP500 x_ibm <- SFX_da$IBM x_Mkt_RF<- SFX_da$Mkt_RF x_SMB <- SFX_da$SMB x_HML <- SFX_da$HML x_RF <- SFX_da$RF T <- length(x_ibm) lr_ibm <- log(x_ibm[-1]/x_ibm[-T]) lr_sp <- log(x_sp[-1]/x_sp[-T]) x0 <- matrix(1,T-1,1) Mkt_RF <- x_Mkt_RF[-1]/100 SMB <- x_SMB[-1]/100 HML <- x_HML[-1]/100 RF <- x_RF[-1]/100 ibm_x <- lr_ibm - RF # OLS results fit_ibm_ff3 <- lm(ibm_x ~ Mkt_RF + SMB + HML) summary(fit_ibm_ff3) y <- ibm_x x <- cbind(x0, Mkt_RF, SMB, HML) data_xy <- data.frame(y, x) # data frame to use with head function to locate first 10 influential obs k <- ncol(x) # Plot residuals x_resid <- residuals(fit_ibm_ff3) plot(x_resid, typ ="l", col="blue", main ="IBM Residuals from 3 FF Factor Model", xlab="Date", ylab="IBM residuals") # Plot standardized residuals x_stand_resid <- x_resid/sd(x_resid) # standardized residuals plot(x_stand_resid, typ ="l", col="blue", main ="IBM Standardized Residuals from 3 FF Factor Model", xlab="Date", ylab="IBM residuals") cooksd <- cooks.distance(fit_ibm_ff3) # plot Cook's distance plot(cooksd, pch="*", cex=2, main="Influential Obs by Cooks distance") # add cutoff line abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line # add labels text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T), names(cooksd),""), col="red") # add labels # influential row numbers influential <- as.numeric(names(cooksd)[(cooksd > 4*mean(cooksd, na.rm=T))]) # print first 10 influential observations. head(data_xy[influential, ], n=10L) install.packages("olsrr") library(olsrr) # need to install package olsrr sum(x_stand_resid > 2) # Rule of thumb count (5% count is OK) x_lev <- ols_leverage(fit_ibm_ff3) # leverage residuals sum(x_lev > (2*k+2)/T) # Rule of thumb count (5% count is OK) sum(cooksd > 4/T) # Rule of thumb count (5% count is OK) sum(x_stand_resid > 2)/T # proportion of "outliers" sum(x_lev > (2*k+2)/T)/T # proportion of "outliers" sum(cooksd > 4/T)/T # proportion of "outliers" ols_plot_resid_stand(fit_ibm_ff3) # Plot Standardize residuals ols_plot_cooksd_bar(fit_ibm_ff3) # Plot Cook’s D measure ols_plot_dfbetas(fit_ibm_ff3) # Plot Difference in betas ols_vif_tol(fit_ibm_ff3) ols_eigen_cindex(fit_ibm_ff3) #### Bootstrap library(boot) ## Bootstrap Correlation coefficients B = 1000 sim_size = 1000 dat_spibm <- data.frame(lr_sp, lr_ibm) # function to obtain the correlation coefficient from the data cor_xy <- function(data, i) { d <-data[i,] return(cor(d$lr_sp,d$lr_ibm)) } # bootstrapping with sim_size replications boot.samps <- boot(data=dat_spibm, statistic=cor_xy, R=sim_size) # view stored bootstrap samples and compute mean boot.samps # Print original ρ, bias and SE of bootstraps mean(boot.samps$t) # our estimate of the correlation sd(boot.samps$t) # SD of the correlation estimate # Elegant histogram hist(boot.samps$t,main="Histogram for Bootstrapped Correlations", xlab="Correlations", breaks=20) boot.ci(boot.samps, type="basic") ## Bootstrap Correlation coefficients B = 100 sim_size = 100 boot.samps <- boot(data=dat_spibm, statistic=cor_xy, R=sim_size) # view stored bootstrap samples and compute mean boot.samps # Print original ρ, bias and SE of bootstraps mean(boot.samps$t) # our estimate of the correlation sd(boot.samps$t) # SD of the correlation estimate boot.ci(boot.samps, type="basic") ## Bootstrap Correlation coefficients B = 25 sim_size = 25 boot.samps <- boot(data=dat_spibm, statistic=cor_xy, R=sim_size) # view stored bootstrap samples and compute mean boot.samps # Print original ρ, bias and SE of bootstraps mean(boot.samps$t) # our estimate of the correlation sd(boot.samps$t) # SD of the correlation estimate boot.ci(boot.samps, type="basic") ## Bootstrap SE for b in a Linear Model install.packages("lmboot") library(lmboot) # need to run before install.packages(“lmboot”) y <- ibm_x x <- cbind(x0, Mkt_RF, SMB, HML) dat_yx <- data.frame(y, x) # lmboot needs an R data frame. We make one. t_stat <- summary(fit_ibm_ff3)$coef[,3] #extracting t-values from lm (3rd row in table) sim_size = 1000 ff3_b <- paired.boot(y ~ x-1, data=dat_yx, B=sim_size) ff3_b$origEstParam # print OLS results (“original estimates”) ff3_b$bootEstParam[1:10,] # print the first 10 of B=1,000 bootstrap samples ## Mean values for b mean(ff3_b$bootEstParam[,1]) # print mean of bootstrap samples for constant mean(ff3_b$bootEstParam[,2]) # print mean of bootstrap samples for Mkt_RF mean(ff3_b$bootEstParam[,3]) mean(ff3_b$bootEstParam[,4]) # Statistics for sampling distribution of b summary(ff3_b$bootEstParam) # Summary of bootstraps (mean, median, max, min, etc) hist(ff3_b$bootEstParam[,2],main="Histogram for Bootstrapped Market Beta", xlab="Correlations", breaks=20) ## bootstrap bias ff3_b$origEstParam[1] - mean(ff3_b$bootEstParam[,1]) ff3_b$origEstParam[2] - mean(ff3_b$bootEstParam[,2]) ff3_b$origEstParam[3] - mean(ff3_b$bootEstParam[,3]) ff3_b$origEstParam[4] - mean(ff3_b$bootEstParam[,4]) ## SD for each parameter sd1 <- sd(ff3_b$bootEstParam[,1]) sd2 <- sd(ff3_b$bootEstParam[,2]) sd3 <- sd(ff3_b$bootEstParam[,3]) sd4 <- sd(ff3_b$bootEstParam[,4]) ## t-values for each parameter t1 <- ff3_b$bootEstParam[,1]/sd1 t2 <- ff3_b$bootEstParam[,2]/sd2 t3 <- ff3_b$bootEstParam[,3]/sd3 t4 <- ff3_b$bootEstParam[,4]/sd4 ## mean t-values mean(t1) mean(t2) mean(t3) mean(t4) ## bias in t-values for each parameter mean(t1) - t_stat[1] mean(t2) - t_stat[2] mean(t3) - t_stat[3] mean(t4) - t_stat[4] ##### Testing Linear Hypothesis in the Linear Model ### Testing the Expectations Hypothesis (EH) SF_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/SpFor_prices.csv", head=TRUE, sep=",") summary(SF_da) x_date <- SF_da$Date x_S <- SF_da$GBPSP x_F3m <- SF_da$GBP3M i_us3 <- SF_da$Dep_USD3M i_uk3 <- SF_da$Dep_UKP3M T <- length(x_S) prem <- (x_S[-1] - x_F3m[-T])/x_S[-1] # forward premium int_dif <- (i_us3 - i_uk3)/100 # interest rate differential y <- prem x <- int_dif[-T] fit_eh <- lm( y ~ x) # Regression to test EH summary(fit_eh) # Check t-values ### Testing Linear Hypothesis in the Fama-French 3 factor Linear Model x <- cbind(x0, Mkt_RF, SMB, HML) k <- ncol(x) # OLS results summary(fit_ibm_ff3) J <- 2 # number of restriction R <- matrix(c(0,0,0,0,1,0,0,1), nrow=2) # matrix of restrictions q <- c(.2, .6) # hypothesized values b <- fit_ibm_ff3$coefficients # Extract OLS coefficients Var_b <- vcov(fit_ibm_ff3) # Extract Var[b] m <- R%*%b - q # m = Estimated R*Beta - q Var_m <- R %*% Var_b %*% t(R) # Variance of m det(Var_m) # check for non-singularity W <- t(m)%*%solve(Var_m)%*%m # W = m' Var[m] m F_t <- as.numeric(W/J) # F-test statistic F_t F_t_asym <- J*F_t # Chi-square-test statistic (asymptotic) F_t_asym qf(.95, df1=J, df2=(T - k)) # exact distribution (F-dist) if errors normal (A5) p_val <- 1 - pf(F_t, df1=J, df2=(T - k)) # p-value(F_t) under (A5) p_val qchisq(.95, df=J) p_val <- 1 - pchisq(F_t_asym, df=J) # p-value(F_t) under asymptotic distrib. p_val ## Test Linear Restrictions using R package "car" install.packages("car") library(car) linearHypothesis(fit_ibm_ff3,c("SMB = 0.2","HML = 0.6"),test="F") # Exact F test linearHypothesis(fit_ibm_ff3,c("SMB = 0.2","HML = 0.6"),test="Chisq") # Asymptotic F-test linearHypothesis(fit_ibm_ff3,c("SMB = HML"),test="F") # testing beta_SMB = beta_HMB linearHypothesis(fit_ibm_ff3,c("SMB + HML = 1"),test="F") # testing beta_SMB + beta_HMB = 1 ### Testing the EH Model linearHypothesis(fit_eh,c("(Intercept) = 0","x = 0"),test="F") linearHypothesis(fit_eh,c("(Intercept) = 0","x = 0"),test="Chisq") ### Goodness of Fit Model y <- ibm_x T <- length(y) x0 <- matrix(1,T,1) x <- cbind(x0,Mkt_RF, SMB, HML) k <- ncol(x) b <- fit_ibm_ff3$coefficients # extracting OLS regression e <- y - x%*%b # we could have used e <- fit_ibm_ff3$residuals RSS <- as.numeric(t(e)%*%e) R2 <- 1 - as.numeric(RSS)/as.numeric(t(y)%*%y) #R-squared R2 summary(fit_ibm_ff3)$r.squared # extracting R-squared from fit-capm F_goodfit <- (R2/(k-1))/((1-R2)/(T-k)) #F-test of goodness of fit. F_goodfit ### Testing with Unrestricted and Restricted Estimations y <- ibm_x x0 <- matrix(1,T,1) x_r <- cbind(x0,Mkt_RF) # Restricted X vector k_r <- ncol(x_r) fit_ibm_ff3_R <- lm(y ~ Mkt_RF) # CAPM estimation ("Restricted FF3 Model) summary(fit_ibm_ff3_R) eR <- fit_ibm_ff3_R$residuals # extracting Restricted residuals RSSR <- as.numeric(t(eR)%*%eR) J <- k - k_r # J = degrees of freedom of numerator F_test <- ((RSSR - RSS)/J)/(RSS/(T-k)) F_test qf(.95, df1=J, df2=(T - k)) # exact distribution (F-dist) if e normal p_val <- 1 - pf(F_test, df1=J, df2=(T - k)) # p-value(F_t) under e normal p_val ### Testing with package lmtest install.packages("lmtest") library(lmtest) fit_wU <- lm (y ~ Mkt_RF + SMB + HML) fit_wR <- lm (y ~ Mkt_RF) waldtest(fit_wU, fit_wR)