############# Bootstrap: Estimation and C.I. (Code #2) ############# Sh_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv", head=TRUE, sep=",") names(Sh_da) SP <- Sh_da$P # Extract P = S&P500 series D <- Sh_da$D # Extract D = Dividend S&P500 series i <- Sh_da$Long_i # Extract Long_i = 10-year interest rate series T <- length(SP) lr <- log((SP[-1]+D[-1]/12)/SP[-T]) # Define log returns rf <- mean(i)/100/12 rf T_s <- length (lr) exc_ret <- lr - rf var_exc_ret <- var(exc_ret) erp <- mean(exc_ret) # mean monthly ERP erp erp * 12 # annual ERP # t-test t_hat <- (erp - 0)/sqrt(var_exc_ret/T_s) t_hat # 98% C.I. z_98 <- 2.33 #z-value at alpha =.02 CI_lb <- erp - z_98 * sqrt(var_exc_ret/T_s) # Lower bound CI_ub <- erp + z_98 * sqrt(var_exc_ret/T_s) # Upper bound CI_lb CI_ub ci_98 <- c(erp - z_98 * sqrt(var_exc_ret/T_s), erp + z_98 * sqrt(var_exc_ret/T_s)) ci_98 ci_98 * 12 # Annualized C.I. ## Note by looking at 98% C.I., 1% is outside range. This is Mehra & Prescott point, ERP is "too high" ##### Bootstrap from scratch & Using Boot for variance of log returns lr_var <- var(lr) # Define variance of log returns sqrt(lr_var) sim_size <- 1000 # B = size of bootstrap library(resample) # call library resample #### Percentile Bootstrap from scratch (using sample function from resample package) data_star <- sample(lr, T_s*sim_size, replace=TRUE) # create B resamples of size T_s boot_sample <- matrix(data_star, nrow=T_s, ncol=sim_size) # organize resamples in matrix boots_vars <- colVars(boot_sample) # compute the variance for each bootsrap sample q <- quantile(boots_vars , c(0.025, 0.975)) # Find the 0.025 and 0.975 quantile for q_star ci <- c(q[1], q[2]) # Calculate the 95% C.I. for the variance. cat("Confidence interval: ",ci, "\n") # Print C.I using cat ci sqrt(ci) ## Bootstrap using boot and defining function of statistic of interest install.packages("boot") library(boot) # function to obtain the variance from the data var_p <- function(data, i) { d <- data[i] return(var(d)) } boot.samps <- boot(data=lr, statistic=var_p, R=sim_size) # resampling and theta* estimation ## Percentile method using boot boot.ci(boot.samps, type = "perc") new <- sort(boot.samps$t) sqrt(new[25]) # CI's Lower Bound sqrt(new[975]) # CI's Upper Bound hist(boot.samps$t, breaks=12, main="Histogram for Boostrapped Variances", xlab="Bootstrapped Variances") #### Empirical Bootstrap data_star <- sample(lr, T_s*sim_size, replace=TRUE) # create B resamples of size T_s boot_sample <- matrix(data_star, nrow=T_s, ncol=sim_size) # organize resamples in matrix boots_vars <- colVars(boot_sample) # compute the variance for each bootsrap sample q_star <- boots_vars - lr_var # Compute q* for each bootstrap sample q <- quantile(q_star, c(0.025, 0.975)) # Find the 0.025 and 0.975 quantile for q_star ci <- lr_var - c(q[2], q[1]) # Calculate the 95% C.I. for the variance. cat("Confidence interval: ",ci, "\n") # Print C.I using cat ci ## Empirical Bootstrap using boot boot.samps <- boot(data=lr, statistic=var_p, R=sim_size) # resampling and theta* estimation boot.ci(boot.samps, type = "basic") # boot.ci computes the CI. q_star <- boot.samps$t - lr_var # q* = theta* − theta^ q_ad <- sort(q_star) # sort q* lr_var - q_ad[975] # CI's Lower Bound lr_var - q_ad[25] # CI's Upper Bound sqrt(lr_var - q_ad[975]) sqrt(lr_var - q_ad[25]) #### Parametric Bootstrap assuming a Normal Dist lr_var # s2 lr_sd <- sqrt(lr_var) m_lr <- mean(lr) # mean m_lr x <- rnorm(T_s*sim_size, mean=m_lr, sd=lr_sd) # generate normal data (mean & sd from data) boot_par_sample <- matrix(x, nrow=T_s, ncol=sim_size) # organize simulated data boots_vars <- colVars(boot_par_sample) # compute variances q_star <- boots_vars - lr_var # center distribution around lr_var q <- quantile(q_star, c(0.025, 0.975)) ci <- lr_var - c(q[2], q[1]) ci sqrt(ci) # Sample SE of Var of bootstrap var(boots_vars) sqrt(var(boots_vars)) #### Empirical C.I. for Sharpe Ratio sr <- (mean(lr) - rf)/sd(lr) sr ### function to obtain the Sharpe Ratio from the data sr_p <- function(data, i) { d <- data[i] sr <- (mean(d) - rf)/sd(d) return(sr) } boot.samps <- boot(data=lr, statistic=sr_p, R=sim_size) # resampling and θ* estimation boot.ci(boot.samps, type = "basic") # boot.ci computes the CI. # Check Bounds q_star <- boot.samps$t - sr # q* = theta* - theta^ q_ad <- sort(q_star) # sort q* sr- q_ad[975] # CI's Lower Bound sr - q_ad[25] # CI's Upper Bound # Percentile method new <- sort(boot.samps$t) new[25] # CI's Lower Bound new[975] # CI's Upper Bound hist(boot.samps$t, breaks=12, main="Histogram for Boostrapped Variances", xlab="Bootstrapped Variances")