############# Bootstrap: Estimation and C.I. (Code #2) #############

Sh_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/Shiller_data.csv", head=TRUE, sep=",")
names(Sh_da)
SP <- Sh_da$P					# Extract P = S&P500 series
D <- Sh_da$D					# Extract D = Dividend S&P500 series
i <- Sh_da$Long_i				# Extract Long_i = 10-year interest rate series
T <- length(SP)
lret <- log((SP[-1]+D[-1]/12)/SP[-T])		# Define log returns
rf <- mean(i)/100/12
rf
T_s <- length (lr)
T0 <- T-3

exc_ret <- lret[1:T0] - rf
var_exc_ret <- var(exc_ret)

erp <- mean(exc_ret)  				# mean monthly ERP
erp
erp * 12   				  	# annual ERP

# t-test
t_hat <- (erp - 0)/sqrt(var_exc_ret/T_s)
t_hat 

# 98% C.I.
z_98 <- 2.33					#z-value at alpha =.02
CI_lb <- erp - z_98 * sqrt(var_exc_ret/T_s)	# Lower bound
CI_ub <- erp + z_98 * sqrt(var_exc_ret/T_s)	# Upper bound
CI_lb
CI_ub

ci_98 <- c(erp - z_98 * sqrt(var_exc_ret/T_s),  erp + z_98 * sqrt(var_exc_ret/T_s))
ci_98

ci_98 * 12					# Annualized C.I.

## Note by looking at 98% C.I., 1% is outside range. This is Mehra & Prescott point, ERP is "too high"


##### Bootstrap from scratch & Using Boot for variance of log returns

lr <- lret[1:T0]
lr_var <- var(lr)				# Define variance of log returns
sqrt(lr_var)

sim_size <- 1000				# B = size of bootstrap
library(resample)				# call library resample


#### Percentile Bootstrap from scratch (using sample function from resample package)
data_star <- sample(lr, T_s*sim_size, replace=TRUE) 	# create B resamples of size T_s
boot_sample <- matrix(data_star, nrow=T_s, ncol=sim_size)  # organize resamples in matrix
boots_vars  <- colVars(boot_sample)			# compute the variance for each bootsrap sample

q <- quantile(boots_vars , c(0.025, 0.975))		# Find the 0.025 and 0.975 quantile for q_star
ci <- c(q[1], q[2])					# Calculate the 95% C.I. for the variance.
cat("Confidence interval: ",ci, "\n")			# Print C.I using cat 

ci
sqrt(ci)


## Bootstrap using boot and defining function of statistic of interest
install.packages("boot")
library(boot)

# function to obtain the variance from the data
var_p <- function(data, i) {
			d <- data[i]
			return(var(d))
}

boot.samps <- boot(data=lr, statistic=var_p, R=sim_size)	# resampling and theta* estimation

## Percentile method using boot
boot.ci(boot.samps, type = "perc")

new <- sort(boot.samps$t)
sqrt(new[25]) 			 		# CI's Lower Bound 
sqrt(new[975]) 			 		# CI's Upper Bound 

hist(boot.samps$t, breaks=12, main="Histogram for Boostrapped Variances", xlab="Bootstrapped Variances")



#### Empirical Bootstrap
data_star <- sample(lr, T_s*sim_size, replace=TRUE) 	# create B resamples of size T_s
boot_sample <- matrix(data_star, nrow=T_s, ncol=sim_size)  # organize resamples in matrix
boots_vars  <- colVars(boot_sample)			# compute the variance for each bootsrap sample
q_star <- boots_vars - lr_var				# Compute q* for each bootstrap sample

q <- quantile(q_star, c(0.025, 0.975))			# Find the 0.025 and 0.975 quantile for q_star
ci <- lr_var - c(q[2], q[1])				# Calculate the 95% C.I. for the variance.
cat("Confidence interval: ",ci, "\n")			# Print C.I using cat 

ci
sqrt(ci)


## Empirical Bootstrap using boot
boot.samps <- boot(data=lr, statistic=var_p, R=sim_size)	# resampling and theta* estimation
boot.ci(boot.samps, type = "basic")		# boot.ci computes the CI.
q_star <- boot.samps$t - lr_var 		# q* = theta* −  theta^
q_ad <- sort(q_star)				# sort q*
lr_var - q_ad[975]				# CI's Lower Bound
lr_var - q_ad[25]			 	# CI's Upper Bound 			

sqrt(lr_var - q_ad[975])
sqrt(lr_var - q_ad[25])


#### Parametric Bootstrap assuming a Normal Dist
lr_var						# s2
lr_sd <- sqrt(lr_var)
m_lr <- mean(lr)				# mean
m_lr

x <- rnorm(T_s*sim_size, mean=m_lr, sd=lr_sd)		# generate normal data (mean & sd from data)
boot_par_sample <- matrix(x, nrow=T_s, ncol=sim_size)	# organize simulated data
boots_vars  <- colVars(boot_par_sample)			# compute variances
q_star <- boots_vars - lr_var				# center distribution around lr_var

q <- quantile(q_star, c(0.025, 0.975))
ci <- lr_var - c(q[2], q[1]) 
ci 
sqrt(ci)


# Sample SE of Var of bootstrap
var(boots_vars)
sqrt(var(boots_vars))



#### Empirical C.I. for Sharpe Ratio
sr <- (mean(lr) - rf)/sd(lr)
sr

### function to obtain the Sharpe Ratio from the data
sr_p <- function(data, i) {
			d <- data[i]
sr <- (mean(d) - rf)/sd(d)
			return(sr)
}

boot.samps <- boot(data=lr, statistic=sr_p, R=sim_size)	# resampling and θ* estimation
boot.ci(boot.samps, type = "basic")		# boot.ci computes the CI.

# Check Bounds
q_star <- boot.samps$t - sr			# q* = theta* -  theta^
q_ad <- sort(q_star)				# sort q*
sr- q_ad[975]					# CI's Lower Bound
sr - q_ad[25]			 		# CI's Upper Bound 			


# Percentile method
new <- sort(boot.samps$t)
new[25] 			 		# CI's Lower Bound 
new[975] 			 		# CI's Upper Bound 

hist(boot.samps$t, breaks=12, main="Histogram for Boostrapped Variances", xlab="Bootstrapped Variances")