############# Review of Testing AND Maximum Likelihood Estimation (Code #5 - Lecture 8) #############

#### Review: Testing CAPM (constant = alpha = 0)
SFX_da <- read.csv("http://www.bauer.uh.edu/rsusmel/4397/Stocks_FX_1973.csv", head=TRUE, sep=",")
names(SFX_da)
summary(SFX_da)

## Extract variables from imported data
x_ibm <- SFX_da$IBM          # extract IBM price data
x_xom <- SFX_da$XOM          # extract XOM price data
x_Mkt_RF <- SFX_da$Mkt_RF    # extract Market excess returns (in %)
x_RF <- SFX_da$RF            # extract Risk-free rate (in %)
x_SMB <- SFX_da$SMB          # extract SMB excess returns (in %)
x_HML <- SFX_da$HML          # extract HML excess returns (in %)

# Define log returns & adjust size of variables accordingly
T <- length(x_ibm)                  # sample size
lr_ibm <- log(x_ibm[-1]/x_ibm[-T])  # create IBM log returns (in decimal returns)
lr_xom <- log(x_xom[-1]/x_xom[-T])  # create XOM log returns (in decimal returns)
Mkt_RF <- x_Mkt_RF[-1]/100          # Adjust sample size to (T-1) by removing 1st obs & use decimal returns
RF <- x_RF[-1]/100                  # Adjust sample size & use decimal returns
SMB <- x_SMB[-1]/100                # Adjust sample size & use decimal returns
HML <- x_HML[-1]/100                # Adjust sample size & use decimal returns

y <- lr_ibm - RF                    # IBM excess returns
y <- lr_xom - RF                    # XOM excess returns (overwrites y; the tests below use XOM)

### Test of H0: alpha = 0, using R's lm function
fit_y_capm <- lm(y ~ Mkt_RF)
summary(fit_y_capm)
b_x <- fit_y_capm$coefficients               # Extract OLS coefficients from lm object
SE_x <- sqrt(diag(vcov(fit_y_capm)))         # SE from fit_y_capm (also a kx1 vector)
t_alpha <- b_x[1]/SE_x[1]                    # t-stat for H0: Alpha = 0 (same as in 3rd column of summary)
t_alpha                                      # Reject H0 if |t_alpha| > 1.96 (at 5% level)
p_val_alpha <- (1 - pnorm(abs(t_alpha))) * 2 # p-value for t_alpha
p_val_alpha                                  # (if p-value < .05 => reject)

### FF Model Estimation & Test of H0: Market Beta = 1, using R's lm function
fit_y_ff3 <- lm(y ~ Mkt_RF + SMB + HML)      # FF 3-factor regression
summary(fit_y_ff3)
b_x <- fit_y_ff3$coefficients                # Extract OLS coefficients from lm object
SE_x <- sqrt(diag(vcov(fit_y_ff3)))          # SE from fit_y_ff3 (also a kx1 vector)
t_beta_mkt <- (b_x[2] - 1)/SE_x[2]           # t-stat for H0: Market Beta = 1
t_beta_mkt                                   # Reject H0 if |t_beta_mkt| > 1.96 (at 5% level)
p_val_beta_mkt <- (1 - pnorm(abs(t_beta_mkt))) * 2   # p-value for t_beta_mkt
p_val_beta_mkt

##### Testing relevance of Market beta with F-M two-pass regression, using 25 ME/BM Fama-French portfolios (1926:July - 2025:July)
FF_p_da <- read.delim("https://www.bauer.uh.edu/rsusmel/4397/FF_25_portfolios_f.txt", head=TRUE, sep="")
FF_f_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/FF_3_factors_f.csv", head=TRUE, sep=",")
names(FF_p_da)
names(FF_f_da)
summary(FF_p_da)
summary(FF_f_da)

# Extract variables from imported data
Mkt_RF_fm <- FF_f_da$Mkt_RF      # extract Market excess returns (in %)
HML_fm <- FF_f_da$HML            # extract HML returns (in %)
SMB_fm <- FF_f_da$SMB            # extract SMB returns (in %)
RF_fm <- FF_f_da$RF              # extract Risk-free rate (in %)
Y_p <- FF_p_da[, 2:26] - RF_fm   # Extract excess returns of 25 FF portfolios
T <- length(HML_fm)              # Number of observations (1926:July on)
x0 <- matrix(1, T, 1)
x <- cbind(x0, Mkt_RF_fm)
summary(Mkt_RF_fm)
summary(Y_p)
k <- ncol(Y_p)

## First Pass
library(resample)                # for colVars() below
Allbs <- NULL                    # Initialize empty (a space to put betas)
for (i in seq(1, k, 1)) {
  y <- Y_p[, i]                            # select Y (portfolio)
  b <- solve(t(x) %*% x) %*% t(x) %*% y    # OLS regression = (X'X)^(-1) X'y
  Allbs <- cbind(Allbs, b)                 # accumulate b as columns (one column per portfolio)
}
mb <- rowMeans(Allbs)
mb
varb <- colVars(t(Allbs))
sd_b <- sqrt(varb)
sd_b
beta_ret <- cbind(colMeans(Y_p), t(Allbs))   # col 1 = mean portfolio returns; col 3 = Market betas
cor(beta_ret[, 1], beta_ret[, 3])
plot(beta_ret[, 1], beta_ret[, 3], main = "Scatterplot: Portfolio Returns & CAPM Beta",
     xlab = "Mean Portfolio Returns", ylab = "Market Beta", pch = 19)
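
# Optional cross-check (a sketch, not part of the original lecture code): lm() also accepts a
# matrix response, so the first-pass betas can be recomputed in a single call and compared with
# the loop above. Allbs_lm is an illustrative name; it assumes Y_p & Mkt_RF_fm as defined above.
Allbs_lm <- coef(lm(as.matrix(Y_p) ~ Mkt_RF_fm))   # 2 x 25 matrix: row 1 = constants, row 2 = Market betas
max(abs(Allbs_lm - Allbs))                         # should be numerically zero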
## Second Pass (CAPM) - OLS regression of mean portfolio returns against Market betas
fit_fm_capm_25 <- lm(beta_ret[, 1] ~ beta_ret[, 3])
summary(fit_fm_capm_25)

#### Fama-French Factors
x <- cbind(Mkt_RF_fm, SMB_fm, HML_fm)
cor(x)

# Check factor mean returns & SDs
colMeans(x) * 12        # Annualized factor mean return
yy <- colVars(x)
sqrt(yy)
sqrt(yy) * sqrt(12)     # Annualized factor SD

#### Plots with time series (with Dates)
Allbs <- NULL       # Initialize empty (a space to put the accumulated factor index)
xo <- 100           # starting value of the index
j <- 1              # Select column to plot (j=1, Mkt_RF)
for (i in seq(1, T, 1)) {
  xo <- xo * (1 + x[i, j]/100)   # accumulate factor return
  Allbs <- rbind(Allbs, xo)      # keep xo as rows
}
X <- Allbs
x.ts <- ts(X, frequency = 12, start = c(1926, 7))
plot.ts(x.ts, col = "blue", ylab = "Factor Return (%)", main = "Market Factor Accumulated Return")
X[T, 1]/100 - 1     # Total accumulated factor return (in decimal)

#### Testing relevance of betas (Market beta) in the F-F 3-factor Model with 100 portfolios -- check for missing data (-99.99)
FF_p_da <- read.csv("https://www.bauer.uh.edu/rsusmel/4397/FF_100_portfolios_f.csv", head=TRUE, sep=",")
names(FF_p_da)
summary(FF_p_da)
k <- ncol(FF_p_da)
Y_p <- FF_p_da[, 2:k]     # Extract 100 FF portfolios (many with plenty of -99.99)
T <- length(HML_fm)       # Number of observations (through June 2025)
T0 <- 283                 # 283 = Jan 1950
x0 <- matrix(1, T, 1)
x <- cbind(x0, Mkt_RF_fm, SMB_fm, HML_fm)
summary(Y_p)              # Check min (-99.99)
colMeans(Y_p)             # Check for negatively inflated means

# Select portfolios with no missing data (-99.99) after T0 (88 portfolios left)
Y_p <- cbind(Y_p[, 4:10], Y_p[, 12:69], Y_p[, 71:76], Y_p[, 78:87], Y_p[, 91:97])
x <- x[T0:T, ]
Y_p <- Y_p[T0:T, ]
k <- ncol(Y_p)
summary(Y_p)              # Check min (no more -99.99)
colMeans(Y_p)             # Check again for negatively inflated means

## First Pass
Allbs <- NULL             # Initialize empty (a space to put betas)
for (i in seq(1, k, 1)) {
  y <- Y_p[, i]                            # select Y (portfolio)
  b <- solve(t(x) %*% x) %*% t(x) %*% y    # OLS regression = (X'X)^(-1) X'y
  Allbs <- cbind(Allbs, b)                 # accumulate b as columns (one column per portfolio)
}
mb <- rowMeans(Allbs)
mb
varb <- colVars(t(Allbs))
sd_b <- sqrt(varb)
sd_b
beta_ret <- cbind(colMeans(Y_p), t(Allbs))   # col 1 = mean returns; cols 3-5 = Mkt, SMB, HML betas
cor(beta_ret[, 1], beta_ret[, 3])
plot(beta_ret[, 1], beta_ret[, 3], main = "Scatterplot: Portfolio Returns & Market Beta",
     xlab = "Mean Portfolio Returns", ylab = "Market Beta", pch = 19)

## Second Pass (3 FF) - OLS regression of mean portfolio returns against betas
fit_fm_ff3_100 <- lm(beta_ret[, 1] ~ beta_ret[, 3:5])   # mean portfolio returns vs betas (Mkt, SMB, HML)
summary(fit_fm_ff3_100)

#### Do the additional FF factors work? (Check for IBM & XOM)
x_RMW <- SFX_da$RMW       # Extract RMW factor
x_CMA <- SFX_da$CMA       # Extract CMA factor
RMW <- x_RMW[-1]/100      # Adjust RMW data
CMA <- x_CMA[-1]/100      # Adjust CMA data

y <- lr_ibm - RF          # IBM excess returns
fit_y_ff5 <- lm(y ~ Mkt_RF + SMB + HML + RMW + CMA)
summary(fit_y_ff5)

y <- lr_xom - RF          # XOM excess returns
fit_y_ff5 <- lm(y ~ Mkt_RF + SMB + HML + RMW + CMA)
summary(fit_y_ff5)
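
# Optional joint test (a sketch, not part of the original lecture code): do RMW & CMA add
# explanatory power beyond the FF 3 factors? Refit the restricted 3-factor model on the same
# excess-return series (y = lr_xom - RF from above) and compare the nested models with an F-test.
# fit_xom_ff3 is an illustrative name introduced here.
fit_xom_ff3 <- lm(y ~ Mkt_RF + SMB + HML)   # restricted model (no RMW, CMA)
anova(fit_xom_ff3, fit_y_ff5)               # F-test of H0: coefficients on RMW & CMA are jointly 0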
#### Maximum Likelihood Example: Toss a coin 100 times; we get 60 heads
p <- .50      # Initial value for p (=.50 if I believe the coin is fair)
h <- 60       # 60 heads
n <- 100      # 100 tosses
factorial(n)/(factorial(n-h)*factorial(h))   # number of combinations of 60 heads out of 100
choose(n, h)                                 # R function for combinations of 60 heads out of 100
prob_h <- choose(n, h) * p^h * (1-p)^(n-h)   # binomial probability of observing h heads
prob_h
dbinom(h, 100, p)                            # R way of calculating binomial prob of observing h heads

# Calculate the p that maximizes the probability of observing 60 heads
# Step 1 - Create Likelihood function
likelihood_b <- function(p) {                # likelihood of the data as a function of p
  dbinom(h, 100, p)
}
likelihood_b(p)                              # print likelihood at initial p

negative_likelihood_b <- function(p) {       # R uses a minimization algorithm => change sign of function
  dbinom(h, 100, p) * (-1)
}
negative_likelihood_b(p)

# Step 2 - Maximize (or Minimize negative Likelihood function)
nlm(negative_likelihood_b, p, stepmax = 0.5)   # nlm minimizes the function

#### Normal sample: 5, 6, 7, 8, 9, 10 with unknown mean (& known SD = 1)
mu <- 0                      # Initial value for mu (= mean)
x_6 <- seq(5, 10, by = 1)    # observed data x_6 = (5, 6, 7, 8, 9, 10)
x_6
dnorm(5, mu, sd = 1)         # density of x = 5 when the data come from N(0,1)
dnorm(x_6)                   # densities of each element of x_6 when the data come from N(0,1)
prod(dnorm(x_6))             # likelihood function evaluated at the observed data
l_f <- prod(dnorm(x_6))      # assign l_f to the likelihood function evaluated at the observed data
l_f
log(l_f)                     # log-likelihood function evaluated at the observed data
sum(log(dnorm(x_6)))         # compact notation for the log-likelihood evaluated at the observed data

# Calculate the mu that maximizes the probability of observing {5, 6, 7, 8, 9, 10}
# Step 1 - Create Likelihood function
likelihood_n <- function(mu) {               # log-likelihood of the data as a function of mu
  sum(log(dnorm(x_6, mu, sd = 1)))
}
likelihood_n(mu)                             # print log-likelihood at initial mu

negative_likelihood_n <- function(mu) {      # R uses a minimization algorithm => change sign of function
  sum(log(dnorm(x_6, mu, sd = 1))) * (-1)
}
negative_likelihood_n(mu)

# Step 2 - Maximize (or Minimize negative Likelihood function)
results_n <- nlm(negative_likelihood_n, mu, stepmax = 2)   # nlm minimizes the function
results_n                       # Show nlm results
mu_max <- results_n$estimate    # Extract estimate
mu_max                          # Should equal the sample mean
likelihood_n(mu_max)            # Check max value at mu_max

# Standard Error of mu
results_n <- nlm(negative_likelihood_n, mu, stepmax = 2, hessian = TRUE)
mu_hess <- results_n$hessian    # Extract estimated Hessian (matrix of 2nd derivatives)
mu_hess                         # Show Hessian
cov <- solve(mu_hess)           # Invert Hessian to get Var(mu_MLE)
cov                             # Show variance of mu_MLE
stderr <- sqrt(diag(cov))       # Compute S.E. of mu_MLE
stderr
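
# Optional sanity check (a sketch, not part of the original lecture code): with known SD = 1,
# the MLE of mu is the sample mean and its S.E. is 1/sqrt(n), so both should match nlm's output.
mean(x_6)                           # analytical MLE of mu (should match mu_max)
1/sqrt(length(x_6))                 # analytical S.E. (should match stderr)
mu_max + c(-1.96, 1.96) * stderr    # approximate 95% C.I. for mu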