- Introduced bootstrap
- Looked at couple of example applications:
- confidence intervals
- hypotheses testing
- Two sources of errors:
- sampling variability
- bootstrap resampling variability
- Complete enumerations for exhaustive bootstrap
Stanford University, Spring 2016, STATS 205
library(bootstrap); data(law); t(law)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ## LSAT 576 635 558 578 666 580 555 661 651 605 653 575 545 572 594 ## GPA 339 330 281 303 344 307 300 343 336 313 312 274 276 288 296
## [1] 0.7763745
draw.bootstrap.sample = function() {
n = dim(law)[1]
ind = sample(n,replace = TRUE)
return(cor(law[ind,]$LSAT,law[ind,]$GPA))
}
B = 40000
thetastar = replicate(B,draw.bootstrap.sample())
Evaluate the correlation coefficient using a Monte Carlo simulation:
Create matrix of all \(\binom{2n-1}{n-1}\) enumerations:
library(partitions) n = 15 allCompositions = compositions(n,n)
allCompositions[,1:10]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] ## [1,] 15 14 13 12 11 10 9 8 7 6 ## [2,] 0 1 2 3 4 5 6 7 8 9 ## [3,] 0 0 0 0 0 0 0 0 0 0 ## [4,] 0 0 0 0 0 0 0 0 0 0 ## [5,] 0 0 0 0 0 0 0 0 0 0 ## [6,] 0 0 0 0 0 0 0 0 0 0 ## [7,] 0 0 0 0 0 0 0 0 0 0 ## [8,] 0 0 0 0 0 0 0 0 0 0 ## [9,] 0 0 0 0 0 0 0 0 0 0 ## [10,] 0 0 0 0 0 0 0 0 0 0 ## [11,] 0 0 0 0 0 0 0 0 0 0 ## [12,] 0 0 0 0 0 0 0 0 0 0 ## [13,] 0 0 0 0 0 0 0 0 0 0 ## [14,] 0 0 0 0 0 0 0 0 0 0 ## [15,] 0 0 0 0 0 0 0 0 0 0
Check number of compositions:
library(assertthat) nCompositions = dim(allCompositions)[2] nCompositions
## [1] 77558760
# Check if we get the same value as in theory
nCompositionsTheory = choose(2*n-1,n-1)
if(!assert_that(nCompositions == nCompositionsTheory)) {
print("there is a problem with the number of compositions")
} else {
print("the number of compositions is correct")
}
## [1] "the number of compositions is correct"
Evaluate the correlation coefficient for all \(\binom{2n-1}{n-1}\) bootstrap samples.
library(parallel)
if(!file.exists("enumData.Rdata")) {
ptm = proc.time()
enumData = mclapply(1:nCompositions, function(i) {
ind = allCompositions[,i]
law.list = lapply(1:n,function(j) matrix(rep(law[j,],ind[j]),ncol = 2 ,byrow = TRUE))
newLaw = do.call(rbind, law.list)
c(cor(unlist(newLaw[,1]),unlist(newLaw[,2])),dmultinom(ind,prob = rep(1,n)))
}, mc.cores = 4)
proc.time() - ptm
enumData = t(simplify2array(enumData))
colnames(enumData) = c("cor","weight")
save(enumData,file = "enumData.Rdata")
} else {
load("enumData.Rdata")
}
Same plot as before but now with all bootstrap samples:
Use the library parallel to speedup computations
library(parallel)
enumData = mclapply(1:nCompositions, function(i) { ... }, mc.cores = 4 )
000, 001, 011, 010, 110, 111, 101, 100000, 001, 010, 011, 100, 101, 110, 1110,100, 0110, 1100, 01, 11, 100111 and using the mappingwe get the corresponding gray code 0100
1000 and using the mappingwe get the corresponding gray code 1100
We construct a Markov chain to sample from \(P( T \ge t)\):
Then to estimate \(P( T \ge t)\):
For background on statistical functionals and asymptotics of the boostrap: