- Introduced bootstrap
- Looked at couple of example applications:
- confidence intervals
- hypotheses testing
- Two sources of errors:
- sampling variability
- bootstrap resampling variability
- Complete enumerations for exhaustive bootstrap
Stanford University, Spring 2016, STATS 205
library(bootstrap); data(law); t(law)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 ## LSAT 576 635 558 578 666 580 555 661 651 605 653 575 545 572 594 ## GPA 339 330 281 303 344 307 300 343 336 313 312 274 276 288 296
## [1] 0.7763745
draw.bootstrap.sample = function() { n = dim(law)[1] ind = sample(n,replace = TRUE) return(cor(law[ind,]$LSAT,law[ind,]$GPA)) } B = 40000 thetastar = replicate(B,draw.bootstrap.sample())
Evaluate the correlation coefficient using a Monte Carlo simulation:
Create matrix of all \(\binom{2n-1}{n-1}\) enumerations:
library(partitions) n = 15 allCompositions = compositions(n,n)
allCompositions[,1:10]
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] ## [1,] 15 14 13 12 11 10 9 8 7 6 ## [2,] 0 1 2 3 4 5 6 7 8 9 ## [3,] 0 0 0 0 0 0 0 0 0 0 ## [4,] 0 0 0 0 0 0 0 0 0 0 ## [5,] 0 0 0 0 0 0 0 0 0 0 ## [6,] 0 0 0 0 0 0 0 0 0 0 ## [7,] 0 0 0 0 0 0 0 0 0 0 ## [8,] 0 0 0 0 0 0 0 0 0 0 ## [9,] 0 0 0 0 0 0 0 0 0 0 ## [10,] 0 0 0 0 0 0 0 0 0 0 ## [11,] 0 0 0 0 0 0 0 0 0 0 ## [12,] 0 0 0 0 0 0 0 0 0 0 ## [13,] 0 0 0 0 0 0 0 0 0 0 ## [14,] 0 0 0 0 0 0 0 0 0 0 ## [15,] 0 0 0 0 0 0 0 0 0 0
Check number of compositions:
library(assertthat) nCompositions = dim(allCompositions)[2] nCompositions
## [1] 77558760
# Check if we get the same value as in theory nCompositionsTheory = choose(2*n-1,n-1) if(!assert_that(nCompositions == nCompositionsTheory)) { print("there is a problem with the number of compositions") } else { print("the number of compositions is correct") }
## [1] "the number of compositions is correct"
Evaluate the correlation coefficient for all \(\binom{2n-1}{n-1}\) bootstrap samples.
library(parallel) if(!file.exists("enumData.Rdata")) { ptm = proc.time() enumData = mclapply(1:nCompositions, function(i) { ind = allCompositions[,i] law.list = lapply(1:n,function(j) matrix(rep(law[j,],ind[j]),ncol = 2 ,byrow = TRUE)) newLaw = do.call(rbind, law.list) c(cor(unlist(newLaw[,1]),unlist(newLaw[,2])),dmultinom(ind,prob = rep(1,n))) }, mc.cores = 4) proc.time() - ptm enumData = t(simplify2array(enumData)) colnames(enumData) = c("cor","weight") save(enumData,file = "enumData.Rdata") } else { load("enumData.Rdata") }
Same plot as before but now with all bootstrap samples:
Use the library parallel to speedup computations
library(parallel) enumData = mclapply(1:nCompositions, function(i) { ... }, mc.cores = 4 )
000, 001, 011, 010, 110, 111, 101, 100
000, 001, 010, 011, 100, 101, 110, 111
0,1
00, 01
10, 11
00, 01, 11, 10
0111
and using the mappingwe get the corresponding gray code 0100
1000
and using the mappingwe get the corresponding gray code 1100
We construct a Markov chain to sample from \(P( T \ge t)\):
Then to estimate \(P( T \ge t)\):
For background on statistical functionals and asymptotics of the boostrap: