## Statistics, Summer term 2026, JGU Mainz
## Example:
## bootstrap confidence interval for the relative size of the dominant eigenvalue of a correlation matrix,
## data from B. Efron, R.J. Tibshirani, An Introduction to the Bootstrap, Chapman & Hall, 1993, see Table 7.1 there
scores <- read.csv("test_scores.csv")
n <- dim(scores)[1] # n=88 samples

## looking at the data:
pairs(scores) # (pairwise scatterplots)
hist(scores$mec, prob=TRUE)
hist(scores$vec, prob=TRUE)
hist(scores$alg, prob=TRUE)
hist(scores$ana, prob=TRUE)
hist(scores$sta, prob=TRUE)

## (empirical) covariance matrix:
G <- cor(scores)
eG <- eigen(G) # eigen decomposition of G
eG
G %*% eG$vectors[,1]; eG$values[1]*eG$vectors[,1] # (just to check)

tau <- eG$values[1]/(sum(eG$values)) # the test statistic (7.8) from Efron+Tibshirani
tau

compute.estimate <- function(sco) {
    eGG <- eigen(cor(sco))
    eGG$values[1]/(sum(eGG$values))
}

## create bootstrap replicates
B <- 5000

taub <- numeric(B)
for (b in 1:B) {
    sco <- scores[sample(1:n,size=n,replace=TRUE),]
    taub[b] <- compute.estimate(sco)
}

hist(taub, breaks=50, prob=TRUE)
meantaub <- mean(taub)
abline(v=meantaub, lwd=2)


sehb <- sqrt(sum((taub-meantaub)^2)/(B-1))
sehb

## creating studentized bootstrap values:
taubs <- (taub-meantaub)/sehb
hist(taubs, breaks=50, prob=TRUE)

## corresponding quantiles:
alpha <- 0.05
lowerqb <- quantile(taubs,alpha/2); upperqb <- quantile(taubs,1-alpha/2)
c(lowerqb, upperqb)
abline(v=c(lowerqb, upperqb), col="blue")

## this gives the "bootstrap-t" confidence interval for tau:
c(tau - upperqb*sehb, tau - lowerqb*sehb)
