EN VI

Compare theoretical and empirical alpha in R [SOLVED]?

How to Compare theoretical and empirical alpha in R [SOLVED]

I've written the following code to compare the theoretical alpha = 0.05 with the empirical one obtained from the built-in t.test in RStudio:

# Monte Carlo estimate of the empirical type I error rate (alpha) of the
# one-sample t-test, compared with the nominal (theoretical) level.
set.seed(1)  # fixed seed so the run is reproducible
N <- 1000    # size of the simulated finite population
n <- 20      # size of each sample drawn from the population
k <- 500     # number of repeated samples / t-tests

poblacion <- rnorm(N, 10, 10)  # simulated population (mean 10, sd 10)
mu.pob <- mean(poblacion)      # population mean, used as the H0 value
sd.pob <- sd(poblacion)        # population sd (not used below)

# One p-value per repetition: draw n values without replacement and test
# H0: mu = mu.pob. H0 is true by construction, so every rejection is a
# false positive.
p <- vapply(
  seq_len(k),
  function(i) t.test(poblacion[sample.int(N, n)], mu = mu.pob)$p.value,
  numeric(1)
)

a_teo <- 0.05
a_emp <- mean(p < a_teo)  # share of rejections at level a_teo
# print() makes the line appear even when the script is run via source()
print(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))

It works, printing both the theoretical and the empirical value. Now I want to make it more general, so that it runs for different values of 'n', so I wrote this:

# Empirical type I error rate (alpha) of the one-sample t-test for several
# sample sizes, printed per size and plotted against the sample size.
set.seed(1)
N <- 1000              # size of each simulated population
n <- c(2, 10, 15, 20)  # sample sizes to compare (t.test needs at least 2)
k <- 500               # number of t-tests per sample size
a_teo <- 0.05          # nominal (theoretical) significance level

z <- numeric(length(n))  # empirical alpha, one entry per sample size
for (idx in seq_along(n)) {
  poblacion <- rnorm(N, 10, 10)
  mu.pob <- mean(poblacion)
  sd.pob <- sd(poblacion)  # population sd (not used below)

  # The sample size is the current entry of n, not length(n).
  p <- vapply(
    seq_len(k),
    function(j) {
      muestra <- poblacion[sample.int(N, n[idx])]
      t.test(muestra, mu = mu.pob)$p.value
    },
    numeric(1)
  )

  a_emp <- mean(p < a_teo)
  z[idx] <- a_emp  # store by index; append() alone would discard the value
  print(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))
}
plot(n, z)

Solution:

Calling sprintf alone is not enough inside a for loop, because values are not auto-printed there; you need to wrap it in print.

# For every sample size in n: simulate a population, run k one-sample
# t-tests on samples of that size, and print the empirical alpha.
# N, n and k are taken from the surrounding session.
for (i in n) {
  poblacion <- rnorm(N, 10, 10)
  mu.pob <- mean(poblacion)
  sd.pob <- sd(poblacion)  # population sd (not used below)
  p <- numeric(k)
  for (j in seq_len(k)) {
    # the sample size is the current entry i of n, not length(n)
    muestra <- poblacion[sample.int(N, i)]
    p[j] <- t.test(muestra, mu = mu.pob)$p.value
  }
  a_teo <- 0.05
  a_emp <- mean(p < a_teo)
  # sprintf() only builds the string; inside a loop it must be print()ed
  print(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))
}
# The exact values printed depend on the seed and on the sizes stored in n.
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.056"
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.050"
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.064"
[1] "alpha_teo = 0.050 <-> alpha_emp = 0.048"

A more R-ish way to do this would be to wrap the logic in a function.

#' Compare the nominal and the empirical type I error rate of the t-test
#'
#' Simulates a population of `N` normal values (mean 10, sd 10), draws `k`
#' samples of size `n` without replacement and tests each one against the
#' population mean. The null hypothesis is true by construction, so the
#' share of p-values below `alpha` is the empirical type I error rate.
#'
#' @param N Size of the simulated population.
#' @param n Size of each sample; at least 2 and at most `N`.
#' @param k Number of samples (t-tests) to run.
#' @param alpha Nominal significance level.
#' @param verbose If `TRUE`, also report both levels via `message()`.
#' @return Numeric vector of length 2: `alpha` and the empirical alpha.
comp_fn <- function(N, n, k, alpha = .05, verbose = FALSE) {
  poblacion <- rnorm(N, 10, 10)
  mu.pob <- mean(poblacion)
  p <- replicate(k, t.test(poblacion[sample.int(N, n)], mu = mu.pob)$p.value)
  a_emp <- mean(p < alpha)
  if (verbose) {
    # report the `alpha` argument, not a global `a_teo`
    message(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", alpha, a_emp))
  }
  c(alpha, a_emp)
}
> 
# Two runs after a single seed; the second one also reports via message().
# Lines starting with "#>" show the output recorded for each call.
set.seed(1)
comp_fn(N = 1000, n = 20, k = 500)
#> [1] 0.050 0.058
comp_fn(N = 1000, n = 20, k = 500, verbose = TRUE)
#> alpha_teo = 0.050 <-> alpha_emp = 0.042
#> [1] 0.050 0.042

To loop over different arguments, mapply is your friend.

# One comp_fn() run per sample size; N and k are the same for every run.
# Each column of the result is one run: row 1 is alpha, row 2 the
# empirical alpha. Lines starting with "#>" show the recorded output.
set.seed(1)
mapply(comp_fn, n = c(2, 10, 15, 20), MoreArgs = list(N = 1000, k = 500))
#>       [,1]  [,2]  [,3]  [,4]
#> [1,] 0.050 0.050 0.050 0.050
#> [2,] 0.058 0.054 0.048 0.046