one-way ANOVA

Create some fake data for three groups. n=5, all data sampled from a normal distribution with mean = 0, sd =1.

A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)

knitr::kable(df)
groups DV
A -0.6625851
A 1.4010315
A 0.3777561
A -0.8265563
A 0.4482360
B -0.8915240
B -0.1462778
B 1.8976125
B 0.2460631
B 0.0506295
C 0.0842021
C 0.7405169
C 0.2532183
C 0.5565542
C -0.3962939

repeat a few times

for(i in 1:5){
  
A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)

plot_df <- df %>%
            group_by(groups) %>%
            summarise(means=mean(DV))

print(ggplot(plot_df, aes(x=groups, y=means, fill=groups))+
  geom_bar(stat="identity"))

print(summary(aov(DV~groups,df)))

}

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  0.615  0.3075   0.512  0.612
## Residuals   12  7.213  0.6011

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  1.791  0.8956   0.765  0.487
## Residuals   12 14.042  1.1702

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2   0.63  0.3150   0.322  0.731
## Residuals   12  11.74  0.9783

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  3.307  1.6533   2.184  0.155
## Residuals   12  9.085  0.7571

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  2.700  1.3498   1.871  0.196
## Residuals   12  8.656  0.7214

F-distribution

The null F-distribution, are the values of F you could have found, if all of your samples in each group are coming from the same normal distribution. We can compute this by simulation (pretending to run the experiment 1,000 times)

save_F<-c()
for(i in 1:1000){
A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)

aov_sum <- summary(aov(DV~groups,df))
save_F[i]<-aov_sum[[1]]$`F value`[1]
}

plotting the F-distribution

plot_df <- data.frame(sims=1:1000,
                      save_F)

critical_f<-sort(save_F)[950]

ggplot(plot_df, aes(x=save_F))+
  geom_histogram()+
  geom_vline(xintercept=critical_f)

print(critical_f)
## [1] 3.935332