Create some fake data for three groups. n=5, all data sampled from a normal distribution with mean = 0, sd =1.
A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)
knitr::kable(df)
groups | DV |
---|---|
A | -0.6625851 |
A | 1.4010315 |
A | 0.3777561 |
A | -0.8265563 |
A | 0.4482360 |
B | -0.8915240 |
B | -0.1462778 |
B | 1.8976125 |
B | 0.2460631 |
B | 0.0506295 |
C | 0.0842021 |
C | 0.7405169 |
C | 0.2532183 |
C | 0.5565542 |
C | -0.3962939 |
library(ggplot2)
library(dplyr)
plot_df <- df %>%
group_by(groups) %>%
summarise(means=mean(DV))
ggplot(plot_df, aes(x=groups, y=means, fill=groups))+
geom_bar(stat="identity")
summary(aov(DV~groups,df))
## Df Sum Sq Mean Sq F value Pr(>F)
## groups 2 0.029 0.0144 0.021 0.979
## Residuals 12 8.312 0.6927
for(i in 1:5){
A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)
plot_df <- df %>%
group_by(groups) %>%
summarise(means=mean(DV))
print(ggplot(plot_df, aes(x=groups, y=means, fill=groups))+
geom_bar(stat="identity"))
print(summary(aov(DV~groups,df)))
}
## Df Sum Sq Mean Sq F value Pr(>F)
## groups 2 0.615 0.3075 0.512 0.612
## Residuals 12 7.213 0.6011
## Df Sum Sq Mean Sq F value Pr(>F)
## groups 2 1.791 0.8956 0.765 0.487
## Residuals 12 14.042 1.1702
## Df Sum Sq Mean Sq F value Pr(>F)
## groups 2 0.63 0.3150 0.322 0.731
## Residuals 12 11.74 0.9783
## Df Sum Sq Mean Sq F value Pr(>F)
## groups 2 3.307 1.6533 2.184 0.155
## Residuals 12 9.085 0.7571
## Df Sum Sq Mean Sq F value Pr(>F)
## groups 2 2.700 1.3498 1.871 0.196
## Residuals 12 8.656 0.7214
The null F-distribution, are the values of F you could have found, if all of your samples in each group are coming from the same normal distribution. We can compute this by simulation (pretending to run the experiment 1,000 times)
save_F<-c()
for(i in 1:1000){
A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)
aov_sum <- summary(aov(DV~groups,df))
save_F[i]<-aov_sum[[1]]$`F value`[1]
}
plot_df <- data.frame(sims=1:1000,
save_F)
critical_f<-sort(save_F)[950]
ggplot(plot_df, aes(x=save_F))+
geom_histogram()+
geom_vline(xintercept=critical_f)
print(critical_f)
## [1] 3.935332