one-way ANOVA

Create some fake data for three groups. n=5, all data sampled from a normal distribution with mean = 0, sd =1.

A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)

knitr::kable(df)

groups	DV
A	-0.6625851
A	1.4010315
A	0.3777561
A	-0.8265563
A	0.4482360
B	-0.8915240
B	-0.1462778
B	1.8976125
B	0.2460631
B	0.0506295
C	0.0842021
C	0.7405169
C	0.2532183
C	0.5565542
C	-0.3962939

print ANOVA summary and graph

library(ggplot2)
library(dplyr)

plot_df <- df %>%
            group_by(groups) %>%
            summarise(means=mean(DV))
ggplot(plot_df, aes(x=groups, y=means, fill=groups))+
  geom_bar(stat="identity")

summary(aov(DV~groups,df))

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  0.029  0.0144   0.021  0.979
## Residuals   12  8.312  0.6927

repeat a few times

for(i in 1:5){
  
A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)

plot_df <- df %>%
            group_by(groups) %>%
            summarise(means=mean(DV))

print(ggplot(plot_df, aes(x=groups, y=means, fill=groups))+
  geom_bar(stat="identity"))

print(summary(aov(DV~groups,df)))

}

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  0.615  0.3075   0.512  0.612
## Residuals   12  7.213  0.6011

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  1.791  0.8956   0.765  0.487
## Residuals   12 14.042  1.1702

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2   0.63  0.3150   0.322  0.731
## Residuals   12  11.74  0.9783

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  3.307  1.6533   2.184  0.155
## Residuals   12  9.085  0.7571

##             Df Sum Sq Mean Sq F value Pr(>F)
## groups       2  2.700  1.3498   1.871  0.196
## Residuals   12  8.656  0.7214

F-distribution

The null F-distribution, are the values of F you could have found, if all of your samples in each group are coming from the same normal distribution. We can compute this by simulation (pretending to run the experiment 1,000 times)

save_F<-c()
for(i in 1:1000){
A <- rnorm(n=5,mean=0,sd=1)
B <- rnorm(n=5,mean=0,sd=1)
C <- rnorm(n=5,mean=0,sd=1)
DV <- c(A,B,C)
groups <- rep(c("A","B","C"), each=5)
df <- data.frame(groups,DV)

aov_sum <- summary(aov(DV~groups,df))
save_F[i]<-aov_sum[[1]]$`F value`[1]
}

plotting the F-distribution

plot_df <- data.frame(sims=1:1000,
                      save_F)

critical_f<-sort(save_F)[950]

ggplot(plot_df, aes(x=save_F))+
  geom_histogram()+
  geom_vline(xintercept=critical_f)

print(critical_f)

## [1] 3.935332

ANOVA

Matt

4/1/2019

one-way ANOVA

print ANOVA summary and graph

repeat a few times

F-distribution

plotting the F-distribution