Skip to contents
  1. Explain the concept of main effects and interactions with an example using R. For example, this could include a definition of main effects and interactions and a figure depicting main effects and an interaction along with an explanation of the patterns for each. A major point of this problem is for you to to engage in the task of developing an explanation of these concepts that would 1) be helpful for you to understand the concepts, and 2) could be helpful for others to understand these concepts. (3 points)
library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
#> ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
#> ✓ tibble  3.1.6     ✓ dplyr   1.0.8
#> ✓ tidyr   1.2.0     ✓ stringr 1.4.0
#> ✓ readr   2.1.2     ✓ forcats 0.5.1
#> Warning: package 'tidyr' was built under R version 4.1.2
#> Warning: package 'readr' was built under R version 4.1.2
#> Warning: package 'dplyr' was built under R version 4.1.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> x dplyr::filter() masks stats::filter()
#> x dplyr::lag()    masks stats::lag()

n <- 10
factorial_data <- tibble(A = factor(rep(c("L1","L2"), each = n)),
                         B = factor(rep(rep(c("L1","L2"), each=n/2),2)),
                         C = factor(rep(c("A","B","C","D"), each = n/2)),
                         DV = c(rnorm(n/2,-2,1),
                                rnorm(n/2,-2,1),
                                rnorm(n/2,2,1),
                                rnorm(n/2,4,1))
)
  
A_means <- factorial_data %>%
  group_by(A) %>%
  summarise(mean_DV_A = mean(DV))

B_means <- factorial_data %>%
  group_by(B) %>%
  summarise(mean_DV_B = mean(DV))

cell_means <- factorial_data %>%
  group_by(A,B) %>%
  summarise(mean_DV_cell = mean(DV))
#> `summarise()` has grouped output by 'A'. You can override using the `.groups`
#> argument.

grand_mean <- mean(factorial_data$DV)

factorial_data <- factorial_data %>%
  mutate(A_MeanDV = rep(A_means$mean_DV_A, each = n),
         B_MeanDV = rep(rep(B_means$mean_DV_B, each = n/2),2),
         cell_MeanDV = rep(cell_means$mean_DV_cell, each = n/2),
         grand_mean
         ) %>%
  mutate(DV_deviations = DV-grand_mean,
         A_deviations = (A_MeanDV - grand_mean),
         B_deviations = (B_MeanDV - grand_mean),
         straight_cell_deviations = (cell_MeanDV - grand_mean),
         cell_deviations = (A_MeanDV-grand_mean) + (B_MeanDV-grand_mean) - (cell_MeanDV-grand_mean)
         )

summary(aov(DV~A*B, data=factorial_data))
#>             Df Sum Sq Mean Sq F value   Pr(>F)    
#> A            1 122.09  122.09  185.74 3.19e-10 ***
#> B            1  10.37   10.37   15.78 0.001095 ** 
#> A:B          1  13.66   13.66   20.78 0.000322 ***
#> Residuals   16  10.52    0.66                     
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

sum(factorial_data$A_deviations^2)
#> [1] 122.0947
sum(factorial_data$B_deviations^2)
#> [1] 10.36959
sum(factorial_data$straight_cell_deviations^2) # note these are incorrect
#> [1] 146.1229
sum(factorial_data$cell_deviations^2)
#> [1] 13.65857
sum(factorial_data$DV_deviations^2)
#> [1] 156.6404
factorial_data <- factorial_data %>%
  mutate(score = 1:(n*2))

ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_hline(yintercept = mean(factorial_data$DV))+
  geom_point(aes(y=DV))+
  geom_segment(aes(xend = score, y=grand_mean, yend = DV), alpha = .9, color="red")


ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_bar(aes(y=DV_deviations), fill="red", stat="identity")+
  geom_bar(aes(y=A_deviations), fill="green", stat="identity", alpha=.5)


ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_bar(aes(y=DV_deviations), fill="red", stat="identity")+
  geom_bar(aes(y=B_deviations), fill="green", stat="identity", alpha=.5)


ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_bar(aes(y=DV_deviations), fill="red", stat="identity")+
  geom_bar(aes(y=cell_deviations), fill="green", stat="identity", alpha=.5)


ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_bar(aes(y=DV_deviations), fill="red", stat="identity")+
  geom_bar(aes(y=(A_deviations+B_deviations)), fill="green", stat="identity", alpha=.5)


ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_bar(aes(y=DV_deviations), fill="red", stat="identity")+
  geom_bar(aes(y=(A_deviations+B_deviations+cell_deviations)), fill="green", stat="identity", alpha=.5)

factorial_data <- factorial_data %>%
  mutate(score = 1:(n*2))

ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_hline(yintercept = mean(factorial_data$DV))+
  geom_point(aes(y=A_MeanDV), size= 3,color="black") +
  geom_point(aes(y=B_MeanDV), size=3) +
  geom_point(aes(y=B_MeanDV), color="white") +
  geom_point(aes(y=cell_MeanDV,color=C), size=3) +
  geom_point(aes(y=cell_MeanDV), color="white") +
  geom_point(aes(y=DV), color="black")

  
## SS Total

ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_hline(yintercept = mean(factorial_data$DV))+
  geom_point(aes(y=A_MeanDV), size= 3,color="black") +
  geom_point(aes(y=B_MeanDV), size=3) +
  geom_point(aes(y=B_MeanDV), color="white") +
  geom_point(aes(y=cell_MeanDV,color=C), size=3) +
  geom_point(aes(y=cell_MeanDV), color="white") +
  geom_point(aes(y=DV), color="black") +
  geom_segment(aes(xend = score, y=grand_mean, yend = DV_deviations), alpha = .9, color="red")


## SS A

ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_hline(yintercept = mean(factorial_data$DV))+
  geom_point(aes(y=A_MeanDV), size= 3,color="black") +
  geom_point(aes(y=B_MeanDV), size=3) +
  geom_point(aes(y=B_MeanDV), color="white") +
  geom_point(aes(y=cell_MeanDV,color=C), size=3) +
  geom_point(aes(y=cell_MeanDV), color="white") +
  geom_point(aes(y=DV), color="black") +
  geom_segment(aes(xend = score, y=grand_mean, yend = DV_deviations), alpha = .9, color="red") +
  geom_segment(aes(xend = score, y=grand_mean, yend = A_MeanDV), alpha = .9, color="green")


ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_hline(yintercept = mean(factorial_data$DV))+
  geom_point(aes(y=A_MeanDV), size= 3,color="black") +
  geom_point(aes(y=B_MeanDV), size=3) +
  geom_point(aes(y=B_MeanDV), color="white") +
  geom_point(aes(y=cell_MeanDV,color=C), size=3) +
  geom_point(aes(y=cell_MeanDV), color="white") +
  geom_point(aes(y=DV), color="black") +
  geom_segment(aes(xend = score, y=grand_mean, yend = DV_deviations), alpha = .9, color="red") +
  geom_segment(aes(xend = score, y=grand_mean, yend = B_MeanDV), alpha = .9, color="green")


ggplot(factorial_data, aes(x=score, shape=A, color=B))+
  geom_hline(yintercept = mean(factorial_data$DV))+
  geom_point(aes(y=A_MeanDV), size= 3,color="black") +
  geom_point(aes(y=B_MeanDV), size=3) +
  geom_point(aes(y=B_MeanDV), color="white") +
  geom_point(aes(y=cell_MeanDV,color=C), size=3) +
  geom_point(aes(y=cell_MeanDV), color="white") +
  geom_point(aes(y=DV), color="black") +
  geom_segment(aes(xend = score, y=grand_mean, yend = DV_deviations), alpha = .9, color="red") +
  geom_segment(aes(xend = score, y=grand_mean, yend = cell_MeanDV), alpha = .9, color="green")

  1. Complete the 2x2 factorial lab found here https://crumplab.github.io/statisticsLab/lab-10-factorial-anova.html, up to section 10.4.8. More specifically, your task is to follow that lab exercise to load in the data, transform the data into long-format, conduct a 2x2 between subjects ANOVA, and write a short results section reporting the main effects and interaction. (3 points)
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
all_data <- fread("data/stroop_stand.csv")

RTs <- c(as.numeric(unlist(all_data[,1])),
         as.numeric(unlist(all_data[,2])),
         as.numeric(unlist(all_data[,3])),
         as.numeric(unlist(all_data[,4]))
         )

Congruency <- rep(rep(c("Congruent","Incongruent"),each=50),2)
Posture <- rep(c("Stand","Sit"),each=100)
Subject <- rep(1:50,4)

stroop_df <- data.frame(Subject,Congruency,Posture,RTs)

library(tidyr)

stroop_long<- gather(all_data, key=Condition, value=RTs, 
                     congruent_stand, incongruent_stand,
                     congruent_sit, incongruent_sit)

new_columns <- tstrsplit(stroop_long$Condition, "_", names=c("Congruency","Posture"))
stroop_long <- cbind(stroop_long,new_columns)
stroop_long <- cbind(stroop_long,Subject=rep(1:50,4))

library(dplyr)
library(ggplot2)
library(tidyr)

plot_means <- stroop_long %>%
            group_by(Congruency,Posture) %>%
            summarise(mean_RT = mean(RTs),
                      SEM = sd(RTs)/sqrt(length(RTs)))
#> `summarise()` has grouped output by 'Congruency'. You can override using the
#> `.groups` argument.

ggplot(plot_means, aes(x=Posture, y=mean_RT, group=Congruency, fill=Congruency))+
  geom_bar(stat="identity", position="dodge")+
  geom_errorbar(aes(ymin=mean_RT-SEM, ymax=mean_RT+SEM), 
                position=position_dodge(width=0.9),
                width=.2)+
  theme_classic()+
  coord_cartesian(ylim=c(700,1000))

aov_out<-aov(RTs ~ Congruency*Posture, stroop_long)
summary_out<-summary(aov_out)
summary_out
#>                     Df  Sum Sq Mean Sq F value   Pr(>F)    
#> Congruency           1  576822  576822  43.734 3.49e-10 ***
#> Posture              1   32303   32303   2.449    0.119    
#> Congruency:Posture   1    6560    6560   0.497    0.481    
#> Residuals          196 2585080   13189                     
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. In chapter 10 of Crump et al. (2018), there is a discussion of patterns of main effects and interactions that can occur in a 2x2 design, which represents perhaps the simplest factorial design. There are 8 possible outcomes discussed https://crumplab.github.io/statistics/more-on-factorial-designs.html#looking-at-main-effects-and-interactions. Examples of these 8 outcomes are shown in two figures, one with bar graphs, and one with line graphs. Reproduce either of these figures using ggplot2. (3 points)

p1<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(6,6,6,6))
p2<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(10,10,5,5))
p3<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(10,13,5,2))
p4<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(5,10,10,15))
p5<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(10,18,5,7))
p6<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(10,2,10,2))
p7<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(2,12,5,9))
p8<- data.frame(IV1 = c("A","A","B","B"),
                IV2 = c("1","2","1","2"),
                means = c(5,10,10,5))
all_22s <- rbind(p1,p2,p3,p4,p5,p6,p7,p8)
type <- c(rep("~1, ~2, ~1x2",4),
          rep("1, ~2, ~1x2",4),
          rep("1, ~2, 1x2",4),
          rep("1, 2, ~1x2",4),
          rep("1, 2, 1x2",4),
          rep("~1, 2, ~1x2",4),
          rep("~1, 2, 1x2",4),
          rep("~1, ~2, 1x2",4))
type<-as.factor(type)
all_22s <- cbind(all_22s,type)
ggplot(all_22s, aes(x=IV1, y=means, group=IV2, fill=IV2))+
  geom_bar(stat="identity", position="dodge")+
  theme_classic()+
  facet_wrap(~type, nrow=2)+
  theme(legend.position = "top")


ggplot(all_22s, aes(x=IV1, y=means, group=IV2, color=IV2))+
  geom_point()+
  geom_line()+
  theme_classic()+
  facet_wrap(~type, nrow=2)+
  theme(legend.position = "top")

  1. In the conceptual section of this lab we used an R simulation to find the family-wise type I error rate for a simple factorial design with 2 independent variables. Use an R simulation to find the family-wise type I error rate for a factorial design with 3 independent variables. (3 points)
sims <- rbinom(10000,7,.05)
length(sims[sims > 0])/10000
#> [1] 0.2907

save_sim <- tibble()

# loop to conduct i number of simulations
for(i in 1:10000){

  #simulate null data for a 2x2
  n <- 12
  factorial_data <- tibble(A = factor(rep(c("L1","L2"), each = n)),
                         B = factor(rep(rep(c("L1","L2"), each = n/2),2)),
                         C = factor(rep(c("L1","L2"), n)),
                         DV = rnorm(n*2,0,1))
  # compute ANOVA
  output <- summary(aov(DV ~ A*B*C, data = factorial_data))

  #save p-values for each effect
  sim_tibble <- tibble(p_vals = output[[1]]$`Pr(>F)`[1:7],
                       effect = c("A","B","C","AxB","AxC","BxC","AxBxC"),
                       sim = rep(i,7))

  #add the saved values to the overall tibble
  save_sim <-rbind(save_sim,sim_tibble)
}

type_I_errors <- save_sim %>%
  filter(p_vals < .05) %>%
  group_by(sim) %>%
  count()

dim(type_I_errors)[1]/10000
#> [1] 0.2767
  1. Show that a 2x2 factorial ANOVA can be accomplished by using a one-factor ANOVA with three linear contrasts. (3 points)

library(tibble)
library(dplyr)

n <- 10
factorial_data <- tibble(A = factor(rep(c("L1","L2"), each = n)),
                         B = factor(rep(rep(c("L1","L2"), each = n/2),2)),
                         DV = rnorm(n*2,0,1))

aov_out <- aov(DV ~ A*B, data = factorial_data)
summary(aov_out)
#>             Df Sum Sq Mean Sq F value Pr(>F)  
#> A            1  0.008  0.0079   0.010 0.9227  
#> B            1  0.903  0.9026   1.105 0.3088  
#> A:B          1  2.954  2.9539   3.616 0.0754 .
#> Residuals   16 13.071  0.8170                 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

factorial_data <- factorial_data %>%
  mutate(C = factor(rep(c("A","B","C","D"), each = n/2)))

# define your new set of contrasts

c1 <- c(1,1,-1,-1)
c2 <- c(1,-1,1,-1)
c3 <- c(1,-1,-1,1)


# combine them into the columns of a matrix
my_contrasts <- cbind(c1, c2, c3)

# assign them to the contrast property for your IV
contrasts(factorial_data$C) <- my_contrasts

aov_out <- aov(DV ~ C, data = factorial_data)

(full_summary <- summary.aov(aov_out,
                             split=list(C=list("A"=1, 
                                                "B" = 2, 
                                                "A:B"= 3)
                                        )
                             )
  )
#>             Df Sum Sq Mean Sq F value Pr(>F)  
#> C            3  3.864  1.2881   1.577 0.2340  
#>   C: A       1  0.008  0.0079   0.010 0.9227  
#>   C: B       1  0.903  0.9026   1.105 0.3088  
#>   C: A:B     1  2.954  2.9539   3.616 0.0754 .
#> Residuals   16 13.071  0.8170                 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1