library(dplyr)
library(tidyverse)
library(jsonlite)
library(xtable)
library(data.table)
E1_combined
Combination of mturk and sona
Load libraries
Import Data
# Read the text file from JATOS ...
read_file('data/E1B_self_reference_deID.JSON') %>%
# ... split it into lines ...
str_split('\n') %>% dplyr::first() %>%
# ... filter empty rows ...
discard(function(x) x == '') %>%
# ... parse JSON into a data.frame
map_dfr(fromJSON, flatten=T) -> all_data_mturk
read_file('data/E1C_self_reference_deID.JSON') %>%
# ... split it into lines ...
str_split('\n') %>% dplyr::first() %>%
# ... filter empty rows ...
discard(function(x) x == '') %>%
# ... parse JSON into a data.frame
map_dfr(fromJSON, flatten=T) -> all_data_sona
<- all_data_sona %>%
all_data_sona mutate(ID = paste0(ID,"sona"))
<- rbind(all_data_mturk,all_data_sona) all_data
Demographics
library(tidyr)
<- all_data %>%
demographics filter(trial_type == "survey-html-form") %>%
select(ID,response) %>%
unnest_wider(response) %>%
mutate(age = as.numeric(age))
<- demographics %>%
age_demographics summarize(mean_age = mean(age),
sd_age = sd(age),
min_age = min(age),
max_age = max(age))
<- apply(demographics[-1], 2, table) factor_demographics
A total of 121 participants were recruited from Amazon’s Mechanical Turk. Mean age was NA (range = NA to NA ). There were 78 females, and 39 males. There were 105 right-handed participants, and 12 left or both handed participants. 58 participants reported normal vision, and 53 participants reported corrected-to-normal vision. 94 participants reported English as a first language, and 23 participants reported English as a second language.
Pre-processing
50 workers from Amazon’s Mechanical Turk were paid $2.00 to complete this task. The only restriction was to limit workers to the US locale. Typewritten responses during the recall portion indicated that some of the participants were not completing the task as intended. For example, some of the responses appeared to be written by a chatbot, and did not contain words from the experiment.
We were were interested in analyzing data from participants who appeared to engage with the task as intended. To identify participants for inclusion we used accuracy data from the case judgment task. We included participants if their accuracy was 70% or greater.
Case judgment accuracy
Get case judgment accuracy for all participants.
<- all_data %>%
case_judgment filter(encoding_trial_type == "study_word",
== "case") %>%
study_instruction mutate(response = as.character(unlist(response))) %>%
mutate(accuracy = case_when(
== "0" & letter_case == "upper" ~ 1,
response == "1" & letter_case == "upper" ~ 0,
response == "0" & letter_case == "lower" ~ 0,
response == "1" & letter_case == "lower" ~ 1
response %>%
)) group_by(ID) %>%
summarise(percent_correct = mean(accuracy))
ggplot(case_judgment, aes(x=percent_correct))+
geom_histogram() +
geom_vline(xintercept=.7)
Response bias
Check to see whether participants pressed one button all of the time. Not used for exclusion.
# get response bias
<- all_data %>%
response_bias filter(encoding_trial_type == "study_word") %>%
mutate(response = as.character(unlist(response))) %>%
group_by(ID,study_instruction) %>%
count(response)
post-task questions
<- all_data %>%
post_questions filter(trial_type == "survey-text") %>%
select(ID,response) %>%
unnest_wider(response)
All exclusions
no exclusions
<- case_judgment %>%
all_excluded filter(percent_correct < .7) %>%
select(ID) %>%
pull()
length(all_excluded)
[1] 1
<- all_data %>%
filtered_data filter(ID %in% all_excluded == FALSE)
Accuracy analysis
Define Helper functions
To do, consider moving the functions into the R package for this project
# attempt general solution
## Declare helper functions
################
# get_mean_sem
# data = a data frame
# grouping_vars = a character vector of factors for analysis contained in data
# dv = a string indicated the dependent variable colunmn name in data
# returns data frame with grouping variables, and mean_{dv}, sem_{dv}
# note: dv in mean_{dv} and sem_{dv} is renamed to the string in dv
<- function(data, grouping_vars, dv, digits=3){
get_mean_sem <- data %>%
a group_by_at(grouping_vars) %>%
summarize("mean_{ dv }" := round(mean(.data[[dv]]), digits),
"sem_{ dv }" := round(sd(.data[[dv]])/sqrt(length(.data[[dv]])),digits),
.groups="drop")
return(a)
}
################
# get_effect_names
# grouping_vars = a character vector of factors for analysis
# returns a named list
# list contains all main effects and interaction terms
# useful for iterating the computation means across design effects and interactions
<- function(grouping_vars){
get_effect_names <- grouping_vars
effect_names if( length(grouping_vars > 1) ){
for( i in 2:length(grouping_vars) ){
<- c(effect_names,apply(combn(grouping_vars,i),2,paste0,collapse=":"))
effect_names
}
}<- strsplit(effect_names, split=":")
effects names(effects) <- effect_names
return(effects)
}
################
# print_list_of_tables
# table_list = a list of named tables
# each table is printed
# names are header level 3
<- function(table_list){
print_list_of_tables for(i in 1:length(table_list)){
cat("###",names(table_list[i]))
cat("\n")
print(knitr::kable(table_list[[i]]))
cat("\n")
} }
Conduct Analysis
Study phase immediate recall
# get recall proportion correct for each participant
<- filtered_data %>%
study_phase_recall filter(phase == "study_recall",
== "recall") %>%
encoding_recall separate(col = paragraph,
into = c("first_word","second_word"),
sep = " ",
remove = FALSE,
extra = "merge") %>%
mutate(accuracy = tolower(target_word) == tolower(first_word),
study_instruction = factor(study_instruction, levels= c("case","semantic","self"))) %>%
group_by(ID,study_instruction) %>%
summarize(percent_correct = mean(accuracy))
# get means in each question condition
<- get_mean_sem(study_phase_recall,
study_phase_recall_means grouping_vars = c("study_instruction"),
dv = "percent_correct")
# run ANOVA
<- study_phase_recall %>%
study_phase_recall ungroup() %>%
mutate(ID = as.factor(ID),
study_instruction = as.factor(study_instruction))
<- aov(percent_correct ~ study_instruction + Error(ID/study_instruction),
study_phase_recall_aov
study_phase_recall)
# save printable summaries
<- papaja::apa_print(study_phase_recall_aov)
study_phase_recall_apa_print
::kable(study_phase_recall_means) knitr
study_instruction | mean_percent_correct | sem_percent_correct |
---|---|---|
case | 0.873 | 0.018 |
semantic | 0.918 | 0.011 |
self | 0.905 | 0.015 |
During the encoding phase participants attempted to immediately recall half of the words following the primary judgment. We computed proportion of correctly recalled words for each participant separately in each encoding question condition. These means were submitted to a one-way repeated measures ANOVA, with question type as the sole factor. Mean proportion correctly recalled was 0.873 in the case judgment, 0.918 in the semantic judgment, and 0.905 in the self-reference condition; \(F(2, 234) = 4.03\), \(\mathit{MSE} = 0.02\), \(p = .019\), \(\hat{\eta}^2_G = .014\).
Study phase word judgment
# get recall proportion correct for each participant
<- filtered_data %>%
study_phase_judgment
filter(phase == "main_study",
== "study_word") %>%
encoding_trial_type
mutate(accuracy = case_when(study_instruction == "case" &
== "lower" &
letter_case == 1 ~ TRUE,
response == "case" &
study_instruction == "lower" &
letter_case == 0 ~ FALSE,
response == "case" &
study_instruction == "upper" &
letter_case == 0 ~ TRUE,
response == "case" &
study_instruction == "upper" &
letter_case == 1 ~ TRUE,
response == "semantic" &
study_instruction == "low" &
likeable == 0 ~ FALSE,
response == "semantic" &
study_instruction == "low" &
likeable == 1 ~ TRUE,
response == "semantic" &
study_instruction == "high" &
likeable == 0 ~ TRUE,
response == "semantic" &
study_instruction == "high" &
likeable == 1 ~ FALSE,
response == "self" &
study_instruction == "low" &
likeable == 0 ~ FALSE,
response == "self" &
study_instruction == "low" &
likeable == 1 ~ TRUE,
response == "self" &
study_instruction == "high" &
likeable == 0 ~ TRUE,
response == "self" &
study_instruction == "high" &
likeable == 1 ~ FALSE
response
),study_instruction = factor(study_instruction,
levels= c("case","semantic","self"))) %>%
group_by(ID,study_instruction) %>%
summarize(percent_correct = mean(accuracy)) %>%
ungroup()
# get means in each question condition
<- get_mean_sem(study_phase_judgment,
study_phase_judgment_means grouping_vars = c("study_instruction"),
dv = "percent_correct")
::kable(study_phase_judgment_means) knitr
study_instruction | mean_percent_correct | sem_percent_correct |
---|---|---|
case | 0.999 | 0.001 |
semantic | 0.961 | 0.006 |
self | 0.782 | 0.015 |
For completeness we report mean performance in the encoding phase for each of the word judgment condition. See above table.
Recall Test
# obtain recall data from typed answers
<- filtered_data %>%
recall_data filter(phase %in% c("recall_1","recall_2") == TRUE ) %>%
select(ID,phase,paragraph) %>%
pivot_wider(names_from = phase,
values_from = paragraph) %>%
mutate(recall_1 = unlist(lapply(recall_1,"[[",1)),
recall_2 = unlist(lapply(recall_2,"[[",1))) %>%
mutate(recall_1 = paste(recall_1,recall_2,sep = " ")) %>%
select(ID,recall_1) %>%
# separate_longer_delim(cols = recall_1,
# delim = " ") %>%
mutate(recall_1 = tolower(recall_1)) %>%
mutate(recall_1 = gsub("[^[:alnum:][:space:]]","",recall_1))
<- filtered_data %>%
encoding_words_per_subject filter(encoding_trial_type == "study_word",
== "main_study")
phase
<- left_join(encoding_words_per_subject,recall_data,by = 'ID') %>%
recall_data mutate(recall_1 = strsplit(recall_1," "))
# implement a spell-checking method
<- c()
recall_success <- c()
min_string_distance for(i in 1:dim(recall_data)[1]){
<- unlist(recall_data$recall_1[i])
recalled_words <- recalled_words[recalled_words != ""]
recalled_words if (length(recalled_words) == 0 ) recalled_words <- "nonerecalled"
<- tolower(recall_data$target_word[i]) %in% recalled_words
recall_success[i] <- min(sapply(recalled_words,FUN = function(x) {
min_string_distance[i] ::stringdist(a=x,b = tolower(recall_data$target_word[i]), method = "lv")
stringdist
}))
}
# recall proportion correct by subject
# correct for unequal conditions. 4 words in recall, 8 words in no recall
<- recall_data %>%
recall_data_subject mutate(recall_success = recall_success,
min_string_distance = min_string_distance) %>%
mutate(close_recall = min_string_distance <= 2) %>%
group_by(ID,study_instruction,encoding_recall,block_type) %>%
summarise(number_recalled = sum(recall_success),
number_close_recalled = sum(close_recall)) %>%
ungroup() %>%
mutate(proportion_recalled = case_when(encoding_recall == "no_recall" ~ number_close_recalled/6,
== "recall" ~ number_close_recalled/6)) %>%
encoding_recall mutate(ID = as.factor(ID),
study_instruction = as.factor(study_instruction),
encoding_recall = as.factor(encoding_recall),
block_type = as.factor(block_type))
# Condition means
<- get_mean_sem(recall_data_subject,
mean_recall_data c("study_instruction","encoding_recall", "block_type"),
"proportion_recalled") %>%
ungroup() %>%
mutate(study_instruction = factor(study_instruction,levels = c("case","semantic","self")),
`Retrieval Practice` = case_when(
== "no_recall" ~ "No Retrieval Practice \n during study \n",
encoding_recall == "recall" ~ "Retrieval Practice \n during study \n",
encoding_recall
))
<- ggplot(mean_recall_data,
recall_plot aes(x = study_instruction,
y = mean_proportion_recalled,
fill= `Retrieval Practice`))+
geom_bar(stat="identity",position="dodge",color="black") +
geom_errorbar(aes(ymin = mean_proportion_recalled - sem_proportion_recalled,
ymax = mean_proportion_recalled + sem_proportion_recalled),
width=.9, position=position_dodge2(width = 0.2, padding = 0.8)) +
ylab("Proportion words recalled")+
xlab("Study Instruction") +
theme_classic(base_size = 15)+
theme(legend.position = "top")+
facet_wrap(~block_type)
recall_plot
Recall test ANOVA
## Condition-level means
# get all possible main effects and interactions
<- get_effect_names(c("block_type","encoding_recall", "study_instruction"))
recall_effect_names
<- lapply(recall_effect_names, FUN = function(x) {
recall_effect_means get_mean_sem(data=recall_data_subject,
grouping_vars = x,
dv = "proportion_recalled") %>%
as.data.table()
})
# run ANOVA
<- aov(proportion_recalled ~ block_type*encoding_recall*study_instruction + Error(ID/(study_instruction*encoding_recall)), data = recall_data_subject)
recall_aov
# save printable summaries
<- papaja::apa_print(recall_aov)
recall_apa_print
::kable(xtable(summary(recall_aov))) knitr
Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
---|---|---|---|---|---|
block_type | 1 | 0.1618656 | 0.1618656 | 1.3525608 | 0.2472178 |
Residuals | 116 | 13.8821159 | 0.1196734 | NA | NA |
study_instruction | 2 | 1.7205744 | 0.8602872 | 23.2130614 | 0.0000000 |
block_type:study_instruction | 2 | 0.0610239 | 0.0305120 | 0.8233016 | 0.4402583 |
Residuals | 232 | 8.5980313 | 0.0370605 | NA | NA |
encoding_recall | 1 | 0.6528955 | 0.6528955 | 26.2258962 | 0.0000012 |
block_type:encoding_recall | 1 | 0.0102025 | 0.0102025 | 0.4098207 | 0.5233230 |
Residuals | 116 | 2.8878279 | 0.0248951 | NA | NA |
encoding_recall:study_instruction | 2 | 0.1207627 | 0.0603814 | 2.5906930 | 0.0771359 |
block_type:encoding_recall:study_instruction | 2 | 0.0738586 | 0.0369293 | 1.5844697 | 0.2072673 |
Residuals | 232 | 5.4072306 | 0.0233070 | NA | NA |
Collapse over blocking variable
<- get_mean_sem(recall_data_subject,
mean_recall_data_collapsed c("study_instruction","encoding_recall"),
"proportion_recalled") %>%
ungroup() %>%
mutate(study_instruction = factor(study_instruction,levels = c("case","semantic","self")),
`Retrieval Practice` = case_when(
== "no_recall" ~ "No Retrieval Practice \n during study \n",
encoding_recall == "recall" ~ "Retrieval Practice \n during study \n",
encoding_recall
))
<- ggplot(mean_recall_data_collapsed,
recall_plot_collapsed aes(x = study_instruction,
y = mean_proportion_recalled,
fill= `Retrieval Practice`))+
geom_bar(stat="identity",position="dodge",color="black") +
geom_errorbar(aes(ymin = mean_proportion_recalled - sem_proportion_recalled,
ymax = mean_proportion_recalled + sem_proportion_recalled),
width=.9, position=position_dodge2(width = 0.2, padding = 0.8)) +
ylab("Proportion words recalled")+
xlab("Study Instruction") +
theme_classic(base_size = 15)+
theme(legend.position = "top")
recall_plot_collapsed
## Condition-level means
# get all possible main effects and interactions
<- get_effect_names(c("encoding_recall", "study_instruction"))
recall_effect_names_collapsed
<- lapply(recall_effect_names_collapsed, FUN = function(x) {
recall_effect_means_collapsed get_mean_sem(data=recall_data_subject,
grouping_vars = x,
dv = "proportion_recalled") %>%
as.data.table()
})
# run ANOVA
<- aov(proportion_recalled ~ encoding_recall*study_instruction + Error(ID/(study_instruction*encoding_recall)), data = recall_data_subject)
recall_aov_collapsed
# save printable summaries
<- papaja::apa_print(recall_aov_collapsed)
recall_apa_print_collapsed
::kable(xtable(summary(recall_aov_collapsed))) knitr
Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
---|---|---|---|---|---|
Residuals | 117 | 14.0439815 | 0.1200340 | NA | NA |
study_instruction | 2 | 1.7205744 | 0.8602872 | 23.248172 | 0.0000000 |
Residuals | 234 | 8.6590552 | 0.0370045 | NA | NA |
encoding_recall | 1 | 0.6528955 | 0.6528955 | 26.358857 | 0.0000011 |
Residuals | 117 | 2.8980304 | 0.0247695 | NA | NA |
encoding_recall:study_instruction | 2 | 0.1207627 | 0.0603814 | 2.577816 | 0.0780951 |
Residuals | 234 | 5.4810891 | 0.0234235 | NA | NA |
simple effects
Compare retrieval practice vs no retrieval practice for each judgment condition
# Self condition
# Retrieval practice vs no retrieval practice
<- recall_data_subject %>%
self_retrieval_practice filter(study_instruction == "self")
<- t.test(proportion_recalled ~ encoding_recall, paired = TRUE, data= self_retrieval_practice)
self_retrieval_practice_t
<- papaja::apa_print(self_retrieval_practice_t)
self_retrieval_practice_t_apa
# Semantic condition
# Retrieval practice vs no retrieval practice
<- recall_data_subject %>%
semantic_retrieval_practice filter(study_instruction == "semantic")
<- t.test(proportion_recalled ~ encoding_recall, paired = TRUE, data= semantic_retrieval_practice)
semantic_retrieval_practice_t
<- papaja::apa_print(semantic_retrieval_practice_t)
semantic_retrieval_practice_t_apa
# Case condition
# Retrieval practice vs no retrieval practice
<- recall_data_subject %>%
case_retrieval_practice filter(study_instruction == "case")
<- t.test(proportion_recalled ~ encoding_recall, paired = TRUE, data= case_retrieval_practice)
case_retrieval_practice_t
<- papaja::apa_print(case_retrieval_practice_t) case_retrieval_practice_t_apa
Compare self-reference vs. semantic separately for retrieval and no-retrieval conditions.
# self reference effect in no retrieval practice condition
<- recall_data_subject %>%
self_semantic_no_retrieval filter(study_instruction %in% c("self","semantic") == TRUE,
== "no_recall")
encoding_recall
<- t.test(proportion_recalled ~ study_instruction, paired = TRUE, data= self_semantic_no_retrieval)
self_semantic_no_retrieval_t
<- papaja::apa_print(self_semantic_no_retrieval_t)
self_semantic_no_retrieval_t_apa
# self reference effect in retrieval practice condition
<- recall_data_subject %>%
self_semantic_retrieval filter(study_instruction %in% c("self","semantic") == TRUE,
== "recall")
encoding_recall
<- t.test(proportion_recalled ~ study_instruction, paired = TRUE, data= self_semantic_retrieval)
self_semantic_retrieval_t
<- papaja::apa_print(self_semantic_retrieval_t) self_semantic_retrieval_t_apa
Write-up
# use data.table
#t <- as.data.table(Accuracy$means$`encoding_stimulus_time:encoding_instruction`)
#t[encoding_stimulus_time==500 & encoding_instruction == "F"]$mean_correct
Results
There was a main effect of encoding question, \(F(2, 234) = 23.25\), \(\mathit{MSE} = 0.04\), \(p < .001\), \(\hat{\eta}^2_G = .052\). Mean proportion recall was lowest in the case condition (M = 0.143, SEM = 0.012), higher for the semantic condition (M = 0.219, SEM = 0.014), and highest for the self condition (M = 0.263, SEM = 0.015).
The retrieval practice effect was also significant, \(F(1, 117) = 26.36\), \(\mathit{MSE} = 0.02\), \(p < .001\), \(\hat{\eta}^2_G = .021\). Mean proportion recall was lower for items that did not receive retrieval practice (M = 0.178, SEM = 0.011), compared to items that did receive retrieval practice (M = 0.239, SEM = 0.012).
The interaction between retrieval practice and encoding question approached significance, \(F(2, 234) = 2.58\), \(\mathit{MSE} = 0.02\), \(p = .078\), \(\hat{\eta}^2_G = .004\).
save data
save.image("data/E1_combined.RData")