#IAT Improved algorithm
library(ggplot2)
source('functions_results.R')

#participants_subset <- part_all
#answers_subset <- answers_anonymized[is.element(answers_anonymized$participant_id, participants_subset$id),]

participants_subset <- participants_control_social # define here which subset to use
# Keep only the answers given by participants in the chosen subset.
answers_subset <- answers_control[answers_control$participant_id %in% participants_subset$id, ]

# The source tables are not referenced again in this script.
rm(answers_control, participants_control, participants_control_interest, participants_control_social)

# Step one: use data from all trials within combination blocks
## gender: 28 and 26
## social: 34 and 36
## interest: 32 and 30
consistent_id <- 34
inconsistent_id <- 36

answers_subset <- subset(answers_subset,
                         answers_subset$question_id %in% c(consistent_id, inconsistent_id))

# Divide block in 'practice' and 'test'
answers_subset <- answers_subset[order(answers_subset$id), ]
# Sanity check: prints TRUE when there are exactly 64 trials per participant.
print(nrow(answers_subset) / 64 == nrow(participants_subset))

# The first 12 trials of every participant/question combination are practice,
# the remaining ones are test. The position is counted within each
# participant/question group (in id order) instead of recycling a fixed
# 12 + 20 pattern over all rows, so interleaved or incomplete sessions cannot
# shift the labels of other participants.
practice_trials <- 12
trial_in_block <- ave(seq_len(nrow(answers_subset)),
                      answers_subset$participant_id,
                      answers_subset$question_id,
                      FUN = seq_along)
answers_subset$block <- ifelse(trial_in_block <= practice_trials, 'practice', 'test')
rm(practice_trials, trial_in_block)

# Step two: a. Eliminate trials with latencies >10,000 ms
# (which() also discards rows whose latency comparison is NA)
answers_subset <- answers_subset[which(answers_subset$response_time <= 10000), , drop = FALSE]

# Step two: b. Eliminate subjects for whom more than 10% of trials have latency less than 300ms
# With 32 + 32 trials per participant, "more than 10%" means at least 7 trials
# below 300 ms, hence the fixed cut-off of more than 6 fast trials.

# Number of fast (<300 ms) trials for every participant id, in table order.
ids_in_study <- as.vector(participants_subset$id)
fast_trial_counts <- vapply(ids_in_study, function(current_id) {
  sum(answers_subset$response_time < 300 &
        answers_subset$participant_id == current_id, na.rm = TRUE)
}, integer(1), USE.NAMES = FALSE)

# Drop each flagged participant together with all of their answers.
for (flagged_id in ids_in_study[fast_trial_counts > 6]) {
  answers_subset <- subset(answers_subset, answers_subset$participant_id != flagged_id)
  participants_subset <- subset(participants_subset, participants_subset$id != flagged_id)
}

rm(ids_in_study, fast_trial_counts, flagged_id)

# Extra step: compare response times and the 'answers' column between the
# consistent and inconsistent question, and between practice and test trials.

# Row masks are built once and combined below; which() ignores NA comparisons
# in the same way subset() does.
is_consistent <- answers_subset$question_id == consistent_id
is_inconsistent <- answers_subset$question_id == inconsistent_id
is_practice <- answers_subset$block == 'practice'
is_test <- answers_subset$block == 'test'

answers_consistent_practice <- answers_subset[which(is_consistent & is_practice), , drop = FALSE]
answers_inconsistent_practice <- answers_subset[which(is_inconsistent & is_practice), , drop = FALSE]
answers_consistent_test <- answers_subset[which(is_consistent & is_test), , drop = FALSE]
answers_inconsistent_test <- answers_subset[which(is_inconsistent & is_test), , drop = FALSE]

answers_consistent <- answers_subset[which(is_consistent), , drop = FALSE]
answers_inconsistent <- answers_subset[which(is_inconsistent), , drop = FALSE]

# --- Response times -------------------------------------------------------
# Consistent versus inconsistent: practice trials, test trials, all trials.
t.test(answers_consistent_practice$response_time, answers_inconsistent_practice$response_time)
t.test(answers_consistent_test$response_time, answers_inconsistent_test$response_time)
t.test(answers_consistent$response_time, answers_inconsistent$response_time)

# Practice versus test within each question.
t.test(answers_consistent_practice$response_time, answers_consistent_test$response_time)
t.test(answers_inconsistent_practice$response_time, answers_inconsistent_test$response_time)

mean(answers_consistent$response_time)
median(answers_consistent$response_time)

mean(answers_inconsistent$response_time)
median(answers_inconsistent$response_time)

# --- 'answers' column -----------------------------------------------------
# Same comparisons as above, applied to the 'answers' column.
t.test(answers_consistent_practice$answers, answers_inconsistent_practice$answers)
t.test(answers_consistent_test$answers, answers_inconsistent_test$answers)
t.test(answers_consistent$answers, answers_inconsistent$answers)

t.test(answers_consistent_practice$answers, answers_consistent_test$answers)
t.test(answers_inconsistent_practice$answers, answers_inconsistent_test$answers)

mean(answers_consistent$answers)
median(answers_consistent$answers)

mean(answers_inconsistent$answers)
median(answers_inconsistent$answers)

# NOTE(review): the next four lines take the mean over practice trials but the
# median over test trials -- confirm this pairing is intended.
mean(answers_consistent_practice$answers)
median(answers_consistent_test$answers)

mean(answers_inconsistent_practice$answers)
median(answers_inconsistent_test$answers)

# Clean up everything created in this step.
rm(is_consistent, is_inconsistent, is_practice, is_test)
rm(answers_consistent_practice, answers_inconsistent_practice, answers_consistent_test,
   answers_inconsistent_test, answers_consistent, answers_inconsistent)

# Step 3+4+5 not needed in improved algorithm without error correction

# Step 6: a. Compute SD for trials in practice and test blocks
# Each SD pools the consistent and inconsistent trials of the given block type.

# SD of the response times in one block type ('practice' or 'test'); limited to
# a single participant when pid is given. which() skips NA comparisons.
latency_sd <- function(block_name, pid = NULL) {
  selected <- answers_subset$block == block_name
  if (!is.null(pid)) {
    selected <- selected & answers_subset$participant_id == pid
  }
  sd(answers_subset$response_time[which(selected)])
}

# SDs over all remaining participants together.
sd_practice <- latency_sd('practice')
sd_test <- latency_sd('test')

# SDs per participant, in the row order of participants_subset. Filling the
# columns in one assignment (instead of a -1 placeholder that is overwritten
# row by row) also works for an empty participant table and for NA ids.
# A participant with fewer than two trials in a block gets NA.
participant_ids <- as.vector(participants_subset$id)
participants_subset$sd_practice <- vapply(
  participant_ids, function(pid) latency_sd('practice', pid),
  numeric(1), USE.NAMES = FALSE
)
participants_subset$sd_test <- vapply(
  participant_ids, function(pid) latency_sd('test', pid),
  numeric(1), USE.NAMES = FALSE
)

rm(latency_sd, participant_ids)

# Step 7 Replace error latency with block mean + 600 ms: not for this research

# Step 8: No transformation

# Step 9: Average per block

# Mean response time for one question id and one block type ('practice' or
# 'test'); limited to a single participant when pid is given. which() skips NA
# comparisons. An empty selection yields NaN.
mean_latency <- function(question, block_name, pid = NULL) {
  selected <- answers_subset$question_id == question &
    answers_subset$block == block_name
  if (!is.null(pid)) {
    selected <- selected & answers_subset$participant_id == pid
  }
  mean(answers_subset$response_time[which(selected)])
}

# Same mean for every participant, in the row order of participants_subset.
mean_latency_per_participant <- function(question, block_name) {
  vapply(
    as.vector(participants_subset$id),
    function(pid) mean_latency(question, block_name, pid),
    numeric(1), USE.NAMES = FALSE
  )
}

# Means over all remaining participants together.
mean_consistent_practice <- mean_latency(consistent_id, 'practice')
mean_consistent_test <- mean_latency(consistent_id, 'test')
mean_inconsistent_practice <- mean_latency(inconsistent_id, 'practice')
mean_inconsistent_test <- mean_latency(inconsistent_id, 'test')

# Means per participant. Each column is filled in a single assignment instead
# of a -1 placeholder overwritten row by row, so this also works for an empty
# participant table and for NA ids.
participants_subset$mean_consistent_practice <-
  mean_latency_per_participant(consistent_id, 'practice')
participants_subset$mean_consistent_test <-
  mean_latency_per_participant(consistent_id, 'test')
participants_subset$mean_inconsistent_practice <-
  mean_latency_per_participant(inconsistent_id, 'practice')
participants_subset$mean_inconsistent_test <-
  mean_latency_per_participant(inconsistent_id, 'test')

rm(mean_latency, mean_latency_per_participant)

# Step 10: differences between blocks (inconsistent minus consistent)
diff_practice <- mean_inconsistent_practice - mean_consistent_practice
diff_test <- mean_inconsistent_test - mean_consistent_test

# Per participant: plain column arithmetic, one value per row.
participants_subset$diff_practice <-
  participants_subset$mean_inconsistent_practice - participants_subset$mean_consistent_practice
participants_subset$diff_test <-
  participants_subset$mean_inconsistent_test - participants_subset$mean_consistent_test

# Step 11: divide differences by SD
score_part_practice <- diff_practice / sd_practice
score_test_practice <- diff_test / sd_test

participants_subset$d_practice <-
  participants_subset$diff_practice / participants_subset$sd_practice
participants_subset$d_test <-
  participants_subset$diff_test / participants_subset$sd_test

# Step 12: average the two quotients from step 11
result <- mean(c(score_part_practice, score_test_practice))

# Mean of the practice and test quotient for every participant.
participants_subset$d_result <-
  (participants_subset$d_practice + participants_subset$d_test) / 2

# Summary of the per-participant D measures.
mean(participants_subset$d_practice)
mean(participants_subset$d_test)
mean(participants_subset$d_result)
median(participants_subset$d_result)

# One-sample t-test of the D measure against zero.
t.test(participants_subset$d_result, mu=0)

# Boxplot of the D measure over all participants.
box <- ggplot(participants_subset, aes(x="", y=d_result)) + geom_boxplot()
box + labs(x ="Implicit association - social stereotype", y = "D measure") + ylim(-2,2) +
  theme(text = element_text(size = 16), axis.text = element_text(size = 18))

# Mean response time per image; not used further in this script and removed in
# the cleanup below.
test <- aggregate(answers_subset$response_time, by=list(answers_subset$img_link), FUN=mean)

# Mean D measure per age; aggregate() names the columns Group.1 (age) and x.
mean_d_per_age <- aggregate(participants_subset$d_result, by=list(participants_subset$age), FUN=mean)
ggplot(mean_d_per_age, aes(x=Group.1, y=x)) + geom_point() + xlab('Age') +
  ylab('D measure') + scale_x_continuous(breaks = seq(7, 18, 1)) +
  theme(text = element_text(size = 16))

# Cleanup. Several of these names are never created in this script, so only
# the objects that actually exist are removed; a plain rm() would warn about
# every missing one.
cleanup_names <- c(
  "test",
  "mean_d_per_familiar", "familiar", "familiar_often", "familiar_not_often",
  "familiar_media", "not_familiar",
  "part_id", "sd_practice", "sd_test",
  "mean_consistent_practice", "mean_consistent_test",
  "mean_inconsistent_practice", "mean_inconsistent_test",
  "diff_practice", "diff_test", "result",
  "score_part_practice", "score_test_practice"
)
rm(list = intersect(cleanup_names, ls()))
rm(cleanup_names)