library(ggplot2)

# Return the most frequent value of `v`.
#
# Each element of `v` is mapped to the position of its first occurrence among
# the distinct values, the occurrences per position are counted, and the
# distinct value with the highest count is returned. When several values share
# the highest count, the one that appears first in `v` is returned.
getmode <- function(v) {
  distinct_values <- unique(v)
  positions <- match(v, distinct_values)
  occurrences <- tabulate(positions)
  distinct_values[which.max(occurrences)]
}

### Interest in becoming a programmer
# Answer distribution, mode and median of q5 in participants_control.
table(participants_control$q5)
print(paste("mode on question 5 -", getmode(participants_control$q5)))
print(paste(
  "median on question 5 -",
  median(as.numeric(participants_control$q5))
))

# gender
# Answer distribution, mode and median of q5 in participants_girls.
table(participants_girls$q5)
getmode(participants_girls$q5)
median(participants_girls$q5)

# Answer distribution, mode and median of q5 in participants_boys.
table(participants_boys$q5)
getmode(participants_boys$q5)
median(participants_boys$q5)

# Two-sample Wilcoxon rank-sum test, girls vs. boys. The test is run once:
# the stored result is printed here and reused for the effect size below.
model <- wilcox.test(participants_girls$q5, participants_boys$q5)
model

# Effect size r = z / sqrt(N), where z is the standard-normal quantile of half
# the two-sided p-value (so z, and therefore r, is never positive).
# NOTE(review): N is hard-coded; presumably the sizes of the two groups --
# confirm against the data (the percentage computation further down divides
# by 101 and 89 instead).
N <- 97 + 85
z <- qnorm(model$p.value / 2)
r <- z / sqrt(N)

# Build a long-format table (gender, answer label, percentage) and draw a
# dodged bar chart of the q5 answer distribution for girls and boys.
gender <- rep(c("Girls", "Boys"), each = 5)
answers <- rep(
  c("Agree", "Slightly agree", "Neutral", "Slightly disagree", "Disagree"),
  2
)
# Counts per answer converted to percentages of the group totals.
# NOTE(review): the totals 101 and 89 are hard-coded -- confirm against the
# data. This also assumes each table() yields exactly five entries ordered
# Agree ... Disagree -- TODO confirm the coding of q5.
percentages <- c(
  table(participants_girls$q5) / 101 * 100,
  table(participants_boys$q5) / 89 * 100
)
# Fail loudly if the labels and the percentages do not line up one-to-one,
# instead of letting data.frame() recycle a shorter vector.
stopifnot(length(percentages) == length(answers))
toplot <- data.frame(gender, answers, percentages)

# The values are already the final bar heights, so geom_col() draws them
# directly; no summary statistic is involved.
# The object name (including its spelling) is kept because the cleanup at the
# end of the script removes it by this name.
barChartDistrubtion <- ggplot(
  toplot,
  aes(answers, percentages, fill = gender)
) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("#ffc107", "#004d40")) +
  labs(x = "CS career interest", y = "percentage") +
  # Fix the x-axis order to the order of the answer labels.
  scale_x_discrete(limits = unique(answers)) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )
plot(barChartDistrubtion)

# age
# Spearman rank correlation between q5 and age: first with the `age` column,
# then with `age_category`.
cor.test(participants_control$age, participants_control$q5, method = "spearman")
# The age_category test is run once; the stored result is printed.
model <- cor.test(
  participants_control$age_category,
  participants_control$q5,
  method = "spearman"
)
model

# Answer distribution, mode and median of q5 for each age-group data frame.
table(participants_age_7_8$q5)
getmode(participants_age_7_8$q5)
median(participants_age_7_8$q5)

table(participants_age_9_10$q5)
getmode(participants_age_9_10$q5)
median(participants_age_9_10$q5)

table(participants_age_11_12$q5)
getmode(participants_age_11_12$q5)
median(participants_age_11_12$q5)

table(participants_age_13_14$q5)
getmode(participants_age_13_14$q5)
median(participants_age_13_14$q5)

# Wilcoxon rank-sum test for 7-8 vs. 9-10 with effect size r = z / sqrt(N),
# where z is the standard-normal quantile of half the two-sided p-value.
# NOTE(review): N is hard-coded; presumably the sizes of the two groups --
# confirm against the data.
model <- wilcox.test(participants_age_7_8$q5, participants_age_9_10$q5)
N <- 49 + 75
z <- qnorm(model$p.value / 2)
r <- z / sqrt(N)

# Interval p +/- z_star * SE with SE = sqrt(p * (1 - p) / N).
# NOTE(review): this is the normal-approximation interval formula for a
# proportion, applied here to the test's p-value -- confirm that this is the
# intended quantity.
p <- model$p.value
CL <- 0.95
SE <- sqrt(p * (1 - p) / N)
# Critical value for confidence level CL, rounded to two decimals
# (1.96 for CL = 0.95).
z_star <- round(qnorm((1 - CL) / 2, lower.tail = FALSE), digits = 2)
ME <- z_star * SE

c(p - ME, p + ME)

# All pairwise comparisons between the four age groups.
# NOTE(review): no multiple-comparison adjustment is applied to these
# p-values here.
wilcox.test(participants_age_7_8$q5, participants_age_9_10$q5)
wilcox.test(participants_age_7_8$q5, participants_age_11_12$q5)
wilcox.test(participants_age_7_8$q5, participants_age_13_14$q5)
wilcox.test(participants_age_9_10$q5, participants_age_11_12$q5)
wilcox.test(participants_age_9_10$q5, participants_age_13_14$q5)
wilcox.test(participants_age_11_12$q5, participants_age_13_14$q5)

# Comparisons between single ages: 7 vs. 8, 9 vs. 10, 11 vs. 12, 13 vs. 14.
wilcox.test(
  subset(participants_control, participants_control$age == 7)$q5,
  subset(participants_control, participants_control$age == 8)$q5
)
wilcox.test(
  subset(participants_control, participants_control$age == 9)$q5,
  subset(participants_control, participants_control$age == 10)$q5
)
wilcox.test(
  subset(participants_control, participants_control$age == 11)$q5,
  subset(participants_control, participants_control$age == 12)$q5
)
wilcox.test(
  subset(participants_control, participants_control$age == 13)$q5,
  subset(participants_control, participants_control$age == 14)$q5
)

# age & gender
# Split the girls by age_category (1-4); the object names label the
# categories as the 7-8, 9-10, 11-12 and 13-14 groups.
participants_girls_7_8 <- subset(participants_girls, age_category == 1)
participants_girls_9_10 <- subset(participants_girls, age_category == 2)
participants_girls_11_12 <- subset(participants_girls, age_category == 3)
participants_girls_13_14 <- subset(participants_girls, age_category == 4)

# Answer distribution, mode and median of q5 for each girls subgroup.
table(participants_girls_7_8$q5)
getmode(participants_girls_7_8$q5)
median(participants_girls_7_8$q5)

table(participants_girls_9_10$q5)
getmode(participants_girls_9_10$q5)
median(participants_girls_9_10$q5)

table(participants_girls_11_12$q5)
getmode(participants_girls_11_12$q5)
median(participants_girls_11_12$q5)

table(participants_girls_13_14$q5)
getmode(participants_girls_13_14$q5)
median(participants_girls_13_14$q5)

# Same split for the boys.
participants_boys_7_8 <- subset(participants_boys, age_category == 1)
participants_boys_9_10 <- subset(participants_boys, age_category == 2)
participants_boys_11_12 <- subset(participants_boys, age_category == 3)
participants_boys_13_14 <- subset(participants_boys, age_category == 4)

# Answer distribution, mode and median of q5 for each boys subgroup.
table(participants_boys_7_8$q5)
getmode(participants_boys_7_8$q5)
median(participants_boys_7_8$q5)

table(participants_boys_9_10$q5)
getmode(participants_boys_9_10$q5)
median(participants_boys_9_10$q5)

table(participants_boys_11_12$q5)
getmode(participants_boys_11_12$q5)
median(participants_boys_11_12$q5)

table(participants_boys_13_14$q5)
getmode(participants_boys_13_14$q5)
median(participants_boys_13_14$q5)

# Cross-tabulation of age against the q5 answer for participants_control.
table(participants_control$age, participants_control$q5)

# Pairwise age-group comparisons within the girls.
# NOTE(review): no multiple-comparison adjustment is applied to the p-values
# of the pairwise tests in this section.
wilcox.test(participants_girls_7_8$q5, participants_girls_9_10$q5)
wilcox.test(participants_girls_7_8$q5, participants_girls_11_12$q5)
wilcox.test(participants_girls_7_8$q5, participants_girls_13_14$q5)
wilcox.test(participants_girls_9_10$q5, participants_girls_11_12$q5)
wilcox.test(participants_girls_9_10$q5, participants_girls_13_14$q5)
wilcox.test(participants_girls_11_12$q5, participants_girls_13_14$q5)

# Pairwise age-group comparisons within the boys.
wilcox.test(participants_boys_7_8$q5, participants_boys_9_10$q5)
wilcox.test(participants_boys_7_8$q5, participants_boys_11_12$q5)
wilcox.test(participants_boys_7_8$q5, participants_boys_13_14$q5)
wilcox.test(participants_boys_9_10$q5, participants_boys_11_12$q5)
wilcox.test(participants_boys_9_10$q5, participants_boys_13_14$q5)
wilcox.test(participants_boys_11_12$q5, participants_boys_13_14$q5)

# Girls vs. boys within each age group.
wilcox.test(participants_girls_7_8$q5, participants_boys_7_8$q5)
wilcox.test(participants_girls_9_10$q5, participants_boys_9_10$q5)
wilcox.test(participants_girls_11_12$q5, participants_boys_11_12$q5)
wilcox.test(participants_girls_13_14$q5, participants_boys_13_14$q5)

# 9-10 group: test printed again, followed by the effect size
# r = z / sqrt(N), where z is the standard-normal quantile of half the
# two-sided p-value.
# NOTE(review): the stored test passes boys first and girls second, the
# reverse of the printed call; only its p-value is used below.
# NOTE(review): N is hard-coded; presumably the sizes of the two groups --
# confirm against the data.
wilcox.test(participants_girls_9_10$q5, participants_boys_9_10$q5)
model <- wilcox.test(participants_boys_9_10$q5, participants_girls_9_10$q5)
N <- 28 + 39
z <- qnorm(model$p.value / 2)
r <- z / sqrt(N)

# 11-12 group: test printed again; no effect size is computed for it.
wilcox.test(participants_girls_11_12$q5, participants_boys_11_12$q5)

# 13-14 group: the test is run once, printed, and reused for the effect size.
# NOTE(review): N is hard-coded here as well -- confirm against the data.
model <- wilcox.test(participants_girls_13_14$q5, participants_boys_13_14$q5)
model
N <- 10 + 7
z <- qnorm(model$p.value / 2)
r <- z / sqrt(N)

# Remove the temporary objects created by this script from the workspace:
# the plot inputs and plot object, the last stored test result, and the
# effect-size / interval scalars.
rm(list = c(
  "answers", "CL", "gender", "ME", "N", "p", "percentages", "r", "SE", "z",
  "z_star", "toplot",
  "barChartDistrubtion", "model"
))
