import pandas as pd
from scipy import stats
import numpy as np
import rqFunctions as rq
import re

## Prepare data and dataset
# Load the CSV and pass it through rq.dropEmptyPrograms (presumably removing
# empty programs -- confirm in rqFunctions). Both helpers receive False as
# their second argument.
programs = rq.dropEmptyPrograms(rq.importCSV('data.csv', False), False)

# Build the analysis table by applying the rq steps innermost-first:
# calculateLOC, then add_keywords_as_column, then calculate_keywords_count.
programs_LOC = rq.calculate_keywords_count(
    rq.add_keywords_as_column(
        rq.calculateLOC(programs)
    )
)

## LOC, unique and total keywords mean + std
if True:
    # Switches for the two report sections printed for every level.
    show_overall = True
    show_per_adventure = True

    # (printed label, value in the 'gender' column)
    gender_rows = (('n boys', 'm'), ('n girls', 'f'), ('n others', 'o'))
    # (printed heading, column handed to rq.column_mean_std)
    headed_columns = (
        ('\nLOC', 'LOC'),
        ('\nunique', 'unique_keywords'),
        ('\ntotal', 'total_keywords'),
    )

    for level in range(1, 6):
        # Restrict the table to the programs of the current level.
        data = programs_LOC.loc[programs_LOC['level'] == level]

        print("\nLEVEL", level)

        if show_overall:
            # Number of rows per gender value within this level.
            for label, gender in gender_rows:
                print(label, len(data.loc[data['gender'] == gender]))

            # One rq.column_mean_std call per column, each under its heading.
            for heading, column in headed_columns:
                print(heading)
                rq.column_mean_std(data, column)

        if show_per_adventure:
            # Same three columns, restricted to one adventure at a time.
            for adventure in ('story', 'rock', 'turtle'):
                print(f"\n {adventure} \n")
                for _, column in headed_columns:
                    rq.column_mean_std(
                        data.loc[data['adventure_name'] == adventure],
                        column,
                    )
        

## Between gender per level, per adventure
if False:
    print("\nTESTING")

    # Columns compared between genders, in reporting order.
    compared_columns = ('LOC', 'unique_keywords', 'total_keywords')

    for level in range(1, 6):
        # Restrict the table to the programs of the current level.
        data = programs_LOC.loc[programs_LOC['level'] == level]

        print("\nLEVEL", level)

        # First across all programs of the level, regardless of adventure.
        print("Between gender - no adventures")
        for column in compared_columns:
            rq.ttest_between_gender_all(data, column)

        # Then once per adventure; a blank separator is printed between
        # adventures but not after the last one.
        print("Between gender")
        for position, adventure in enumerate(('story', 'rock', 'turtle')):
            if position > 0:
                print('\n')
            for column in compared_columns:
                rq.ttest_between_gender(data, adventure, column)


#manually tested programs
def print_program(frame, position):
    """Print one program for manual inspection.

    Prints the row of ``frame`` at integer position ``position``, then the
    value of that same row's 'code' column on its own, then a blank
    separator.

    The code is read from the row that was fetched by position, so the two
    printouts always describe the same program. Looking the code up by index
    label instead (``frame['code'][position]``) only matches the positional
    row while the index is still the default 0..n-1 range.

    Raises:
        IndexError: if ``position`` is outside the bounds of ``frame``.
    """
    row = frame.iloc[position]
    print(row)
    print(row['code'])
    print('\n')


# Row positions of the 20 programs that were inspected by hand.
MANUALLY_TESTED_POSITIONS = (
    6951, 4664, 11796, 666, 6739, 4497, 4239, 13520, 8859, 3187,
    5148, 2475, 9624, 6768, 7099,
    6184, 2663, 8394, 2717, 11254,
)

if False:
    #manual testing of 20 programs
    for position in MANUALLY_TESTED_POSITIONS:
        print_program(programs_LOC, position)
