#==========================================================#
#====== VISUAL WORLD DATA PROCESSING IN R USING VWPre =====#
#======   Guide to accompany tutorial for the BPRG    =====#
#======       James Bartolotti and Scott Schroeder    =====#
#======        j-bartolotti@u.northwestern.edu        =====#
#======         schroeder@u.northwestern.edu          =====#
#======                  06/01/2016                   =====#
#==========================================================#
#
#For additional information, refer to the VWPre documentation:
#Package description https://cran.r-project.org/web/packages/VWPre/index.html
#Manual https://cran.r-project.org/web/packages/VWPre/VWPre.pdf
#Demo https://cran.r-project.org/web/packages/VWPre/vignettes/SR_Example.html
#
#For further detail on using growth curves to analyze eyetracking data, refer to tutorials and materials provided by Dan Mirman http://www.danmirman.org/gca

#NOTE: In this tutorial, commands that you need to change for your own data processing are marked by a comment with three asterisks, ***

#==================================#
#====== STEP 0: Install VWPre =====#
#==================================#

#In this step, we will install the required packages if necessary. This step only needs to be done once.

#Check the version of R you have installed. VWPre needs version 3.2.4 or above
version
#In the output fields, look for 'major' and 'minor.' I am running R 3.3.0
#major          3                           
#minor          3.0

#Install VWPre only when it is missing, and then load it.
#Testing for the package first means the script does not stop on a failed library() call before it reaches the installer,
#and the package is not downloaded again on every run.
if (!requireNamespace("VWPre", quietly = TRUE)) {
  install.packages("VWPre")
}
library(VWPre)

#===============================================#
#====== STEP 1: Importing Eyetracking Data =====#
#===============================================#

#In this step we import data from a fixation report generated by DataViewer and manipulate it for processing in VWPre.

#Attach the visual world data preprocessing tools for this session
library(VWPre)

#In DataViewer, export your edf file (including all columns) as a tab-delimited text file.
#IMPORTANT: Make a single text file that contains all subjects at once, then import that text file in R.
#VWdat <- read.table("1000HzData.txt", header = TRUE, sep = "\t", na.strings = c(".", "NA")) #***edit this command to load your file instead of 1000HzData.txt. If your file is comma delimited, change sep to: sep = ","

#The VWPre library already contains an imported VWdat demo file, which we load here.
#***Remove this line if you are loading your own text file.
data("VWdat")

#Convert the imported text file to a data frame we can manipulate in R.
#*** Edit Item = "itemid" so that itemid refers to the correct column in your dataset
dat0 <- prep_data(
  data = VWdat,
  Subject = "RECORDING_SESSION_LABEL",
  Item = "itemid"
)

#Remove extra columns that DataViewer exports but that you don't need.
#select() is a dplyr verb; it is called as dplyr::select() so this step works whether or not dplyr is attached to the search path.
dat0 <- dplyr::select(
  dat0,
  -starts_with("AVERAGE"), -starts_with("DATA_"),
  -starts_with("HTARGET"), -starts_with("IP"),
  -starts_with("LEFT_ACCELLERATION"), -starts_with("LEFT_GAZE"),
  -starts_with("LEFT_IN_"), -starts_with("LEFT_PUPIL"),
  -starts_with("LEFT_VELOCITY"), -starts_with("RESOLUTION"),
  -starts_with("RIGHT_ACCELLERATION"), -starts_with("RIGHT_GAZE"),
  -starts_with("RIGHT_IN_"), -starts_with("RIGHT_PUPIL"),
  -starts_with("RIGHT_VELOCITY"), -starts_with("SAMPLE"),
  -starts_with("TARGET_"), -starts_with("TRIAL_START"),
  -starts_with("VIDEO")
)

#Stop early if the column removal above left no columns at all (ncol() counts the remaining columns).
if (ncol(dat0) == 0) {
  stop('error: dat0 is empty. You may have removed too many columns')
}

			   
#Eyelink returns NA for visual fixations that do not occur in any of your specified interest areas.
#Here we relabel those fixations as occurring in interest area 0, with a label of "Outside".
#***NoIA refers to the number of interest areas in your data. For typical visual world tasks, there are 4 interest areas. If you have a different number, edit that number here.
dat1 <- relabel_na(data = dat0, NoIA = 4)


#Create a new Time variable in which 0 refers to stimulus onset, and negative numbers refer to pre-stimulus onset.
#*** Offset is the length in milliseconds of your pre-stimulus onset window. Change 100 to the offset window in your data.
dat2 <- create_time_series(data = dat1, Offset = 100)

#Select the eye that fixations were recorded from (Experiments in the BPRG lab typically use the Right eye).
#Check which eye was used with the following command:
check_eye_recording(data = dat2)

#*** Change Recording = "R" if necessary. In order to collapse data from both eyes, use Recording = "LandR".
dat3 <- select_recorded_eye(
  data = dat2,
  Recording = "R",
  WhenLandR = "Right"
)

#========================================================#
#====== STEP 2: Proportion and Empirical Logit Data =====#
#========================================================#

#In this step we prepare the dataset for proportion of looks analyses. This step requires data frame 'dat3' produced in Step 1.

#Determine what frequency the eyetracker sampled at so that we can collapse the data into specified time bins.
#This command will usually print a single value, e.g. 1000 Hz. If it determines that different sampling rates
#were used at different times in the dataset (rare), you will need to subset the data by sampling rate and bin each subset separately.
check_samplingrate(dat3) #the demo data was collected at 1000 Hz.

#*** Replace 1000 with the Sampling Rate of your data. This command will tell you what bin sizes it can generate using your dataset.
ds_options(SamplingRate = 1000)

#Generate specified time bins for your data.
#*** Edit NoIA = 4 to the number of interest areas in your dataset, edit BinSize = 20 to the time bins that you want (in milliseconds),
#and change the sampling rate as provided in check_samplingrate().
dat4 <- bin_prop(
  dat3,
  NoIA = 4,
  BinSize = 20,
  SamplingRate = 1000
)

#To transform our data to Empirical Logits, we need to know how many samples from the eyetracker were included in each of our time bins.
#This is calculated as Original_Sampling_Rate / Binned_Sampling_Rate. For the Demo, this is 1000/50
check_samples_per_bin(dat4) #The demo contains 20 samples in each bin

#Generate empirical logits from the binned proportion data.
#*** Edit NoIA = 4 to the number of interest areas in your dataset, and edit SamplesPerBin according to the calculation provided by check_samples_per_bin()
dat5 <- transform_to_elogit(dat4, NoIA = 4, SamplesPerBin = 20)

#To improve readability, rename the auto-generated column names to correspond to your experimental design.
#*** Edit each of the fields to the Interest Areas in your design.
dat6 <- rename_columns(
  dat5,
  Labels = c(
    IA1 = "Target",
    IA2 = "Rhyme",
    IA3 = "OnsetComp",
    IA4 = "Distractor"
  )
)


#==================================================#
#====== STEP 3: Plotting Fixation Timecourses =====#
#==================================================#
library(ggplot2) #load this library for plotting
#Refer to the Cookbook for R for ways to add features to your plots using ggplot2 http://www.cookbook-r.com/Graphs/

#VWPre uses a custom theme for its plots. The function plot_avg takes a dataset as well as several optional arguments:
#  type:        "proportion" or "elogit".
#  xlim:        A vector of two integers specifying the limits of the x-axis (time).
#  IAColumns:   A named character vector specifying the desired interest area columns to include, as well as custom strings for the legend.
#  Condition1:  A string containing the column name corresponding to the first condition, if any (e.g., Group for monolingual/bilingual).
#  Condition2:  A string containing the column name corresponding to the second condition, if any (e.g., Target present/absent).
#  Cond1Labels: A named character vector specifying the desired custom labels of the levels of the first condition.
#  Cond2Labels: A named character vector specifying the desired custom labels of the levels of the second condition.
#  ErrorBar:    true or false, indicating whether standard error bars should be included.
#  VWPreTheme:  true or false, indicating whether VWPre's built-in theme should be used (true), or ggplot2's base theme (false)

#Keep the plot in a variable so that it can be modified below.
plot_handle <- plot_avg(
  data = dat6,
  type = "proportion",
  xlim = c(-100, 1000),
  IAColumns = c(
    IA_Target_P = "Target",
    IA_Rhyme_P = "Rhyme",
    IA_OnsetComp_P = "OnsetComp",
    IA_Distractor_P = "Distractor"
  ),
  Condition1 = NA,
  Condition2 = NA,
  Cond1Labels = NA,
  Cond2Labels = NA,
  ErrorBar = TRUE,
  VWPreTheme = TRUE
)

#Modifications to the default theme can be specified by adding them to the plot returned by plot_avg. For example:
plot_handle +
  theme(
    axis.text = element_text(size = 15),
    legend.position = c(.1, .75)
  )

#Empirical Logits can be visualized using the same syntax as plotting proportion data:
#change the type parameter to "elogit" AND point IAColumns at the empirical logit columns (suffix _ELogit)
#rather than the proportion columns (suffix _P). Passing the _P columns here would draw proportions on an axis labelled as empirical logits.
plot_avg(
  data = dat6,
  type = "elogit",
  xlim = c(-100, 1000),
  IAColumns = c(
    IA_Target_ELogit = "Target",
    IA_Rhyme_ELogit = "Rhyme",
    IA_OnsetComp_ELogit = "OnsetComp",
    IA_Distractor_ELogit = "Distractor"
  ),
  Condition1 = NA,
  Condition2 = NA,
  Cond1Labels = NA,
  Cond2Labels = NA,
  ErrorBar = TRUE,
  VWPreTheme = TRUE
)


#Specifying Condition1 or Condition2 will stack subplots left or right (or both)

#Subplots split by Condition1 only (the "talker" column)
plot_avg(
  data = dat6,
  type = "proportion",
  xlim = c(0, 1000),
  IAColumns = c(
    IA_Target_P = "Target",
    IA_Rhyme_P = "Rhyme",
    IA_OnsetComp_P = "OnsetComp",
    IA_Distractor_P = "Distractor"
  ),
  Condition1 = "talker",
  Condition2 = NA,
  Cond1Labels = c(
    CH1 = "Chinese 1",
    CH10 = "Chinese 3",
    CH9 = "Chinese 2",
    EN3 = "English 1"
  ),
  Cond2Labels = NA,
  ErrorBar = TRUE,
  VWPreTheme = TRUE
)

#Subplots split by Condition2 only (the same "talker" column, passed as the second condition)
plot_avg(
  data = dat6,
  type = "proportion",
  xlim = c(0, 1000),
  IAColumns = c(
    IA_Target_P = "Target",
    IA_Rhyme_P = "Rhyme",
    IA_OnsetComp_P = "OnsetComp",
    IA_Distractor_P = "Distractor"
  ),
  Condition1 = NA,
  Condition2 = "talker",
  Cond1Labels = NA,
  Cond2Labels = c(
    CH1 = "Chinese 1",
    CH10 = "Chinese 3",
    CH9 = "Chinese 2",
    EN3 = "English 1"
  ),
  ErrorBar = TRUE,
  VWPreTheme = TRUE
)

#Subplots split by both conditions at once ("talker" and "Exp")
plot_avg(
  data = dat6,
  type = "proportion",
  xlim = c(0, 1000),
  IAColumns = c(
    IA_Target_P = "Target",
    IA_Rhyme_P = "Rhyme",
    IA_OnsetComp_P = "OnsetComp",
    IA_Distractor_P = "Distractor"
  ),
  Condition1 = "talker",
  Condition2 = "Exp",
  Cond1Labels = c(
    CH1 = "Chinese 1",
    CH10 = "Chinese 3",
    CH9 = "Chinese 2",
    EN3 = "English 1"
  ),
  Cond2Labels = c(High = "High Exp", Low = "Low Exp"),
  ErrorBar = TRUE,
  VWPreTheme = TRUE
)


#plot_avg_diff() can be used with similar syntax to plot_avg() in order to visualize the difference in fixations between interest areas over time.
#DiffCols names the two interest area columns being compared.
plot_avg_diff(
  data = dat6,
  xlim = c(0, 1000),
  DiffCols = c(IA_Target_P = "Target", IA_Rhyme_P = "Rhyme"),
  Condition1 = NA,
  Condition2 = NA,
  Cond1Labels = NA,
  Cond2Labels = NA,
  ErrorBar = TRUE,
  VWPreTheme = TRUE
)

					

					
#============================================================#
#====== STEP 4: Saving Dataset for Statistical Analysis =====#
#============================================================#

#Before using the dataset for statistical analysis, remove unneeded columns and rearrange the data by Subject, Trial, and Time using the following workflow.
#ungroup(), select() and arrange() are dplyr verbs; they are called with the dplyr:: prefix so this step works whether or not dplyr is attached.
FinalDat <- dplyr::ungroup(dat6)

#*** In addition to Subject, Item, Time, starts_with("IA"), Event, and TRIAL_INDEX, you will want to add additional columns specific to your dataset specifying the columns of your experimental conditions and other variables.
FinalDat <- dplyr::select(
  FinalDat,
  Subject, Item, Time, starts_with("IA"), Event, TRIAL_INDEX,
  Rating, InteractChinese, Exp, target, rhymecomp, onsetcomp, distractor
)

FinalDat <- dplyr::arrange(FinalDat, Subject, TRIAL_INDEX, Time)

#FinalDat now exists only in the current R session. To keep it for later analysis, write it to disk, for example:
#save(FinalDat, file = "FinalDat.rda") #*** choose your own file name


		 

#=================================#
#======       APPENDIX       =====#
#=================================#

#The fasttrack() function can be used to consolidate the preprocessing workflow into a single command.
#Import your data from a text file, remove the unneeded columns, and run fasttrack by providing all parameters at once.
#NOTE: This section replaces the VWdat and dat5 objects created earlier in the script.

#*** Edit "yourfile.txt" to the path of your own exported text file.
#header = TRUE is spelled out in full because the shorthand T is an ordinary variable that can be reassigned.
VWdat <- read.table(
  "yourfile.txt",
  header = TRUE,
  sep = "\t",
  na.strings = c(".", "NA")
)

#select() is a dplyr verb; it is called as dplyr::select() so this step works whether or not dplyr is attached.
VWdat <- dplyr::select(
  VWdat,
  -starts_with("AVERAGE"), -starts_with("DATA_"),
  -starts_with("HTARGET"), -starts_with("IP"),
  -starts_with("LEFT_ACCELLERATION"), -starts_with("LEFT_GAZE"),
  -starts_with("LEFT_IN_"), -starts_with("LEFT_PUPIL"),
  -starts_with("LEFT_VELOCITY"), -starts_with("RESOLUTION"),
  -starts_with("RIGHT_ACCELLERATION"), -starts_with("RIGHT_GAZE"),
  -starts_with("RIGHT_IN_"), -starts_with("RIGHT_PUPIL"),
  -starts_with("RIGHT_VELOCITY"), -starts_with("SAMPLE"),
  -starts_with("TARGET_"), -starts_with("TRIAL_START"),
  -starts_with("VIDEO")
)

#*** Edit the parameters below to match your dataset, as in the step-by-step workflow.
#Note that this call uses Recording = "LandR", whereas the step-by-step workflow above used Recording = "R".
dat5 <- fasttrack(
  data = VWdat,
  Subject = "RECORDING_SESSION_LABEL",
  Item = "itemid",
  EventColumns = c("Subject", "TRIAL_INDEX"),
  NoIA = 4,
  Offset = 100,
  Recording = "LandR",
  WhenLandR = "Right",
  BinSize = 20,
  SamplingRate = 1000,
  SamplesPerBin = 20,
  Constant = 0.5,
  Output = "ELogit"
)

