# This code was written by Thea House in October 2021 to run on R version 4.0.5.
# The purpose of the code is to create a synthetic data set from the original
# data that has general and specific utility.

# Setup ----

# Set working directory and import original data.
# NOTE(review): the path is specific to the author's machine; all files below
# are read from and written to this directory.
setwd("~/LEXAR/Bristol/R working directory/Dot Probe Attention Training")
mydata <- read.csv(
  file = "DotProbeAttentionTraining_DataSheet_V1.2_20210827.csv",
  header = TRUE
)

# Install synthpop only when it is missing, so that re-running the script does
# not re-download the package every time.
if (!requireNamespace("synthpop", quietly = TRUE)) {
  install.packages("synthpop")
}
library(synthpop)

# Helper ----

#' Export one synthetic subset and plot it against its original
#'
#' Writes the synthetic data with `write.syn()`, reads "<stem>.csv" back in as
#' a data frame, appends the three design labels, and saves the
#' original-vs-synthetic comparison plots to "<stem>.pdf".
#'
#' @param synds Synthetic data object returned by `syn()`.
#' @param original Data frame that `synds` was synthesised from.
#' @param stem File name stem (no extension) shared by the csv and pdf output.
#' @param experiment,soa,condition Values stored in the `Experiment`, `SOA`
#'   and `Condition` columns of the returned data frame.
#' @return The data frame read back from "<stem>.csv" with the three label
#'   columns appended (in the order Experiment, SOA, Condition).
export_synthetic <- function(synds, original, stem, experiment, soa,
                             condition) {
  write.syn(synds, file = stem, filetype = "csv")
  syn_df <- read.csv(file = paste0(stem, ".csv"), header = TRUE)
  syn_df$Experiment <- experiment
  syn_df$SOA <- soa
  syn_df$Condition <- condition

  # Visual comparison of original and synthetic data, presenting the raw
  # counts for each variable. The device is closed via on.exit() so it is not
  # left open if compare() fails.
  pdf(paste0(stem, ".pdf"))
  on.exit(dev.off(), add = TRUE)
  # Explicit print(): inside a function (or under source()) the plot is not
  # auto-printed, which would leave the pdf empty.
  # NOTE(review): `$plot` is kept exactly as in the original script; it may be
  # resolving to a longer component name through partial matching - confirm
  # against the installed synthpop version.
  print(compare(synds, original, breaks = 10, stat = "counts")$plot)

  syn_df
}

# Subset each condition and experiment ----

Lab_HighFat <- subset(
  mydata,
  Experiment == "Lab" & Condition == "High_Fat",
  select = Change_AB:Change_BD
)
Lab_LowFat <- subset(
  mydata,
  Experiment == "Lab" & Condition == "Low_Fat",
  select = Change_AB:Change_BD
)
Online500_HighFat <- subset(
  mydata,
  Experiment == "Online" & Condition == "High_Fat" & SOA == 500,
  select = Change_AB:Change_BD
)
Online500_LowFat <- subset(
  mydata,
  Experiment == "Online" & Condition == "Low_Fat" & SOA == 500,
  select = Change_AB:Change_BD
)
Online100_HighFat <- subset(
  mydata,
  Experiment == "Online" & Condition == "High_Fat" & SOA == 100,
  select = Change_AB:Change_BD
)
Online100_LowFat <- subset(
  mydata,
  Experiment == "Online" & Condition == "Low_Fat" & SOA == 100,
  select = Change_AB:Change_BD
)

# Create synthetic data for each subset ----
# Each subset also gets a pdf of plots for visual comparison of the original
# and synthetic data. SOA labels are stored as strings, as in the original
# script.

my.seed.Lab <- 6102021

Syn_Lab_HighFat <- syn(Lab_HighFat, seed = my.seed.Lab)
Syndf_Lab_HighFat <- export_synthetic(
  Syn_Lab_HighFat, Lab_HighFat, "Syn_Lab_HighFat",
  experiment = "Lab", soa = "500", condition = "High_Fat"
)

Syn_Lab_LowFat <- syn(Lab_LowFat, seed = my.seed.Lab)
Syndf_Lab_LowFat <- export_synthetic(
  Syn_Lab_LowFat, Lab_LowFat, "Syn_Lab_LowFat",
  experiment = "Lab", soa = "500", condition = "Low_Fat"
)

my.seed.Online500 <- 7102021

Syn_Online500_HighFat <- syn(Online500_HighFat, seed = my.seed.Online500)
Syndf_Online500_HighFat <- export_synthetic(
  Syn_Online500_HighFat, Online500_HighFat, "Syn_Online500_HighFat",
  experiment = "Online", soa = "500", condition = "High_Fat"
)

Syn_Online500_LowFat <- syn(Online500_LowFat, seed = my.seed.Online500)
Syndf_Online500_LowFat <- export_synthetic(
  Syn_Online500_LowFat, Online500_LowFat, "Syn_Online500_LowFat",
  experiment = "Online", soa = "500", condition = "Low_Fat"
)

# NOTE(review): same seed value as my.seed.Online500 - confirm this is
# intended.
my.seed.Online100 <- 7102021

Syn_Online100_HighFat <- syn(Online100_HighFat, seed = my.seed.Online100)
Syndf_Online100_HighFat <- export_synthetic(
  Syn_Online100_HighFat, Online100_HighFat, "Syn_Online100_HighFat",
  experiment = "Online", soa = "100", condition = "High_Fat"
)

Syn_Online100_LowFat <- syn(Online100_LowFat, seed = my.seed.Online100)
Syndf_Online100_LowFat <- export_synthetic(
  Syn_Online100_LowFat, Online100_LowFat, "Syn_Online100_LowFat",
  experiment = "Online", soa = "100", condition = "Low_Fat"
)

# Combine synthetic subsets to create a full synthetic data set ----

syn_combined <- rbind(
  Syndf_Lab_HighFat,
  Syndf_Lab_LowFat,
  Syndf_Online500_HighFat,
  Syndf_Online500_LowFat,
  Syndf_Online100_HighFat,
  Syndf_Online100_LowFat
)

# Reorder variables to match order in original dataset: the three label
# columns first, then the synthesised columns in their existing order.
# Selecting by name instead of by position keeps every column even if the
# number of synthesised columns changes.
label_cols <- c("Experiment", "SOA", "Condition")
syn_combined <- syn_combined[
  ,
  c(label_cols, setdiff(names(syn_combined), label_cols))
]

# Write dataframe to csv.
# NOTE(review): write.csv() also writes the row names as an unnamed first
# column by default; add row.names = FALSE if that column is not wanted.
write.csv(
  syn_combined,
  "DotProbeAttentionTraining_SyntheticDataSheet_V1.0_20211006.csv"
)