######################################
##Script to run MR analyses for EF to CUD
#The syntax was created by Sabrina M. I. Burton.
#The syntax was checked and edited by Zoe E. Reed
#R Version 4.0.3
#The data for analyses was obtained from published genome-wide association studies
#The code below shows all manipulations and recodes with annotations

######################################
#Load libraries (use install.packages() to install)
######################################
library(devtools)
library(data.table)
#install_github("MRCIEU/TwoSampleMR")
#devtools::install_github('MarkEdmondson1234/googleAuthR@v0.8.1')
library(R.utils)
library(TwoSampleMR)
library(MRInstruments)
library(MendelianRandomization)
library(ggplot2)

######################################
#Read in GWAS data
######################################

#Create the ID for the exposure data. Exposure data output from proxy search script will be used
exp_id<-"executive_function" 

#Columns for the outcome data, available from the GWAS datasets.
#Those commented out are where this data is not available
out_file<-"CUD_EUR_casecontrol_public_11.14.2020" #Add path to file here
out_sep<-" " #This is the file separator
out_snp<-"SNP" #Name of column with the rsid in it
out_beta<-"Beta" #Name of column with effect size (beta or OR)
out_se<-"SE" #Name of column with standard error
out_ea<-"A1" #Name of column with the effect allele
out_oa<-"A2" #Name of column with other allele
#out_eaf<-"Freq1" #Name of column with effect allele frequency
out_p<-"P" #Name of column with the p value
out_n<-"N" #Name of column with total sample size
out_id<-"CUD" #Phenotype name
out_ncase<-"N_CAS" #These (ncase and ncontrol) two are optional if you have cases and controls sample size instead or in addition to total if available, so enter column name for that in quotes and uncomment if using
out_ncontrol<-"N_CON"


#Read in exposure data from proxy search script
exp_dat_clumped <- read.csv("EF_CUD_Proxy.csv")


#Specify outcome data, comment out aspects that are not included in the columns for the outcome data.
out_dat<-read_outcome_data(
  snps = exp_dat_clumped$SNP,
  filename = out_file,
  sep = out_sep,
  snp_col = out_snp,
  beta_col = out_beta, 
  se_col = out_se,
  effect_allele_col = out_ea,
  other_allele_col = out_oa,
  #eaf_col = out_eaf,
  pval_col = out_p,
  samplesize_col = out_n,
  ncase_col = out_ncase,
  ncontrol_col = out_ncontrol
)


#Plot names, here specify the path to save out output files to.
path<-"FILEPATH"
exp_out<-"ExecutiveFunctioning_CUD"


#Check data, the number of rows should be the same as the clumped exposure data
dim(out_dat)
dim(exp_dat_clumped)
head(out_dat)
head(exp_dat_clumped)

######################################
#Harmonise data
######################################

#Harmonise data
har_dat <- harmonise_data(
  exposure_dat = exp_dat_clumped, 
  outcome_dat = out_dat
)

#Check SNPs to see if harmonisation worked ok
dim(har_dat)
head(har_dat)

#Specify exposure and outcome phenotypes
har_dat$id.exposure<-exp_id
har_dat$id.outcome<-out_id

######################################
#Run analyses and use sink to save to a text file
#We use the generate_odds_ratio when the outcome is binary e.g. case/control
######################################
sink(paste0(path, exp_out,"MR_Results.txt"))

#Main analysis
results<-mr(har_dat)
or<-generate_odds_ratios(results)
cat("
MR results
")
print(or)

#Sensitivity analyses
het<-mr_heterogeneity(har_dat) #Heterogeneity
cat("
Heterogeneity
")
print(het)

plt<-mr_pleiotropy_test(har_dat) #Horizontal pleiotropy
cat("
Horizontal pleiotropy
")
print(plt)

#Plots
scatter<-mr_scatter_plot(or, har_dat) #SNP effects on exp against SNP effects on outcome
ggsave(scatter[[1]], file=paste0(path, exp_out,"scatter.pdf"), width=7, height=7)

res_single<-mr_singlesnp(har_dat)
res_single_or<-generate_odds_ratios(res_single)
cat("
MR on single SNPs
")
print(res_single_or)
forest<-mr_forest_plot(res_single_or) #Forest plot to compare MR estimates from different methods against single SNP tests
ggsave(forest[[1]], file=paste0(path, exp_out,"forest.pdf"), width=7, height=15)

res_loo<-mr_leaveoneout(har_dat)
res_loo_or<-generate_odds_ratios(res_loo)

cat("
Leave one out sensitivity analysis
")
print(res_loo_or)
loo<-mr_leaveoneout_plot(res_loo_or) #Leave-one-out analysis
ggsave(loo[[1]], file=paste0(path, exp_out,"leaveoneout.pdf"), width=7, height=15)

funnel<-mr_funnel_plot(res_single_or) #Funnel plot
ggsave(funnel[[1]], file=paste0(path, exp_out,"funnel.pdf"), width=7, height=7)


#I-squared function
Isq <- function(y,s){
  k          = length(y)
  w          = 1/s^2; sum.w  = sum(w)
  mu.hat     = sum(y*w)/sum.w  
  Q          = sum(w*(y-mu.hat)^2)
  Isq        = (Q - (k-1))/Q
  Isq        = max(0,Isq)
  return(Isq)
}

#Rename required columns
har_dat$BetaXG<-har_dat$beta.exposure
har_dat$seBetaXG<-har_dat$se.exposure
BetaXG = har_dat$BetaXG
seBetaXG = har_dat$seBetaXG 
seBetaYG<-har_dat$se.outcome

BXG= abs(BetaXG) #Make gene--exposure estimates positive  

#Calculate F statistics and I-squared statistics
#to measure Instrument strength for MR-Egger

F   = BXG^2/seBetaXG^2
mF  = mean(F)
Isq_unweighted <- Isq(BXG,seBetaXG) #Unweighted
Isq_weighted <- Isq((BXG/seBetaYG),(seBetaXG/seBetaYG)) #Weighted

cat("
Mean F, unweighted I-squared and weighted I-squared
")
print(mF)
print(Isq_unweighted)
print(Isq_weighted)


sink()