###################################### ##Script to run MR analyses for EF to CUD #The syntax was created by Sabrina M. I. Burton. #The syntax was checked and edited by Zoe E. Reed #R Version 4.0.3 #The data for analyses was obtained from published genome-wide association studies #The code below shows all manipulations and recodes with annotations ###################################### #Load libraries (use install.packages() to install) ###################################### library(devtools) library(data.table) #install_github("MRCIEU/TwoSampleMR") #devtools::install_github('MarkEdmondson1234/googleAuthR@v0.8.1') library(R.utils) library(TwoSampleMR) library(MRInstruments) library(MendelianRandomization) library(ggplot2) ###################################### #Read in GWAS data ###################################### #Create the ID for the exposure data. Exposure data output from proxy search script will be used exp_id<-"executive_function" #Columns for the outcome data, available from the GWAS datasets. #Those commented out are where this data is not available out_file<-"CUD_EUR_casecontrol_public_11.14.2020" #Add path to file here out_sep<-" " #This is the file separator out_snp<-"SNP" #Name of column with the rsid in it out_beta<-"Beta" #Name of column with effect size (beta or OR) out_se<-"SE" #Name of column with standard error out_ea<-"A1" #Name of column with the effect allele out_oa<-"A2" #Name of column with other allele #out_eaf<-"Freq1" #Name of column with effect allele frequency out_p<-"P" #Name of column with the p value out_n<-"N" #Name of column with total sample size out_id<-"CUD" #Phenotype name out_ncase<-"N_CAS" #These (ncase and ncontrol) two are optional if you have cases and controls sample size instead or in addition to total if available, so enter column name for that in quotes and uncomment if using out_ncontrol<-"N_CON" #Read in exposure data from proxy search script exp_dat_clumped <- read.csv("EF_CUD_Proxy.csv") #Specify outcome data, comment out aspects that are not included in the columns for the outcome data. out_dat<-read_outcome_data( snps = exp_dat_clumped$SNP, filename = out_file, sep = out_sep, snp_col = out_snp, beta_col = out_beta, se_col = out_se, effect_allele_col = out_ea, other_allele_col = out_oa, #eaf_col = out_eaf, pval_col = out_p, samplesize_col = out_n, ncase_col = out_ncase, ncontrol_col = out_ncontrol ) #Plot names, here specify the path to save out output files to. path<-"FILEPATH" exp_out<-"ExecutiveFunctioning_CUD" #Check data, the number of rows should be the same as the clumped exposure data dim(out_dat) dim(exp_dat_clumped) head(out_dat) head(exp_dat_clumped) ###################################### #Harmonise data ###################################### #Harmonise data har_dat <- harmonise_data( exposure_dat = exp_dat_clumped, outcome_dat = out_dat ) #Check SNPs to see if harmonisation worked ok dim(har_dat) head(har_dat) #Specify exposure and outcome phenotypes har_dat$id.exposure<-exp_id har_dat$id.outcome<-out_id ###################################### #Run analyses and use sink to save to a text file #We use the generate_odds_ratio when the outcome is binary e.g. case/control ###################################### sink(paste0(path, exp_out,"MR_Results.txt")) #Main analysis results<-mr(har_dat) or<-generate_odds_ratios(results) cat(" MR results ") print(or) #Sensitivity analyses het<-mr_heterogeneity(har_dat) #Heterogeneity cat(" Heterogeneity ") print(het) plt<-mr_pleiotropy_test(har_dat) #Horizontal pleiotropy cat(" Horizontal pleiotropy ") print(plt) #Plots scatter<-mr_scatter_plot(or, har_dat) #SNP effects on exp against SNP effects on outcome ggsave(scatter[[1]], file=paste0(path, exp_out,"scatter.pdf"), width=7, height=7) res_single<-mr_singlesnp(har_dat) res_single_or<-generate_odds_ratios(res_single) cat(" MR on single SNPs ") print(res_single_or) forest<-mr_forest_plot(res_single_or) #Forest plot to compare MR estimates from different methods against single SNP tests ggsave(forest[[1]], file=paste0(path, exp_out,"forest.pdf"), width=7, height=15) res_loo<-mr_leaveoneout(har_dat) res_loo_or<-generate_odds_ratios(res_loo) cat(" Leave one out sensitivity analysis ") print(res_loo_or) loo<-mr_leaveoneout_plot(res_loo_or) #Leave-one-out analysis ggsave(loo[[1]], file=paste0(path, exp_out,"leaveoneout.pdf"), width=7, height=15) funnel<-mr_funnel_plot(res_single_or) #Funnel plot ggsave(funnel[[1]], file=paste0(path, exp_out,"funnel.pdf"), width=7, height=7) #I-squared function Isq <- function(y,s){ k = length(y) w = 1/s^2; sum.w = sum(w) mu.hat = sum(y*w)/sum.w Q = sum(w*(y-mu.hat)^2) Isq = (Q - (k-1))/Q Isq = max(0,Isq) return(Isq) } #Rename required columns har_dat$BetaXG<-har_dat$beta.exposure har_dat$seBetaXG<-har_dat$se.exposure BetaXG = har_dat$BetaXG seBetaXG = har_dat$seBetaXG seBetaYG<-har_dat$se.outcome BXG= abs(BetaXG) #Make gene--exposure estimates positive #Calculate F statistics and I-squared statistics #to measure Instrument strength for MR-Egger F = BXG^2/seBetaXG^2 mF = mean(F) Isq_unweighted <- Isq(BXG,seBetaXG) #Unweighted Isq_weighted <- Isq((BXG/seBetaYG),(seBetaXG/seBetaYG)) #Weighted cat(" Mean F, unweighted I-squared and weighted I-squared ") print(mF) print(Isq_unweighted) print(Isq_weighted) sink()