(1) GWAS with BGData

(1.1) Loading the data

 rm(list=ls())
 library(BGData)
 setwd('../output')
 load('../data/example_sire_wg.RData')

(1.2) Creating a BGData object*

 DATA=BGData(geno=X, pheno=data.frame(y=blup,pc1=PC[,1],pc2=PC[,2]),map=data.frame())

(1.3) Running GWAS using BGData

 B<-GWAS(y~pc1+pc2,data=DATA,method='lm',mc.cores=10,verbose=T) # 6.9 seq with 10 cores
 plot(-log10(B[,4]),type='o',col=4,cex=.5,main='GWAS WG Sire model')

(2) BGLR

(2.1) Gaussian Prior (BRR)

 nIter=55000
 burnIn=5000
 X=scale(X)/sqrt(ncol(X))
 fmBRR=BGLR(y=blup,ETA=list(list(X=X,model='BRR')),nIter=nIter,burnIn=burnIn,saveAt='BRR_') # ~.05 sec/iteration
 save(fmBRR,file='fmBRR.RData')

(2.2) Scaled-t prior (BayesA)

 fmBA=BGLR(y=blup,ETA=list(list(X=X,model='BayesA')),nIter=nIter,burnIn=burnIn,saveAt='BA_') 
 save(fmBA,file='fmBA.RData')

(2.3) Spike-Slab-1 (BayesB)

 fmBB=BGLR(y=blup,ETA=list(list(X=X,model='BayesB')),nIter=nIter,burnIn=burnIn,saveAt='BB_') 
 save(fmBB,file='fmBB.RData')

(2.4) Spike-Slab-2 (BayesC)

 fmBC=BGLR(y=blup,ETA=list(list(X=X,model='BayesC')),nIter=55000,burnIn=5000)
 save(fmBC,file='fmBC.RData')

(2.5) Bayesian-LASSO (BL)

 fmBL=BGLR(y=blup,ETA=list(list(X=X,model='BL')),nIter=55000,burnIn=5000)
  save(fmBL,file='fmBL.RData')
pValue=B[,4]
thresholds=quantile(pValue,c(.005,.01,.05,.1,.33,.5))
sets=ifelse(pValue<=thresholds[1],1,
      ifelse(pValue<=thresholds[2],2,
       ifelse(pValue<=thresholds[3],3,
        ifelse(pValue<=thresholds[4],4,5))))
fmBRRSets=BGLR(y=blup,ETA=list(list(X=X,model='BRR_sets',sets=sets)),nIter=55000,burnIn=5000)
save(fmBRRSets,file='fmBRRSets.RData')

(3) 5-fold Cross-validation

N=nrow(X)
folds=rep(1:5,each=ceiling(N/5))[1:N]
folds=folds[order(runif(N))]

yHatCV_BRR=rep(NA,N)
yHatCV_BB=rep(NA,N)
yHatCV_BRRSets=rep(NA,N)
for(i in 2:5){
 yNA=blup
 tst=which(folds==i)
 yNA[tst]=NA
 fm=BGLR(y=yNA,ETA=list(list(X=X,model='BRR')),nIter=6000,burnIn=1000,verbose=F)
  yHatCV_BRR[tst]=fm$yHat[tst]
 fm=BGLR(y=yNA,ETA=list(list(X=X,model='BayesB')),nIter=6000,burnIn=1000,verbose=F)
  yHatCV_BB[tst]=fm$yHat[tst]
  
 DATA@pheno$y=yNA
 B<-GWAS(y~pc1+pc2,data=DATA,method='lm',mc.cores=10,verbose=T) # 6.9 seq with 10 cores
 pValue=B[,4]
 thresholds=quantile(pValue,c(.005,.01,.05,.1,.33,.5))
  sets=ifelse(pValue<=thresholds[1],1,
        ifelse(pValue<=thresholds[2],2,
         ifelse(pValue<=thresholds[3],3,
          ifelse(pValue<=thresholds[4],4,5)))) 
 fm=BGLR(y=yNA,ETA=list(list(X=X,model='BRR_sets',sets=sets)),nIter=6000,burnIn=1000,verbose=F)
 yHatCV_BRRSets[tst]=fm$yHat[tst]
  print(i)
  save(blup,yHatCV_BB,yHatCV_BRR,yHatCV_BRRSets,file='CV.RData')
}


pdf('5-fold CV.pdf')
 plot(y=blup,x=yHatCV_BB,cex=.5,col=4,ylab='BLUP',xlab='yHatCV_BB',main='BayesB')
 plot(y=blup,x=yHatCV_BRR,cex=.5,col=4,ylab='BLUP',xlab='yHatCV_BRR',main='BRR')
dev.off()