leelabsg/SKAT

skat_NULL_emmaX

Closed this issue · 1 comments

Dear Dr Lee,

I am observing strange behoavior with SKAT_NULL_emmaX. Dropbox links below are QQ plots for SKAT_Null_Model veruss SKAT_NULL_emmaX using the same data (R code below). For SKAT_Null_Model I use age, gender, bmi, and 10 PCs as covariates, QQ plot looks great. For SKAT_NULL_emmaX I use the kinship matrix (generated using /emmax-beta-07Mar2010/emmax-kin -v -d 10 transposed.data) and age/gender/bmi. The second QQ plot looks awful.
Any suggestions as to what I could be doing wrong?

Thanks,
Juan

https://www.dropbox.com/s/0ue9eg87bn0ejci/test.skat.emmax.txt.QQ.png?dl=0
https://www.dropbox.com/s/0xt9tfg1v47ulcc/test.skat.null.txt.QQ.png?dl=0

######################
library(SKAT);
q.bed<-"./data.bed";
q.bim<-"./data.bim";
q.fam<-"./data.fam";
q.setid<-"./fout.qe867.low.pcs-nomod-noseqf.setid";
q.ssd<-"./data.ssd";
q.info<-"./data.info";
q.cov<-"./data.txt";
kf<-"./data.BN.kinf";
fam_cov<-Read_Plink_FAM_Cov(q.fam,q.cov,Is.binary=FALSE,cov_header=TRUE);
y<-fam_cov$Phenotype;
Generate_SSD_SetID(q.bed,q.bim,q.fam,q.setid,q.ssd,q.info);
ssd.info<-Open_SSD(q.ssd,q.info);
obj<-SKAT_Null_Model(y ~ fam_cov$age + fam_cov$bmi + fam_cov$gender + fam_cov$pc1 + fam_cov$pc2 + fam_cov$pc3 + fam_cov$pc4 +fam_cov$pc5 + fam_cov$pc6 + fam_cov$pc7 + fam_cov$pc8 + fam_cov$pc9 + fam_cov$pc10);
out_cov<-SKAT.SSD.All(ssd.info,obj);
t<-out_cov$results;
t<-t[with(t,order(P.value)),];
d<-dim(t)[1];
t$rank<-seq(1,d);
t$bonf<-0.05/d;
t$bonf.sig<-t$P.value<t$bonf;
fn<-"test.skat.null.txt";
write.table(t,file=fn,sep="\t",quote=F,row.names=F,col.names=T);
fastqq2 <- function(pvals, ...) { np <- length(pvals); thin.idx <- 1:np; thin.logp.exp <- -log10(thin.idx/(np+1)); thin.logp.obs <- -log10(pvals[order(pvals)[thin.idx]]); plot(thin.logp.exp, thin.logp.obs, xlab=expression(-log10), ylab=expression(-log10), ...); abline(0, 1, col='gray', lty=2); thin.idx <- c((0.9)^(5:1), thin.idx); logp.cint.95 <- -log10(qbeta(0.95, thin.idx, np - thin.idx + 1)); logp.cint.05 <- -log10(qbeta(0.05, thin.idx, np - thin.idx + 1)); thin.logp.exp <- -log10(thin.idx/(np+1)); lines(thin.logp.exp, logp.cint.95, lty=2, col='red'); lines(thin.logp.exp, logp.cint.05, lty=2, col='red'); }
l <- 1; fastqq2(pchisq(qchisq(t$P.value,1)/l,1)); mt<-0.05/length(t$P.value); abline(h=-log10(mt), lty=3); png(paste(fn,'QQ','png',sep='.')); fastqq2(pchisq(qchisq(t$P.value,1)/l,1)); dev.off();
obj<-SKAT_NULL_emmaX(y ~ fam_cov$age + fam_cov$bmi + fam_cov$gender,Kin.File=kf);
out_cov<-SKAT.SSD.All(ssd.info,obj);
t<-out_cov$results;
t<-t[with(t,order(P.value)),];
d<-dim(t)[1];
t$rank<-seq(1,d);
t$bonf<-0.05/d;
t$bonf.sig<-t$P.value<t$bonf;
fn<-"test.skat.emmax.txt";
write.table(t,file=fn,sep="\t",quote=F,row.names=F,col.names=T);
fastqq2 <- function(pvals, ...) { np <- length(pvals); thin.idx <- 1:np; thin.logp.exp <- -log10(thin.idx/(np+1)); thin.logp.obs <- -log10(pvals[order(pvals)[thin.idx]]); plot(thin.logp.exp, thin.logp.obs, xlab=expression(-log10), ylab=expression(-log10), ...); abline(0, 1, col='gray', lty=2); thin.idx <- c((0.9)^(5:1), thin.idx); logp.cint.95 <- -log10(qbeta(0.95, thin.idx, np - thin.idx + 1)); logp.cint.05 <- -log10(qbeta(0.05, thin.idx, np - thin.idx + 1)); thin.logp.exp <- -log10(thin.idx/(np+1)); lines(thin.logp.exp, logp.cint.95, lty=2, col='red'); lines(thin.logp.exp, logp.cint.05, lty=2, col='red'); }
l <- 1; fastqq2(pchisq(qchisq(t$P.value,1)/l,1)); mt<-0.05/length(t$P.value); abline(h=-log10(mt), lty=3); png(paste(fn,'QQ','png',sep='.')); fastqq2(pchisq(qchisq(t$P.value,1)/l,1)); dev.off();

This issue is fixed in newer versions