pairwise comparison for one control vs one treated - diffloop/ HiChIP
chaigenomics opened this issue · 5 comments
Hi,
Caleb and Martin.
I have an issue with diffloop. I have 16 HiChIP samples, 1 control and 4 cancer cell lines, processed by HiC-Pro and hichipper to fetch loops. No I am interested in finding loss and gain in interaction, which is why I am using diffloop. Since the 15 cancer lines are extremely heterogeneous, I was planning to do pairwise analysis, so control to line A and control to line B etc. But when I do loopAssoc, for my diff loop object, I am thrown this error
group <- factor(c("control", "GBM")) design <- model.matrix(~group) km_res <- loopAssoc(sigloops_5kb, method = "edgeR", design = design, contrast = "contr.treatment")
The coefficients of the fitted GLM object are: (Intercept) groupsGBMError in glmFit.default(y, design = design, dispersion = dispersion, offset = offset, : NA dispersions not allowed In addition: Warning message: In estimateGLMCommonDisp.default(y = y$counts, design = design, : No residual df: setting dispersion to NA
Is it so, that I can't do pairwise comparisons, with just one replicate for diffloop, or is it something wrong with my design modes
Hi Caleb,
This is the code that did not work (1 control to 1 cancer)
`library(diffloop)
library(diffloopdata)
library(ggplot2)
library(GenomicRanges)
library(ggrepel)
library(DESeq2)
library(rtracklayer)
library(GenomicInteractions)
library(edgeR)
library(dplyr)
library(GenomicFeatures)
mybeddir1 <- file.path("/Volumes/ucmm/GrpSR/Itzel&Chaitali/HiChIP_H3K4me3/test")
samples1 <- c("hAstro", "U3121")
full1 <- loopsMake.mango(mybeddir1, samples1)
cells <- c("control", "GBM")
full1 <- updateLDGroups(full1, cells)
sig_loops_pairwise <- mangoCorrection(full1, FDR= 0.01)
dim(sig_loops_pairwise)
#anchors interactions samples colData rowData
#1 31761 65659 2 2 3
sig_loops_anchors <- as.data.frame(sig_loops_pairwise@anchors)
sig_loops_counts <- as.data.frame(sig_loops_pairwise@counts)
sig_loops_interactions <- as.data.frame(sig_loops_pairwise@interactions)
sig_loops_rowData <- as.data.frame(sig_loops_pairwise@rowData)
k_dis <- ((sig_loops_counts[,1]>=5 &sig_loops_counts[,2]==0) | (sig_loops_counts[,2]>=5 &sig_loops_counts[,1]==0))
qc_filt <- subsetLoops(sig_loops, !k_dis)
km_filt <- qc_filt[,c(1,2)]
dim(km_filt)
#anchors interactions samples colData rowData
#1 53175 107029 2 2 3
km_res <- quickAssoc(km_filt)
"Error in glmFit.default(y, design = design, dispersion = dispersion, offset = offset, :
NA dispersions not allowed
In addition: Warning message:
In estimateGLMCommonDisp.default(y = y$counts, design = design, :
No residual df: setting dispersion to NA"
`
This is the code that works:
`library(diffloop)
library(diffloopdata)
library(ggplot2)
library(GenomicRanges)
library(ggrepel)
library(DESeq2)
library(rtracklayer)
library(GenomicInteractions)
library(edgeR)
library(dplyr)
library(GenomicFeatures)
mybeddir <- file.path("/Volumes/GrpSR08/hicpro_loops/mangoloops")
samples1 <- c("hAstro", "U3008", "U3009")
full1 <- loopsMake.mango(mybeddir, samples1)
celltypes <- c("control", "GBM", "GBM")
full1 <- updateLDGroups(full1, celltypes)
head(full1)
dim(full1)
# anchors interactions samples colData rowData
#1 76780 270973 3 2 1
#quality control
#first rule is to remove CNV regions, to reduce bias in topology due to structural variation
#second choose loops with FDR <0.01
sig_loops <- mangoCorrection(full1, FDR= 0.01)
dim(sig_loops)
# anchors interactions samples colData rowData
#1 60141 136059 3 2 3
sig_loops_anchors <- as.data.frame(sig_loops@anchors)
sig_loops_counts <- as.data.frame(sig_loops@counts)
sig_loops_interactions <- as.data.frame(sig_loops@interactions)
sig_loops_rowData <- as.data.frame(sig_loops@rowData)
#another thing is in PET experiments, they remove loops present 5 or more in experiment A and 0 in B. This reduces the enrichment bias, or gained, loss bias. Although, it seems that
# an interaction like this is what we are looking into. Therefore I will do the qc with both and see, if PCA and loop distance differs too much
k_dis <- ((sig_loops_counts[,1]>=5 &sig_loops_counts[,2]==0) | (sig_loops_counts[,2]>=5 &sig_loops_counts[,1]==0))
m_dis <- ((sig_loops_counts[,2]>=5 &sig_loops_counts[,3]==0) | (sig_loops_counts[,3]>=5 &sig_loops_counts[,2]==0))
n_dis <- ((sig_loops_counts[,1]>=5 &sig_loops_counts[,3]==0) | (sig_loops_counts[,3]>=5 &sig_loops_counts[,1]==0))
qc_filt <- subsetLoops(sig_loops, !(k_dis | m_dis | n_dis))
dim(qc_filt)
# anchors interactions samples colData rowData
#1 59763 130357 3 2 3
plt1 <- loopDistancePlot(qc_filt)
png("/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/plot_loop_distance_min5loops-in-all.png", width = 6, height = 6, res = 600, units = 'in')
plt1
par(mar=c(5,5,5,5) + 0.1)
dev.off()
loopMetrics(qc_filt)
# hAstro U3008 U3009
#self 1655 1430 906
#unique 74347 103698 50291
#plotting PCA
pcp1dat <- qc_filt
pcp1dat@colData$sizeFactor <- 1
samples <- c("hAstro", "U3008", "U3009")
pcp1 <- pcaPlot(pcp1dat) + geom_text_repel(aes(label=samples))
pcp1
png("/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/plotPCA_min5loops-in-all.png", width = 6, height = 6, res = 600, units = 'in')
pcp1
par(mar=c(6,5,5,6) + 0.1)
dev.off()
#differential analysis with loops
#fetch number of loops with min size of loop 5kb
sigloops_5kb <- filterLoops(sig_loops, width=5000, nsamples = 3)
dim(sigloops_5kb)
#anchors interactions samples colData rowData
#1 60137 135688 3 2 3
group <- factor(c("control", "GBM", "GBM"))
design <- model.matrix(~sigloops_5kb@colData$groups)
design
#(Intercept) sigloops_5kb@colData$groupsGBM
#1 1 0
#2 1 1
#3 1 1
#attr(,"assign")
#[1] 0 1
#attr(,"contrasts")
#attr(,"contrasts")$`sigloops_5kb@colData$groups`
#[1] "contr.treatment"
head(sigloops_5kb)
#fitting test
km_res <- loopAssoc(sigloops_5kb, method = "edgeR", design = design,
contrast = "contr.treatment")
#The coefficients of the fitted GLM object are:
# (Intercept) groupsGBMError in glmLRT(fit, contrast = contrast) :
# contrast vector of wrong length, should be equal to number of coefficients in the linear model.
km_res <- quickAssoc(sigloops_5kb)
head(km_res)
ranges_km_res_df <- as.data.frame(ranges_km_res)
export.bed(ranges_km_res_df,"/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/2lines_to_hAstro/ranges_differential_anchor_list.bed")
interactions_km_res <- km_res@interactions
write.table(interactions_km_res, "/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/2lines_to_hAstro/anchor-to-anchor.txt", sep = "\t", quote = F)
stats_anchors_km_res <- km_res@rowData
write.table(stats_anchors_km_res, "/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/2lines_to_hAstro/stats_interactions.txt", sep = "\t", quote = F)
head(km_res@rowData)
#loopWidth mango.FDR mango.P logFC logCPM LR PValue FDR
#1 155848 0.002211631 0.0002586233 -0.7994118 4.856058 0.1101437 0.7399805 0.7756031
#2 628887 0.005294649 0.0018399946 2.3900465 4.692128 0.7003779 0.4026567 0.4581754
#3 1112210 0.004706375 0.0010426388 2.3132504 4.668750 0.6720622 0.4123335 0.4581754
#4 1130428 0.004706375 0.0010893690 2.3132504 4.668750 0.6720622 0.4123335 0.4581754
#5 518797 0.005460149 0.0019594349 -3.1126621 4.669383 1.5763606 0.2092857 0.4581754
#6 619239 0.005255405 0.0018130240 -3.1126621 4.669383 1.5763606 0.2092857 0.4581754`
And then I have another issue- I tried all cancer lines (loops from 15 cancer HiChIPs and 1 control HiChIP) and it did not give me a list of significant loops on mangoCorrection
library(diffloop)
library(diffloopdata)
library(ggplot2)
library(GenomicRanges)
library(ggrepel)
library(DESeq2)
library(rtracklayer)
library(GenomicInteractions)
library(edgeR)
library(dplyr)
library(GenomicFeatures)
#mango files are in /Volumes/ucmm/GrpSR/Itzel\&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file/ folder
#all files were copied as- cp /Volumes/GrpSR08/hicpro_loops/hichipper/hichipper_cell.line/fastq.interactions.all.mango /Volumes/ucmm/GrpSR/Itzel\&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file/cell.line.interactions.all.mango
#for example: cp /Volumes/GrpSR08/hicpro_loops/hichipper/hichipper_hAstro/fastq.interactions.all.mango /Volumes/ucmm/GrpSR/Itzel\&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file/hAstro.interactions.all.mango
#the copying codes are in file_naming_copying_to_server.txt in the ~/Desktop/HiChIP/loop_analysis
mybeddir <- file.path("/Volumes/ucmm/GrpSR/Itzel&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file")
samples <- c("hAstro", "U3008", "U3009", "U3013", "U3028", "U3031", "U3039","U3042", "U3047", "U3054", "U3073", "U3078", "U3085", "U3086", "U3118", "U3121")
full <- loopsMake.mango(mybeddir, samples)
celltypes <- c("control", rep("GBM", 15) )
full <- updateLDGroups(full, celltypes)
head(full)
#An object of class "loops"
#Slot "anchors":
#GRanges object with 6 ranges and 0 metadata columns:
# seqnames ranges strand
# <Rle> <IRanges> <Rle>
# [1] 1 130468-140254 *
# [2] 1 501592-514395 *
# [3] 1 597464-599411 *
# [4] 1 718219-720167 *
# [5] 1 736547-742813 *
# [6] 1 744013-745862 *
# -------
# seqinfo: 123 sequences from an unspecified genome; no seqlengths#
#Slot "interactions":
# left right
#[1,] 1 2
#[2,] 1 5
#[3,] 2 19
#[4,] 3 19
#[5,] 4 46
#[6,] 5 69
#Slot "counts":
# hAstro U3008 U3009 U3013 U3028 U3031 U3039 U3042 U3047 U3054 U3073 U3078 U3085 U3086 U3118 U3121
#[1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#[2,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#[3,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#[4,] 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
#[5,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#[6,] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
#Slot "colData":
# sizeFactor groups
#hAstro 0.6273424 control
#U3008 0.6135267 GBM
#U3009 0.4160136 GBM
#U3013 1.9933238 GBM
#U3028 1.9933238 GBM
#U3031 1.9933238 GBM
#Slot "rowData":
# loopWidth
#1 372632
#2 604319
#3 431322
#4 340878
#5 933127
#6 1140328"
dim(full)
# anchors interactions samples colData rowData
#1 153284 1057516 16 2 1
# choose loops with FDR <0.01
sig_loops <- mangoCorrection(full, FDR= 0.01)
#Error in data.frame(chr1 = rep(chrom, curlength), start1 = start1[1:curlength], :
#arguments imply differing number of rows: 0, 1