aryeelab/diffloop-paper

pairwise comparison for one control vs one treated - diffloop/ HiChIP

chaigenomics opened this issue · 5 comments

Hi,
Caleb and Martin.
I have an issue with diffloop. I have 16 HiChIP samples, 1 control and 4 cancer cell lines, processed by HiC-Pro and hichipper to fetch loops. No I am interested in finding loss and gain in interaction, which is why I am using diffloop. Since the 15 cancer lines are extremely heterogeneous, I was planning to do pairwise analysis, so control to line A and control to line B etc. But when I do loopAssoc, for my diff loop object, I am thrown this error
group <- factor(c("control", "GBM")) design <- model.matrix(~group) km_res <- loopAssoc(sigloops_5kb, method = "edgeR", design = design, contrast = "contr.treatment")
The coefficients of the fitted GLM object are: (Intercept) groupsGBMError in glmFit.default(y, design = design, dispersion = dispersion, offset = offset, : NA dispersions not allowed In addition: Warning message: In estimateGLMCommonDisp.default(y = y$counts, design = design, : No residual df: setting dispersion to NA
Is it so, that I can't do pairwise comparisons, with just one replicate for diffloop, or is it something wrong with my design modes

Hi Caleb,

This is the code that did not work (1 control to 1 cancer)

`library(diffloop)
library(diffloopdata)
library(ggplot2)
library(GenomicRanges)
library(ggrepel)
library(DESeq2)
library(rtracklayer)
library(GenomicInteractions)
library(edgeR)
library(dplyr)
library(GenomicFeatures)


mybeddir1 <- file.path("/Volumes/ucmm/GrpSR/Itzel&Chaitali/HiChIP_H3K4me3/test")
samples1 <- c("hAstro", "U3121")
full1 <- loopsMake.mango(mybeddir1, samples1)
cells <- c("control", "GBM")
full1 <- updateLDGroups(full1, cells)
sig_loops_pairwise <- mangoCorrection(full1, FDR= 0.01)

dim(sig_loops_pairwise)
#anchors interactions samples colData rowData
#1   31761        65659       2       2       3

sig_loops_anchors <- as.data.frame(sig_loops_pairwise@anchors)
sig_loops_counts <- as.data.frame(sig_loops_pairwise@counts)
sig_loops_interactions <- as.data.frame(sig_loops_pairwise@interactions)
sig_loops_rowData <- as.data.frame(sig_loops_pairwise@rowData) 

k_dis <- ((sig_loops_counts[,1]>=5 &sig_loops_counts[,2]==0) | (sig_loops_counts[,2]>=5 &sig_loops_counts[,1]==0))


qc_filt <- subsetLoops(sig_loops, !k_dis)

km_filt <- qc_filt[,c(1,2)]

dim(km_filt)
#anchors interactions samples colData rowData
#1   53175       107029       2       2       3

km_res <- quickAssoc(km_filt)

"Error in glmFit.default(y, design = design, dispersion = dispersion, offset = offset,  : 
  NA dispersions not allowed
In addition: Warning message:
In estimateGLMCommonDisp.default(y = y$counts, design = design,  :
  No residual df: setting dispersion to NA"
`

This is the code that works:

`library(diffloop)
library(diffloopdata)
library(ggplot2)
library(GenomicRanges)
library(ggrepel)
library(DESeq2)
library(rtracklayer)
library(GenomicInteractions)
library(edgeR)
library(dplyr)
library(GenomicFeatures)
mybeddir <- file.path("/Volumes/GrpSR08/hicpro_loops/mangoloops")
samples1 <- c("hAstro", "U3008", "U3009")
full1 <- loopsMake.mango(mybeddir, samples1)

celltypes <- c("control", "GBM", "GBM")

full1 <- updateLDGroups(full1, celltypes)
head(full1)
dim(full1)
#  anchors interactions samples colData rowData
#1   76780       270973       3       2       1

#quality control

#first rule is to remove CNV regions, to reduce bias in topology due to structural variation
#second choose loops with FDR <0.01

sig_loops <- mangoCorrection(full1, FDR= 0.01)
dim(sig_loops)
#   anchors interactions samples colData rowData
#1   60141       136059       3       2       3

sig_loops_anchors <- as.data.frame(sig_loops@anchors)
sig_loops_counts <- as.data.frame(sig_loops@counts)
sig_loops_interactions <- as.data.frame(sig_loops@interactions)
sig_loops_rowData <- as.data.frame(sig_loops@rowData)


#another thing is in PET experiments, they remove loops present 5 or more in experiment A and 0 in B. This reduces the enrichment bias, or gained, loss bias. Although, it seems that
# an interaction like this is what we are looking into. Therefore I will do the qc with both and see, if PCA and loop distance differs too much

k_dis <- ((sig_loops_counts[,1]>=5 &sig_loops_counts[,2]==0) | (sig_loops_counts[,2]>=5 &sig_loops_counts[,1]==0))
m_dis <- ((sig_loops_counts[,2]>=5 &sig_loops_counts[,3]==0) | (sig_loops_counts[,3]>=5 &sig_loops_counts[,2]==0))
n_dis <- ((sig_loops_counts[,1]>=5 &sig_loops_counts[,3]==0) | (sig_loops_counts[,3]>=5 &sig_loops_counts[,1]==0))


qc_filt <- subsetLoops(sig_loops, !(k_dis | m_dis | n_dis))

dim(qc_filt)
#   anchors interactions samples colData rowData
#1   59763       130357       3       2       3

plt1 <- loopDistancePlot(qc_filt)
png("/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/plot_loop_distance_min5loops-in-all.png", width = 6, height = 6, res = 600, units = 'in')
plt1
par(mar=c(5,5,5,5) + 0.1)
dev.off()

loopMetrics(qc_filt)
#       hAstro  U3008 U3009
#self     1655   1430   906
#unique  74347 103698 50291


#plotting PCA
pcp1dat <- qc_filt
pcp1dat@colData$sizeFactor <- 1
samples <- c("hAstro", "U3008", "U3009")
pcp1 <- pcaPlot(pcp1dat) + geom_text_repel(aes(label=samples)) 
pcp1

png("/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/plotPCA_min5loops-in-all.png", width = 6, height = 6, res = 600, units = 'in')
pcp1
par(mar=c(6,5,5,6) + 0.1)
dev.off()

#differential analysis with loops
#fetch number of loops with min size of loop 5kb
sigloops_5kb <- filterLoops(sig_loops, width=5000, nsamples = 3)
dim(sigloops_5kb)

#anchors interactions samples colData rowData
#1   60137       135688       3       2       3


group <- factor(c("control", "GBM", "GBM"))
design <- model.matrix(~sigloops_5kb@colData$groups)
design
#(Intercept) sigloops_5kb@colData$groupsGBM
#1           1                              0
#2           1                              1
#3           1                              1
#attr(,"assign")
#[1] 0 1
#attr(,"contrasts")
#attr(,"contrasts")$`sigloops_5kb@colData$groups`
#[1] "contr.treatment"



head(sigloops_5kb)

#fitting test
km_res <- loopAssoc(sigloops_5kb, method = "edgeR", design = design,
                    contrast = "contr.treatment")

#The coefficients of the fitted GLM object are:
#  (Intercept) groupsGBMError in glmLRT(fit, contrast = contrast) : 
#  contrast vector of wrong length, should be equal to number of coefficients in the linear model.

km_res <- quickAssoc(sigloops_5kb)

head(km_res)
ranges_km_res_df <- as.data.frame(ranges_km_res)
export.bed(ranges_km_res_df,"/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/2lines_to_hAstro/ranges_differential_anchor_list.bed")

interactions_km_res <- km_res@interactions
write.table(interactions_km_res, "/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/2lines_to_hAstro/anchor-to-anchor.txt", sep = "\t", quote = F)
stats_anchors_km_res <- km_res@rowData
write.table(stats_anchors_km_res, "/Users/chaitalichakraborty/Desktop/HiChIP/hichip_diffloop_test/2lines_to_hAstro/stats_interactions.txt", sep = "\t", quote = F)
head(km_res@rowData)

#loopWidth   mango.FDR      mango.P      logFC   logCPM        LR    PValue       FDR
#1    155848 0.002211631 0.0002586233 -0.7994118 4.856058 0.1101437 0.7399805 0.7756031
#2    628887 0.005294649 0.0018399946  2.3900465 4.692128 0.7003779 0.4026567 0.4581754
#3   1112210 0.004706375 0.0010426388  2.3132504 4.668750 0.6720622 0.4123335 0.4581754
#4   1130428 0.004706375 0.0010893690  2.3132504 4.668750 0.6720622 0.4123335 0.4581754
#5    518797 0.005460149 0.0019594349 -3.1126621 4.669383 1.5763606 0.2092857 0.4581754
#6    619239 0.005255405 0.0018130240 -3.1126621 4.669383 1.5763606 0.2092857 0.4581754`

And then I have another issue- I tried all cancer lines (loops from 15 cancer HiChIPs and 1 control HiChIP) and it did not give me a list of significant loops on mangoCorrection

library(diffloop)
library(diffloopdata)
library(ggplot2)
library(GenomicRanges)
library(ggrepel)
library(DESeq2)
library(rtracklayer)
library(GenomicInteractions)
library(edgeR)
library(dplyr)
library(GenomicFeatures)


#mango files are in /Volumes/ucmm/GrpSR/Itzel\&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file/ folder
#all files were copied as- cp /Volumes/GrpSR08/hicpro_loops/hichipper/hichipper_cell.line/fastq.interactions.all.mango /Volumes/ucmm/GrpSR/Itzel\&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file/cell.line.interactions.all.mango
#for example: cp /Volumes/GrpSR08/hicpro_loops/hichipper/hichipper_hAstro/fastq.interactions.all.mango /Volumes/ucmm/GrpSR/Itzel\&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file/hAstro.interactions.all.mango
#the copying codes are in file_naming_copying_to_server.txt in the ~/Desktop/HiChIP/loop_analysis
mybeddir <- file.path("/Volumes/ucmm/GrpSR/Itzel&Chaitali/HiChIP_H3K4me3/mango_interaction_loop_file")
samples <- c("hAstro", "U3008", "U3009", "U3013", "U3028", "U3031", "U3039","U3042", "U3047", "U3054", "U3073", "U3078", "U3085", "U3086", "U3118", "U3121")
full <- loopsMake.mango(mybeddir, samples)
celltypes <- c("control", rep("GBM", 15) )
full <- updateLDGroups(full, celltypes)
head(full)
#An object of class "loops"
#Slot "anchors":
#GRanges object with 6 ranges and 0 metadata columns:
#      seqnames        ranges strand
#         <Rle>     <IRanges>  <Rle>
#  [1]        1 130468-140254      *
#  [2]        1 501592-514395      *
#  [3]        1 597464-599411      *
#  [4]        1 718219-720167      *
#  [5]        1 736547-742813      *
#  [6]        1 744013-745862      *
#  -------
#  seqinfo: 123 sequences from an unspecified genome; no seqlengths#

#Slot "interactions":
#     left right
#[1,]    1     2
#[2,]    1     5
#[3,]    2    19
#[4,]    3    19
#[5,]    4    46
#[6,]    5    69

#Slot "counts":
#    hAstro U3008 U3009 U3013 U3028 U3031 U3039 U3042 U3047 U3054 U3073 U3078 U3085 U3086 U3118 U3121
#[1,]      0     0     0     0     0     0     0     0     0     0     0     0     0     1     0     0
#[2,]      0     0     0     0     0     0     0     0     0     0     0     0     0     1     0     0
#[3,]      0     0     0     0     0     0     0     0     0     0     0     0     0     1     0     0
#[4,]      0     0     0     0     0     0     0     0     0     0     1     0     0     0     0     0
#[5,]      0     0     0     0     0     0     0     0     0     0     0     0     0     1     0     0
#[6,]      0     0     0     0     0     0     0     0     0     0     0     0     0     1     0     0

#Slot "colData":
#       sizeFactor  groups
#hAstro  0.6273424 control
#U3008   0.6135267     GBM
#U3009   0.4160136     GBM
#U3013   1.9933238     GBM
#U3028   1.9933238     GBM
#U3031   1.9933238     GBM

#Slot "rowData":
#  loopWidth
#1    372632
#2    604319
#3    431322
#4    340878
#5    933127
#6   1140328"

dim(full)
#   anchors interactions samples colData rowData
#1  153284      1057516      16       2       1


# choose loops with FDR <0.01

sig_loops <- mangoCorrection(full, FDR= 0.01)
#Error in data.frame(chr1 = rep(chrom, curlength), start1 = start1[1:curlength],  : 
#arguments imply differing number of rows: 0, 1