Is there any way to extract the deletions from CNAmtx.RData for downstream analysis?

Question

Is there any way to extract the deletions from CNAmtx.RData for downstream analysis?

mbihie opened this issue a year ago · 1 comment

Hello,

I am trying to visualize the deletions from different samples using the CNAmtx.RData file produced by SCEVAN. I assume this is the data used to generate the heatmaps. I am able to combine CNAmtx.RData and count_mtx_annot.RData into a Seurat object, which I can use to create a violin plot. However, I am struggling to filter the data down to only the deletions. Is there any way to do this?

Any help would be greatly appreciated.

code

# Attach gene names to the SCEVAN copy-number matrix and wrap the result in a
# Seurat object so that per-cell values can be plotted.

# Load the copy-number matrix. The .RData file is expected to define
# `CNA_mtx_relat` in the workspace.
load("data/TN.B1.0554_CNAmtx.RData")
CNA.TN.B1.0554 <- as.data.frame(CNA_mtx_relat)

# Load the gene annotation. The .RData file is expected to define
# `count_mtx_annot` in the workspace.
load("~/BRCA/brn-server/TN_B1_0554, TN_0126, N_1105_epi/TN.B1.0554_count_mtx_annot.RData")
ANN.TN.B1.0554 <- count_mtx_annot

# The annotation is matched to the matrix purely by row position, so the two
# must have the same number of rows; fail early instead of mislabelling genes.
stopifnot(nrow(ANN.TN.B1.0554) == nrow(CNA.TN.B1.0554))

# Use the gene names as row names. Assigning them directly replaces the
# temporary index column + join + column_to_rownames() round trip and does not
# require dplyr/tibble to be attached (nor risk `select()` being masked).
rownames(CNA.TN.B1.0554) <- as.character(ANN.TN.B1.0554[["gene_name"]])

# Create Seurat object with SCEVAN info. `outputAnalysis` must already exist
# in the workspace; it is not created by this snippet.
srt.0554 <- Seurat::CreateSeuratObject(
  CNA.TN.B1.0554,
  meta.data = outputAnalysis[[1]][["TN.B1.0554"]]
)

Seurat::VlnPlot(srt.0554, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)

Answer 1 · 2023-10-14T11:07:06.000Z

Hi @mbihie

I don't know if I understand your aim correctly, but in Seurat, you can load the alteration information and filter for example like this:

library(Seurat)

# Load the SCEVAN output. The two .RData files are expected to define
# `CNA_mtx_relat` and `count_mtx_annot` in the workspace.
load("MGH105_CNAmtx.RData")
load("MGH105_count_mtx_annot.RData")

rownames(CNA_mtx_relat) <- count_mtx_annot$gene_id

### Take from the output segmentation files the segment containing the alteration you are interested in.
### chr 7	start 40097134	end 72828198

# which() drops NA comparisons so they cannot turn into NA row indices.
in_segment <- which(
  count_mtx_annot$seqnames == 7 &
    count_mtx_annot$start >= 40097134 &
    count_mtx_annot$end <= 72828198
)
if (length(in_segment) == 0) {
  stop("No annotated genes fall inside the requested segment.", call. = FALSE)
}

# drop = FALSE keeps a matrix even when a single gene matches.
subSet <- CNA_mtx_relat[in_segment, , drop = FALSE]

# One value per cell (column). When a cell's value is constant across the
# segment the median equals that value; unlike unique(), it still returns
# exactly one number per cell when the values differ.
Amp7 <- apply(subSet, 2, median)
Amp7 <- data.frame(Amp7 = Amp7)

CNA_seurat <- CreateSeuratObject(CNA_mtx_relat, meta.data = Amp7)

all.genes <- rownames(CNA_seurat)
CNA_seurat <- ScaleData(CNA_seurat, features = all.genes)
CNA_seurat <- RunPCA(CNA_seurat, features = all.genes)
CNA_seurat <- FindNeighbors(CNA_seurat, dims = 1:10)
CNA_seurat <- FindClusters(CNA_seurat, resolution = 0.5)
CNA_seurat <- RunUMAP(CNA_seurat, dims = 1:10)

p1 <- VlnPlot(CNA_seurat, features = c("Amp7"), ncol = 2)
p2 <- FeaturePlot(CNA_seurat, features = c("Amp7"))
gridExtra::grid.arrange(p1, p2, nrow = 1)

### Subset the cells by taking only those with a certain CN log ratio based on the alteration you are interested in.
subset(x = CNA_seurat, subset = Amp7 > 0.1)

# NOTE(review): for a deletion the same approach should work with a negative
# cut-off on the segment of interest, e.g. subset(x = CNA_seurat,
# subset = Amp7 < -0.1); the threshold is illustrative -- confirm against
# your data.

### Or you can select them on the basis of copy number clustering.
subset(x = CNA_seurat, subset = seurat_clusters == 0)