add_seurat_clustering is not working
saeedfc opened this issue · 11 comments
exp_mtx1 <- GetAssayData(SO, assay = "RNA", slot = "data")
exp_mtx1 <- as.matrix(exp_mtx1)
umap_mtx <- as.data.frame(Embeddings(SO, reduction = "umap"))
file.name <- "expanded.loom"
build_loom(
file.name=file.name,
dgem=exp_mtx1,
title="Data 2020", # A name for your data
genome="Human", # Just for user information, not used internally
default.embedding=umap_mtx,
default.embedding.name="Seurat_UMAP"
)
loom <- open_loom("expanded.loom", mode = 'r+')
#Load the DEG markers file as a data frame
wilcoxon_test_markers_csv <- read.csv("/mnt/DATA1/Markers_Fib_Jan27.csv", row.names = 1)
#Save it as an RDS file
saveRDS(wilcoxon_test_markers_csv, "/mnt/DATA1/Fibrosis/Full Scale Analysis/FibroBlasts/Markers_Fib_Jan27.rds")
#Make a named list of path to the markers file
#Name should be the resolution mentioned on the column name of corresponding seurat clustering for which you calculated the markers Eg: here 'res.0.25'
wilcoxon_test_markers <- list(res.0.25 = "/mnt/DATA1/Markers_Fib_Jan27.rds")
#Also for the corresponding resolution , if you have already annotated names for each cluster number, you can add it as a #data.frame
seurat.annotation <- data.frame(integrated_snn_res.0.25 = unique(as.numeric(as.character(SO$integrated_snn_res.0.25))), annotation = unique(as.character(SO$subtype)))
head(seurat.annotation)
# integrated_snn_res.0.25 annotation
#1 1 F3A
#2 2 Act_F
#3 0 Res_I
#4 4 Res_II
#5 3 SC3
#6 6 mixed
#7 5 TFR
#Add the clustering here
add_seurat_clustering(loom = loom
, seurat = SO
, seurat.markers.file.path.list = wilcoxon_test_markers
,seurat.clustering.prefix = 'integrated_snn_res.'
,annotation = seurat.annotation #the DF of annotation; if not annotated, use NULL
,annotation.cluster.id.cn = 'integrated_snn_res.0.25' #Column name for seurat cluster numbers of the annotation dataframe
,annotation.cluster.description.cn = 'annotation' #Column name for the cell types of the annotation dataframe
, seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
, seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
, seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))
I get this error. Can you kindly let me know, what could be going wrong here?
Error in if (nchar(x = d) > 0) { : argument is of length zero
> traceback()
5: FUN(X[[i]], ...)
4: lapply(X = seq_along(along.with = unique.clusters), FUN = function(cluster.idx) {
cluster.id <- unique.clusters[[cluster.idx]]
if (is.numeric(x = cluster.id)) {
description <- paste("NDA - Cluster", cluster.id)
}
else if (is.character(x = cluster.id)) {
cluster.id <- cluster.idx - 1
description <- cluster.id
}
else {
stop("Cluster labels are required to be of class character or numeric.")
}
if (!is.null(x = annotation)) {
annotation <- annotation[names(x = clusters)]
d <- as.character(x = unique(x = annotation[clusters ==
cluster.id]))
if (length(x = d) > 1) {
stop("Annotation is not unique: multiple annotation correspond to a cluster ID.")
}
if (nchar(x = d) > 0) {
description <- paste0(d, " (", cluster.id, ")")
}
}
return(list(id = cluster.id, description = description))
})
3: add_global_md_clustering(loom = loom, id = id, group = group,
name = name, clusters = clusters_as_factor_ids, annotation = annotation)
2: add_annotated_clustering(loom = loom, group = "Seurat", name = paste("Seurat,",
paste0(seurat.clustering.prefix, res)), clusters = cluster_ids,
annotation = cluster_annotation, is.default = is_default_clustering,
overwrite.default = default.clustering.overwrite)
1: add_seurat_clustering(loom = loom, seurat = SO, seurat.markers.file.path.list = wilcoxon_test_markers,
seurat.clustering.prefix = "integrated_snn_res.", annotation = seurat.annotation,
annotation.cluster.id.cn = "integrated_snn_res.0.25", annotation.cluster.description.cn = "annotation",
seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj"),
seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value"),
seurat.marker.metric.description = c("Average log fold change from Wilcox differential test",
"Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))
Thanks,
Saeed
Hi @dweemx ,
I use '0.10.1' version.
All other functions work well. Only the above part of adding seurat clustering + markers do not work. Adding column attributes is working, so I can visualize annotations. But not the markers.
Kind regards,
Saeed
Hey @saeedfc,
I couldn't reproduce your error with the latest version. The funcion add_seurat_clustering
works for me:
add_seurat_clustering(loom = loom,
seurat = so,
seurat.assay = "RNA",
seurat.clustering.prefix = "SCT_snn_res.",
default.clustering.resolution = "res.3",
seurat.markers.file.path.list = list(
SCT_snn_res.0.5 = "10xtest_seurat_markers.rds.gz",
SCT_snn_res.3 = "10xtest_seurat_markers2.rds.gz"
),
seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj"),
seurat.marker.metric.names = c("Avg. logFC", "adjusted P-value"),
seurat.marker.metric.description = c("Average log fold change", "Adjusted p-value (BF)"),
annotation = annotation,
annotation.cluster.id.cn = "cluster_id",
annotation.cluster.description.cn = "cluster_name")
Could you try to use the latest version of SCopeLoomR
(version 0.10.4) please ? It should be fixed in that version
Hi,
I just installed the latest version. now the command run without any error. But the markers do not get added.
wilcoxon_test_markers <- list(res.0.2 = paste0(getwd(),"/Markers_0.2.rds")
#Also for the corresponding resolution , if you have already annotated names for each cluster number, you can add it as a data.frame
seurat.annotation <- data.frame(integrated_snn_res.0.2 = unique(as.numeric(as.character(SO$integrated_snn_res.0.2))), annotation = unique(as.character(SO$subtype)))
add_seurat_clustering(loom = loom
, seurat = SO
, seurat.markers.file.path.list = wilcoxon_test_markers
,seurat.clustering.prefix = 'integrated_snn_res.'
,annotation = seurat.annotation #the DF of annotation; if not annotated, use NULL
,annotation.cluster.id.cn = 'integrated_snn_res.0.2' #Column name for seurat cluster numbers of the annotation dataframe
,annotation.cluster.description.cn = 'annotation' #Column name for the cell types of the annotation dataframe
, seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
, seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
, seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))
output below
[1] "Seurat, integrated_snn_res.1"
[1] "Adding Seurat clusters..."
[1] "Clusterings created..."
[1] "Clustering ID: 0"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 1 have not been computed.[1] "Seurat, integrated_snn_res.2"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 1"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 2 have not been computed.[1] "Seurat, integrated_snn_res.0.6"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 2"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.6 have not been computed.[1] "Seurat, integrated_snn_res.0.25"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 3"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.25 have not been computed.[1] "Seurat, integrated_snn_res.0.4"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 4"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.4 have not been computed.[1] "Seurat, integrated_snn_res.0.2"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 5"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.2 have not been computed.[1] "Seurat, integrated_snn_res.0.3"
[1] "Adding Seurat clusters..."
[1] "Clusterings already exists..."
[1] "Clustering ID: 6"
[1] "Adding Seurat markers..."
Seurat markers for clustering resolution 0.3 have not been computed.
It came out as if there was no markers calculated for res 0.2 even though I provided the path in the list.
hi saeed,
can you try again but with slightly different marker names:
wilcoxon_test_markers <- list(integrated_snn_res.0.2 = paste0(getwd(),"/Markers_0.2.rds")
Hi @tropfenameimer ,
Thanks!
It seems to have worked now.
output
[1] "Seurat, integrated_snn_res.0.2"
[1] "Adding Seurat clusters..."
[1] "Clusterings created..."
[1] "Clustering ID: 0"
[1] "Adding Seurat markers..."
[1] "Adding markers for clustering 0..."
[1] "Adding metrics for clustering 0..."
However, upon uploading to scope, I do not understand how I can utilize the markers list? Is this just a way to store the markers list in the loom file? Or is there a tool in Scope where we can see the top markers of a cluster?
Also, you may kindly consider about updating the vignette.
The format, for instance the naming (integrated_snn_res.0.2 instead of res.0.2) I used here, and all is difficult to decipher from the vignette.
Due to different errors and troubles over time, this is how I now assemble my loom object from Seurat. The features might have got better since I made this script. I leave it here in case if it is useful for someone else.
##Import Seurat Object
SO <- readRDS("Seurat_Object")
##Extract normalized RNA counts for visualization
exp_mtx <- GetAssayData(SO, assay = "RNA", slot = "data")
exp_mtx <- as.matrix(exp_mtx1)
## Extract UMAP embedding
umap_mtx <- as.data.frame(Embeddings(SO, reduction = "umap"))
##Create your basic loom file
file.name <- "Data_Feb18_2021.loom"
build_loom(
file.name=file.name,
dgem=exp_mtx,
title="Data 2020", # A namr for your data
genome="Human", # Just for user information, not used internally
default.embedding=umap_mtx,
default.embedding.name="Seurat_UMAP"
)
#Now you have created the basic loom file. This file is now sufficient to visualize genes on the embeddings.
#Below is how I add the clusterings.
#open the loom file first
loom <- open_loom("Data_Feb18_2021.loom", mode = 'r+')
#Load the DEG markers file as a data frame
wilcoxon_test_markers_csv <- read.csv(paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.csv"), row.names = 1, stringsAsFactors = F)
#Save it as an RDS file
saveRDS(wilcoxon_test_markers, paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.rds"))
#Make a named list of path to the markers file
#Name should be the resolution mentioned on the column name of corresponding seurat clustering for which you calculated the markers Eg: here 'res.0.2'
wilcoxon_test_markers <- list(integrated_snn_res.0.2 = paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.rds"))
#Also for the corresponding resolution , if you have already annotated names for each cluster number, you can add it as a data.frame
seurat.annotation <- data.frame(integrated_snn_res.0.2 = unique(as.numeric(as.character(SO$integrated_snn_res.0.2))), annotation = unique(as.character(SO$subtype)))
#Add the clustering here
add_seurat_clustering(loom = loom
, seurat = SO
, seurat.markers.file.path.list = wilcoxon_test_markers
,seurat.clustering.prefix = 'integrated_snn_res.'
,annotation = seurat.annotation #the DF of annotation; if not annotated, use NULL
,annotation.cluster.id.cn = 'integrated_snn_res.0.2' #Column name for seurat cluster numbers of the annotation dataframe
,annotation.cluster.description.cn = 'annotation' #Column name for the cell types of the annotation dataframe
, seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
, seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
, seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))
add_seurat_clustering(loom = loom
, seurat = SO
, seurat.markers.file.path.list = wilcoxon_test_markers
,seurat.clustering.prefix = 'integrated_snn_res.'
,annotation = NULL #the DF of annotation; if not annotated, use NULL
,annotation.cluster.id.cn = NULL #Column name for seurat cluster numbers of the annotation dataframe
,annotation.cluster.description.cn = NULL #Column name for the cell types of the annotation dataframe
, seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
, seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
, seurat.marker.metric.description = c("Average log fold change from Wilcox differential test", "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset"))
##Add extra clusterings as column attributes
add_col_attr(loom=loom, key = "Resolution_1.0",value= as.character(SO@meta.data$res_1.0), as.annotation=T)
add_col_attr(loom=loom, key = "Resolution_0.6",value= as.character(SO@meta.data$res_0.6), as.annotation=T)
add_col_attr(loom=loom, key = "Resolution_0.4",value= as.character(SO@meta.data$res_0.4), as.annotation=T)
##Addother categorical variables or annotations
add_col_attr(loom=loom, key = "Sample_type", value= as.character(SO@meta.data$Sample_type),
as.annotation=T)
add_col_attr(loom=loom, key = "Sample", value= as.character(SO@meta.data$Sample), as.annotation=T)
add_col_attr(loom=loom, key = "Patient_Type", value= as.character(SO@meta.data$Patient_Type), as.annotation=T)
add_col_attr(loom=loom, key = "Segment", value= as.character(SO@meta.data$Segment), as.annotation=T)
add_col_attr(loom=loom, key = "Gender", value= as.character(SO@meta.data$Gender), as.annotation=T)
add_col_attr(loom=loom, key = "Cell_Cycle_Phase", value= as.character(SO@meta.data$Phase), as.annotation=T)
##Add continuous variables or metrics
add_col_attr(loom=loom, key = "nCount_RNA", value= as.numeric(SO@meta.data$nCount_RNA), as.metric = T)
add_col_attr(loom=loom, key = "nFeature_RNA", value= as.numeric(SO@meta.data$nFeature_RNA), as.metric = T)
add_col_attr(loom=loom, key = "percent.ribo_protein", value= as.numeric(SO@meta.data$percent.ribo), as.metric = T)
add_col_attr(loom=loom, key = "percent.mt", value= as.numeric(SO@meta.data$percent.mt), as.metric = T)
add_col_attr(loom=loom, key = "S_phase_module_score", value= as.numeric(SO@meta.data$S.Score), as.metric = T)
add_col_attr(loom=loom, key = "G2M_phase_module_score", value= as.numeric(SO@meta.data$G2M.Score), as.metric = T)
add_col_attr(loom=loom, key = "ribo_protein_gene_module_score", value= as.numeric(SO@meta.data$rp_gene_module_score), as.metric = T)
finalize(loom=loom)
close_loom(loom= loom)
Kind regards,
Saeed
Hi @saeedfc,
I agree with you. Vignette is quite old and it would be nice to have it refurbished. Unfortunately, I haven't taken time to update it yet.
Regarding the markers in SCope, you can see them in the right sidebar when you query a cluster.
Hi @dweemx ,
Thanks for the remarks on scope. Could it be that it didn't work for me as then? I tried some of the data you have already deposited as well. There also if I select a cluster, I don't see the markers associated with the cluster, rather I do see which clustering, how many proportion of cells etc..
Kind regards,
Saeed
hi @saeedfc,
we are working on improving SCopeLoomR and the documentation.
in the mean time, please try adding annotated clusters along with markers as follows:
cluster_name <- "integrated_snn_res.0.2"
cluster_list <- setNames(SO@meta.data[, cluster_name], rownames(SO@meta.data))
annot_list <- setNames(seurat.annotation[cluster_list, "annotation"], names(cluster_list))
add_annotated_clustering(loom = loom,
group = "Seurat",
name = cluster_name,
clusters = cluster_list,
annotation = annot_list,
is.default = T,
overwrite.default = NULL)
markers <- readRDS(paste0(my_path,"/Full_Markers_Jan27_ONLY_POS.rds"))
add_clustering_markers(loom = loom,
clustering.id = 0,
clustering.markers = split(markers, markers$cluster),
marker.metric.accessors = c("avg_logFC", "p_val_adj"),
marker.metric.names = c("Avg. logFC", "adjusted P-value"),
marker.metric.description = c("Average log fold change", "Adjusted p-value (BF)"))
close_loom(loom)
you should then be able to select a cluster in SCope and see the marker list for this cluster in the right-hand pannel.
Hi,
I tried this code; However, I am getting an error. Trying to work it out, however, no solution till now.
> add_annotated_clustering(loom = loom,
group = "Seurat",
name = cluster_name,
clusters = cluster_list,
annotation = annot_list,
is.default = T,
overwrite.default = T)
[1] "Clusterings created..."
Error in if (nchar(x = d) > 0) { : missing value where TRUE/FALSE needed
> traceback()
4: FUN(X[[i]], ...)
3: lapply(X = seq_along(along.with = unique.clusters), FUN = function(cluster.idx) {
cluster.id <- unique.clusters[[cluster.idx]]
if (is.numeric(x = cluster.id)) {
description <- paste("NDA - Cluster", cluster.id)
}
else if (is.character(x = cluster.id)) {
cluster.id <- cluster.idx - 1
description <- cluster.id
}
else {
stop("Cluster labels are required to be of class character or numeric.")
}
if (!is.null(x = annotation)) {
annotation <- annotation[names(x = clusters)]
d <- as.character(x = unique(x = annotation[clusters ==
cluster.id]))
if (length(x = d) > 1) {
stop("Annotation is not unique: multiple annotation correspond to a cluster ID.")
}
if (nchar(x = d) > 0) {
description <- paste0(d, " (", cluster.id, ")")
}
}
return(list(id = cluster.id, description = description))
})
2: add_global_md_clustering(loom = loom, id = id, group = group,
name = name, clusters = clusters_as_factor_ids, annotation = annotation)
1: add_annotated_clustering(loom = loom, group = "Seurat", name = cluster_name,
clusters = cluster_list, annotation = annot_list, is.default = T,
overwrite.default = T)
Hi, I'm encountering the same issue...
In my think, this error occurs when the cluster has no cell.
Because I'm dealing with WT/KO conditions,
I integrated WT/KO Seurat objects using CCA, and clustered them.
So there can be an empty cluster for the specific condition.
I've run the code below for 6 Seurat objects, but the error occurred only when there is an 'empty cluster'.
library(Seurat)
library(SCopeLoomR)
SeuratObj <- readRDS("~/SP.sct.pca15.rds")
SeuratObj[["percent.mt"]] <- PercentageFeatureSet(SeuratObj, pattern = "^mt-")
SeuratObj[["groud_id"]] <- lapply(list(SeuratObj@meta.data[,1]), function(x) substr(x,3,8))
H_Epi <- subset(SeuratObj, groud_id == "H_Epi ")
build_loom(file.name = "~/H.Sp.sct.pca15.loom",
dgem = H_Epi@assays$RNA@counts,
title = "H.SQ.sct.pca15",
default.embedding = H_Epi@reductions$umap@cell.embeddings,
default.embedding.name = "umap.rna")
loom <- open_loom("/node210data/project/mouse_NK_cell/scenic_out/Conditional/H.Sp.sct.pca15.loom", mode = "r+")
add_embedding(loom = loom,
embedding = H_Epi@reductions$pca@cell.embeddings,
name = "pca")
add_col_attr(loom = loom, key = "batch",
value = H_Epi@meta.data$orig.ident, as.annotation = TRUE)
kmeans_markers <- FindAllMarkers(H_Epi, only.pos = TRUE, min.pct = 0.5, logfc.threshold = 0.5)
add_seurat_clustering(loom = loom,
seurat = H_Epi,
seurat.assay = "RNA",
seurat.clustering.prefix = "integrated_snn_res.",
default.clustering.resolution = "res.0.5"
)
add_clustering_markers(loom = loom,
clustering.id = 1,
clustering.markers = split(kmeans_markers, kmeans_markers$cluster),
marker.metric.accessors = c("avg_logFC", "p_val_adj"),
marker.metric.names = c("Avg. log2FC", "adjusted P-value"),
marker.metric.description = c("Average log fold change", "Adjusted p-value (BF)")
)
close_loom(loom)
below is the error message from add_seurat_clustering.
[1] "Seurat, integrated_snn_res.0.8"
[1] "Adding Seurat clusters..."
[1] "Clusterings created..."
Error in if (nchar(x = d) > 0) {: argument is of length zero
Traceback:
1. add_seurat_clustering(loom = loom, seurat = L_Epi, seurat.assay = "RNA",
. seurat.clustering.prefix = "integrated_snn_res.", default.clustering.resolution = "res.0.5")
2. add_annotated_clustering(loom = loom, group = "Seurat", name = paste("Seurat,",
. paste0(seurat.clustering.prefix, res)), clusters = cluster_ids,
. annotation = cluster_annotation, is.default = is_default_clustering,
. overwrite.default = default.clustering.overwrite)
3. add_global_md_clustering(loom = loom, id = id, group = group,
. name = name, clusters = clusters_as_factor_ids, annotation = annotation)
4. lapply(X = seq_along(along.with = unique.clusters), FUN = function(cluster.idx) {
. cluster.id <- unique.clusters[[cluster.idx]]
. if (is.numeric(x = cluster.id)) {
. description <- paste("NDA - Cluster", cluster.id)
. }
. else if (is.character(x = cluster.id)) {
. cluster.id <- cluster.idx - 1
. description <- cluster.id
. }
. else {
. stop("Cluster labels are required to be of class character or numeric.")
. }
. if (!is.null(x = annotation)) {
. annotation <- annotation[names(x = clusters)]
. d <- as.character(x = unique(x = annotation[clusters ==
. cluster.id]))
. if (length(x = d) > 1) {
. stop("Annotation is not unique: multiple annotation correspond to a cluster ID.")
. }
. if (nchar(x = d) > 0) {
. description <- paste0(d, " (", cluster.id, ")")
. }
. }
. return(list(id = cluster.id, description = description))
. })
5. FUN(X[[i]], ...)
Thansk,