Starlitnightly/omicverse

ov.pp.qc() Error

Closed this issue · 2 comments

The code I wrote based on the tutorials:

# Reproduction script: read six 10x Genomics samples (three MCAO, three sham),
# label each with a 'batch' value, concatenate them into one AnnData, and run
# omicverse quality control with batch_key='batch'.
import omicverse as ov
import scanpy as sc

# NOTE(review): unlike the five samples below, MCAO1 is never assigned an
# obs['batch'] value anywhere in this script.
MCAO1 = sc.read_10x_mtx(
    'data/MCAO1/',  
    var_names='gene_symbols',               
    cache=True)                              

MCAO2 = sc.read_10x_mtx(
    'data/MCAO2/', 
    var_names='gene_symbols',            
    cache=True)                            
MCAO2.obs['batch'] = 'MCAO2'

MCAO3 = sc.read_10x_mtx(
    'data/MCAO3/',
    var_names='gene_symbols',           
    cache=True)                           
MCAO3.obs['batch'] = 'MCAO3'

sham1 = sc.read_10x_mtx(
    'data/sham1/',  
    var_names='gene_symbols',               
    cache=True)                             
sham1.obs['batch'] = 'sham1'

sham2 = sc.read_10x_mtx(
    'data/sham2/', 
    var_names='gene_symbols',               
    cache=True)                             
sham2.obs['batch'] = 'sham2'

sham3 = sc.read_10x_mtx(
    'data/sham3/',  
    var_names='gene_symbols',              
    cache=True)                           
sham3.obs['batch'] = 'sham3'

# Stack the six samples along the observation axis.
# NOTE(review): no index_unique is passed here and obs_names are not made
# unique afterwards, so any barcode present in more than one sample presumably
# stays duplicated in adata.obs_names -- confirm with adata.obs_names.is_unique.
adata=sc.concat([MCAO1,MCAO2,MCAO3,sham1,sham2,sham3],merge='same')
adata

# Inspect which batch labels are present after concatenation.
adata.obs['batch'].unique()

# Run QC; `tresh` (sic) is the keyword name in ov.pp.qc's signature, and
# batch_key is forwarded to scanpy's scrublet doublet detection.
adata=ov.pp.qc(adata,
              tresh={'mito_perc': 0.2, 'nUMIs': 500, 'detected_genes': 250},
              batch_key='batch')

Running it raised the following error:
ValueError: Length of passed value for obs_names is 62584, but this AnnData has shape: (58528, 27998).

I checked the shapes of the individual AnnData objects: MCAO1, MCAO2, MCAO3, sham1, sham2 and sham3 are 11772 × 27998, 11361 × 27998, 8104 × 27998, 8771 × 27998, 8540 × 27998 and 9980 × 27998, respectively. The sum of n_obs is 58528, which matches the concatenated AnnData. So why is the length of the value passed for obs_names 62584?

Here is the complete error message:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[13], line 1
----> 1 adata=ov.pp.qc(adata,
      2               tresh={'mito_perc': 0.2, 'nUMIs': 500, 'detected_genes': 250},
      3               batch_key='batch')
      4 adata

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/omicverse/pp/_qc.py:184, in qc(adata, **kwargs)
    182 else:
    183     print('CPU mode activated')
--> 184     return qc_cpu(adata,**kwargs)

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/omicverse/pp/_qc.py:253, in qc_cpu(adata, mode, min_cells, min_genes, nmads, max_cells_ratio, max_genes_ratio, batch_key, doublets, path_viz, tresh, mt_startswith, mt_genes)
    250 if doublets==True:
    251     # Post doublets removal QC plot
    252     print('Begin of post doublets removal and QC plot')
--> 253     sc.external.pp.scrublet(adata, random_state=1234,batch_key=batch_key)
    254     adata_remove = adata[adata.obs['predicted_doublet'], :]
    255     removed_cells.extend(list(adata_remove.obs_names))

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/sklearn/utils/deprecation.py:88, in deprecated._decorate_fun.<locals>.wrapped(*args, **kwargs)
     85 @functools.wraps(fun)
     86 def wrapped(*args, **kwargs):
     87     warnings.warn(msg, category=FutureWarning)
---> 88     return fun(*args, **kwargs)

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/legacy_api_wrap/__init__.py:80, in legacy_api.<locals>.wrapper.<locals>.fn_compatible(*args_all, **kw)
     77 @wraps(fn)
     78 def fn_compatible(*args_all: P.args, **kw: P.kwargs) -> R:
     79     if len(args_all) <= n_positional:
---> 80         return fn(*args_all, **kw)
     82     args_pos: P.args
     83     args_pos, args_rest = args_all[:n_positional], args_all[n_positional:]

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/scanpy/preprocessing/_scrublet/__init__.py:267, in scrublet(adata, adata_sim, batch_key, sim_doublet_ratio, expected_doublet_rate, stdev_doublet_rate, synthetic_doublet_umi_subsampling, knn_dist_metric, normalize_variance, log_transform, mean_center, n_prin_comps, use_approx_neighbors, get_doublet_neighbor_parents, n_neighbors, threshold, verbose, copy, random_state)
    263 scrubbed_obs = pd.concat([scrub["obs"] for scrub in scrubbed])
    265 # Now reset the obs to get the scrublet scores
--> 267 adata.obs = scrubbed_obs.loc[adata.obs_names.values]
    269 # Save the .uns from each batch separately
    271 adata.uns["scrublet"] = {}

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/anndata/_core/anndata.py:828, in AnnData.obs(self, value)
    826 @obs.setter
    827 def obs(self, value: pd.DataFrame):
--> 828     self._set_dim_df(value, "obs")

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/anndata/_core/anndata.py:767, in AnnData._set_dim_df(self, value, attr)
    765 if not isinstance(value, pd.DataFrame):
    766     raise ValueError(f"Can only assign pd.DataFrame to {attr}.")
--> 767 value_idx = self._prep_dim_index(value.index, attr)
    768 if self.is_view:
    769     self._init_as_actual(self.copy())

File ~/miniconda3/envs/omicverse/lib/python3.10/site-packages/anndata/_core/anndata.py:781, in AnnData._prep_dim_index(self, value, attr)
    776 """Prepares index to be uses as obs_names or var_names for AnnData object.AssertionError
    777 
    778 If a pd.Index is passed, this will use a reference, otherwise a new index object is created.
    779 """
    780 if self.shape[attr == "var"] != len(value):
--> 781     raise ValueError(
    782         f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}"
    783     )
    784 if isinstance(value, pd.Index) and not isinstance(
    785     value.name, (str, type(None))
    786 ):
    787     raise ValueError(
    788         f"AnnData expects .{attr}.index.name to be a string or None, "
    789         f"but you passed a name of type {type(value.name).__name__!r}"
    790     )

ValueError: Length of passed value for obs_names is 62584, but this AnnData has shape: (58528, 27998)

Hi,

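You need to make the obs_names of adata unique before running quality control, e.g. call `adata.obs_names_make_unique()` right after `sc.concat(...)` (or pass `index_unique='-'` to `sc.concat`). When the same barcode occurs in more than one sample, the `scrubbed_obs.loc[adata.obs_names.values]` lookup inside scrublet returns several rows for each duplicated name, which is why 62584 rows are passed for an AnnData that has only 58528 observations.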