ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")
hai178912522 opened this issue · 2 comments
Description of the bug
import os
os.environ['R_HOME'] = '/data_disk/ST01/Software/conda_env/scirpy/lib/R'
os.environ['PATH'] = '/data_disk/ST01/Software/conda_env/scirpy/lib/R/bin:' + os.environ['PATH']
import warnings
warnings.filterwarnings(
"ignore",
".*IProgress not found*",
)
warnings.simplefilter(action="ignore", category=FutureWarning)
from palmotif import compute_motif, svg_logo
import scanpy as sc
import dandelion as ddl
import scirpy as ir
import pandas as pd
import numpy as np
import seaborn as sb
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
warnings.simplefilter(action="ignore", category=pd.errors.DtypeWarning)
sc.logging.print_versions()
-----
anndata 0.9.1
scanpy 1.9.3
-----
Bio 1.81
Levenshtein 0.21.0
PIL 9.5.0
adjustText 0.8
airr 1.4.1
argcomplete NA
asttokens NA
backcall 0.2.0
cffi 1.15.1
changeo 1.3.0
colorama 0.4.6
comm 0.1.3
cycler 0.10.0
cython_runtime NA
dandelion 0.3.1
dateutil 2.8.2
debugpy 1.6.7
decorator 5.1.1
distance NA
executing 1.2.0
fontTools 4.39.4
h5py 3.8.0
igraph 0.10.4
importlib_resources NA
ipykernel 6.23.1
jedi 0.18.2
jinja2 3.1.2
joblib 1.2.0
kiwisolver 1.4.4
leidenalg 0.9.1
llvmlite 0.40.0
markupsafe 2.1.3
matplotlib 3.7.1
mizani 0.9.0
mpl_toolkits NA
natsort 8.3.1
networkx 3.1
numba 0.57.0
numpy 1.24.3
packaging 23.1
palettable 3.3.3
palmotif NA
pandas 1.4.4
parasail 1.3.3
parso 0.8.3
patsy 0.5.3
pexpect 4.8.0
pickleshare 0.7.5
pkg_resources NA
platformdirs 3.5.1
plotnine 0.10.1
polyleven NA
presto 0.7.1
prompt_toolkit 3.0.38
psutil 5.9.5
ptyprocess 0.7.0
pure_eval 0.2.2
pydev_ipython NA
pydevconsole NA
pydevd 2.9.5
pydevd_file_utils NA
pydevd_plugins NA
pydevd_tracing NA
pygments 2.15.1
pyparsing 3.0.9
pytz 2023.3
rapidfuzz 2.15.1
rpy2 3.5.11
scipy 1.10.1
scirpy 0.12.2
seaborn 0.12.2
session_info 1.0.0
setuptools 67.7.2
setuptools_scm NA
six 1.16.0
sklearn 1.2.2
stack_data 0.6.2
statsmodels 0.14.0
svgwrite 1.4.3
texttable 1.6.7
threadpoolctl 3.1.0
tornado 6.3.2
tqdm 4.65.0
tracerlib NA
traitlets 5.9.0
typing_extensions NA
tzlocal NA
wcwidth 0.2.6
yaml 6.0
yamlordereddictloader NA
zipp NA
zmq 25.1.0
zoneinfo NA
-----
IPython 8.14.0
jupyter_client 8.2.0
jupyter_core 5.3.0
-----
Python 3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:39:03) [GCC 11.3.0]
Linux-5.15.0-71-generic-x86_64-with-glibc2.35
-----
Session information updated at 2023-06-05 16:04
path_gex = "/data_disk/ST01/User/zhanghh/Scripts/jupyterlab/sc-best-practices/data/BCR_01_preprocessed.h5ad"
adata_bcr = sc.read(path_gex)
adata = adata_bcr[adata_bcr.obs["patient_id"].isin(["COVID-064", "COVID-014"])].copy()
vdjx = ddl.from_scirpy(adata)
vdjx
WARNING: Non-standard locus name ignored: Multi
Dandelion class object with n_obs = 10049 and n_contigs = 23320
data: 'sequence_id', 'sequence', 'rev_comp', 'productive', 'v_call', 'd_call', 'j_call', 'sequence_alignment', 'germline_alignment', 'junction', 'junction_aa', 'v_cigar', 'd_cigar', 'j_cigar', 'c_call', 'consensus_count', 'duplicate_count', 'locus', 'cell_id', 'multi_chain', 'patient_id', 'is_cell', 'receptor_subtype', 'chain_pairing', 'receptor_type', 'high_confidence', 'rearrangement_status'
metadata: 'locus_VDJ', 'locus_VJ', 'productive_VDJ', 'productive_VJ', 'v_call_VDJ', 'd_call_VDJ', 'j_call_VDJ', 'v_call_VJ', 'j_call_VJ', 'c_call_VDJ', 'c_call_VJ', 'junction_VDJ', 'junction_VJ', 'junction_aa_VDJ', 'junction_aa_VJ', 'v_call_B_VDJ', 'd_call_B_VDJ', 'j_call_B_VDJ', 'v_call_B_VJ', 'j_call_B_VJ', 'c_call_B_VDJ', 'c_call_B_VJ', 'productive_B_VDJ', 'productive_B_VJ', 'duplicate_count_B_VDJ', 'duplicate_count_B_VJ', 'v_call_VDJ_main', 'v_call_VJ_main', 'd_call_VDJ_main', 'j_call_VDJ_main', 'j_call_VJ_main', 'c_call_VDJ_main', 'c_call_VJ_main', 'v_call_B_VDJ_main', 'd_call_B_VDJ_main', 'j_call_B_VDJ_main', 'v_call_B_VJ_main', 'j_call_B_VJ_main', 'isotype', 'isotype_status', 'locus_status', 'chain_status', 'rearrangement_status_VDJ', 'rearrangement_status_VJ'
vdjx.data["v_call"].replace("", np.nan, inplace=True)
vdjx.data.dropna(subset=["v_call"], inplace=True)
vdjx.data["j_call"].replace("", np.nan, inplace=True)
vdjx.data.dropna(subset=["j_call"], inplace=True)
vdjx.data["junction_aa"].replace("", np.nan, inplace=True)
vdjx.data.dropna(subset=["junction_aa"], inplace=True)
vdjx.data["junction_length"] = [len(a) for a in vdjx.data["junction_aa"]]
ddl.pp.calculate_threshold(vdjx, model="hh_s5f", plot=False)
/data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/site-packages/rpy2/robjects/pandas2ri.py:65: UserWarning: Error while trying to convert the column "productive". Fall back to string conversion. The error is: Series can only be of one type, or None (and here we have <class 'str'> and <class 'bool'>). If happening with a pandas DataFrame the method infer_objects() will normalize data types before conversion.
R[write to console]: Error in (function (db, sequenceColumn = "junction", vCallColumn = "v_call", :
The locus column contains invalid loci annotations.
vdjx.threshold
0.13828874822670625
ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[10], line 1
----> 1 ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/site-packages/dandelion/tools/_tools.py:1273, in define_clones(vdj_data, dist, action, model, norm, doublets, fileformat, ncpu, dirs, outFilePrefix, key_added, verbose)
1270 return (heavy_df, light_df)
1272 logg.info("Running command: %s\n" % (" ".join(cmd)))
-> 1273 run(cmd)
1275 h_df, l_df = _lightCluster(
1276 h_file2, l_file, outfile, doublets=doublets, fileformat=fileformat
1277 )
1279 h_df = load_data(h_df)
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:505, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
502 kwargs['stdout'] = PIPE
503 kwargs['stderr'] = PIPE
--> 505 with Popen(*popenargs, **kwargs) as process:
506 try:
507 stdout, stderr = process.communicate(input, timeout=timeout)
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:951, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask)
947 if self.text_mode:
948 self.stderr = io.TextIOWrapper(self.stderr,
949 encoding=encoding, errors=errors)
--> 951 self._execute_child(args, executable, preexec_fn, close_fds,
952 pass_fds, cwd, env,
953 startupinfo, creationflags, shell,
954 p2cread, p2cwrite,
955 c2pread, c2pwrite,
956 errread, errwrite,
957 restore_signals,
958 gid, gids, uid, umask,
959 start_new_session)
960 except:
961 # Cleanup if the child failed starting.
962 for f in filter(None, (self.stdin, self.stdout, self.stderr)):
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:1821, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, gid, gids, uid, umask, start_new_session)
1819 if errno_num != 0:
1820 err_msg = os.strerror(errno_num)
-> 1821 raise child_exception_type(errno_num, err_msg, err_filename)
1822 raise child_exception_type(err_msg)
FileNotFoundError: [Errno 2] No such file or directory: 'DefineClones.py'
### Minimal reproducible example
```python
ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")
```
The error message produced by the code above
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[10], line 1
----> 1 ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/site-packages/dandelion/tools/_tools.py:1273, in define_clones(vdj_data, dist, action, model, norm, doublets, fileformat, ncpu, dirs, outFilePrefix, key_added, verbose)
1270 return (heavy_df, light_df)
1272 logg.info("Running command: %s\n" % (" ".join(cmd)))
-> 1273 run(cmd)
1275 h_df, l_df = _lightCluster(
1276 h_file2, l_file, outfile, doublets=doublets, fileformat=fileformat
1277 )
1279 h_df = load_data(h_df)
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:505, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
502 kwargs['stdout'] = PIPE
503 kwargs['stderr'] = PIPE
--> 505 with Popen(*popenargs, **kwargs) as process:
506 try:
507 stdout, stderr = process.communicate(input, timeout=timeout)
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:951, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask)
947 if self.text_mode:
948 self.stderr = io.TextIOWrapper(self.stderr,
949 encoding=encoding, errors=errors)
--> 951 self._execute_child(args, executable, preexec_fn, close_fds,
952 pass_fds, cwd, env,
953 startupinfo, creationflags, shell,
954 p2cread, p2cwrite,
955 c2pread, c2pwrite,
956 errread, errwrite,
957 restore_signals,
958 gid, gids, uid, umask,
959 start_new_session)
960 except:
961 # Cleanup if the child failed starting.
962 for f in filter(None, (self.stdin, self.stdout, self.stderr)):
File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:1821, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, gid, gids, uid, umask, start_new_session)
1819 if errno_num != 0:
1820 err_msg = os.strerror(errno_num)
-> 1821 raise child_exception_type(errno_num, err_msg, err_filename)
1822 raise child_exception_type(err_msg)
FileNotFoundError: [Errno 2] No such file or directory: 'DefineClones.py'
OS information
ubuntu
Version information
dandelion==0.3.1 pandas==1.4.4 numpy==1.24.3 matplotlib==3.7.1 networkx==3.1 scipy==1.10.1
scanpy==1.9.3 anndata==0.9.1 umap==0.5.3 numpy==1.24.3 scipy==1.10.1 pandas==1.4.4 scikit-learn==1.2.2 statsmodels==0.14.0 python-igraph==0.10.4 pynndescent==0.5.10
Additional context
No response
Hi, the error message is telling you that `DefineClones.py` is not being found - this is an executable file provided by `changeo`. As I'm not sure how you installed `changeo`, I can only suggest that you uninstall and install `changeo` again, perhaps from PyPI.
This worked for me (though it may be exactly what you already did).
Run `which DefineClones.py` in a shell, then append the directory it reports to the interpreter's `PATH` through `os.environ`, e.g. `os.environ["PATH"] += os.pathsep + "/path/to/that/directory"`.