
ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")

Description of the bug

import os
os.environ['R_HOME'] = '/data_disk/ST01/Software/conda_env/scirpy/lib/R'
os.environ['PATH'] = '/data_disk/ST01/Software/conda_env/scirpy/lib/R/bin:' + os.environ['PATH']
import warnings

    ".*IProgress not found*",
warnings.simplefilter(action="ignore", category=FutureWarning)

from palmotif import compute_motif, svg_logo
import scanpy as sc
import dandelion as ddl
import scirpy as ir
import pandas as pd
import numpy as np
import seaborn as sb

import os

import matplotlib as mpl
import matplotlib.pyplot as plt

warnings.simplefilter(action="ignore", category=pd.errors.DtypeWarning)
path_gex = "/data_disk/ST01/User/zhanghh/Scripts/jupyterlab/sc-best-practices/data/BCR_01_preprocessed.h5ad"
adata_bcr = sc.read(path_gex)
adata = adata_bcr[adata_bcr.obs["patient_id"].isin(["COVID-064", "COVID-014"])].copy()
vdjx = ddl.from_scirpy(adata)
WARNING: Non-standard locus name ignored: Multi 
Dandelion class object with n_obs = 10049 and n_contigs = 23320
    data: 'sequence_id', 'sequence', 'rev_comp', 'productive', 'v_call', 'd_call', 'j_call', 'sequence_alignment', 'germline_alignment', 'junction', 'junction_aa', 'v_cigar', 'd_cigar', 'j_cigar', 'c_call', 'consensus_count', 'duplicate_count', 'locus', 'cell_id', 'multi_chain', 'patient_id', 'is_cell', 'receptor_subtype', 'chain_pairing', 'receptor_type', 'high_confidence', 'rearrangement_status'
    metadata: 'locus_VDJ', 'locus_VJ', 'productive_VDJ', 'productive_VJ', 'v_call_VDJ', 'd_call_VDJ', 'j_call_VDJ', 'v_call_VJ', 'j_call_VJ', 'c_call_VDJ', 'c_call_VJ', 'junction_VDJ', 'junction_VJ', 'junction_aa_VDJ', 'junction_aa_VJ', 'v_call_B_VDJ', 'd_call_B_VDJ', 'j_call_B_VDJ', 'v_call_B_VJ', 'j_call_B_VJ', 'c_call_B_VDJ', 'c_call_B_VJ', 'productive_B_VDJ', 'productive_B_VJ', 'duplicate_count_B_VDJ', 'duplicate_count_B_VJ', 'v_call_VDJ_main', 'v_call_VJ_main', 'd_call_VDJ_main', 'j_call_VDJ_main', 'j_call_VJ_main', 'c_call_VDJ_main', 'c_call_VJ_main', 'v_call_B_VDJ_main', 'd_call_B_VDJ_main', 'j_call_B_VDJ_main', 'v_call_B_VJ_main', 'j_call_B_VJ_main', 'isotype', 'isotype_status', 'locus_status', 'chain_status', 'rearrangement_status_VDJ', 'rearrangement_status_VJ'
vdjx.data["v_call"].replace("", np.nan, inplace=True)
vdjx.data.dropna(subset=["v_call"], inplace=True)

vdjx.data["j_call"].replace("", np.nan, inplace=True)
vdjx.data.dropna(subset=["j_call"], inplace=True)

vdjx.data["junction_aa"].replace("", np.nan, inplace=True)
vdjx.data.dropna(subset=["junction_aa"], inplace=True)

vdjx.data["junction_length"] = [len(a) for a in vdjx.data["junction_aa"]]
ddl.pp.calculate_threshold(vdjx, model="hh_s5f", plot=False)
/data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/site-packages/rpy2/robjects/pandas2ri.py:65: UserWarning: Error while trying to convert the column "productive". Fall back to string conversion. The error is: Series can only be of one type, or None (and here we have <class 'str'> and <class 'bool'>). If happening with a pandas DataFrame the method infer_objects() will normalize data types before conversion.
R[write to console]: Error in (function (db, sequenceColumn = "junction", vCallColumn = "v_call",  : 
  The locus column contains invalid loci annotations.
ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")
### Minimal reproducible example

ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")

The error message produced by the code above

FileNotFoundError                         Traceback (most recent call last)
Cell In[10], line 1
----> 1 ddl.tl.define_clones(vdjx, key_added="changeo_clone_id", model="hh_s5f")

File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/site-packages/dandelion/tools/_tools.py:1273, in define_clones(vdj_data, dist, action, model, norm, doublets, fileformat, ncpu, dirs, outFilePrefix, key_added, verbose)
   1270     return (heavy_df, light_df)
   1272 logg.info("Running command: %s\n" % (" ".join(cmd)))
-> 1273 run(cmd)
   1275 h_df, l_df = _lightCluster(
   1276     h_file2, l_file, outfile, doublets=doublets, fileformat=fileformat
   1277 )
   1279 h_df = load_data(h_df)

File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:505, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
    502     kwargs['stdout'] = PIPE
    503     kwargs['stderr'] = PIPE
--> 505 with Popen(*popenargs, **kwargs) as process:
    506     try:
    507         stdout, stderr = process.communicate(input, timeout=timeout)

File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:951, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask)
    947         if self.text_mode:
    948             self.stderr = io.TextIOWrapper(self.stderr,
    949                     encoding=encoding, errors=errors)
--> 951     self._execute_child(args, executable, preexec_fn, close_fds,
    952                         pass_fds, cwd, env,
    953                         startupinfo, creationflags, shell,
    954                         p2cread, p2cwrite,
    955                         c2pread, c2pwrite,
    956                         errread, errwrite,
    957                         restore_signals,
    958                         gid, gids, uid, umask,
    959                         start_new_session)
    960 except:
    961     # Cleanup if the child failed starting.
    962     for f in filter(None, (self.stdin, self.stdout, self.stderr)):

File /data_disk/ST01/Software/conda_env/scirpy/lib/python3.9/subprocess.py:1821, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, gid, gids, uid, umask, start_new_session)
   1819     if errno_num != 0:
   1820         err_msg = os.strerror(errno_num)
-> 1821     raise child_exception_type(errno_num, err_msg, err_filename)
   1822 raise child_exception_type(err_msg)

FileNotFoundError: [Errno 2] No such file or directory: 'DefineClones.py'

Version information

dandelion==0.3.1 pandas==1.4.4 numpy==1.24.3 matplotlib==3.7.1 networkx==3.1 scipy==1.10.1

scanpy==1.9.3 anndata==0.9.1 umap==0.5.3 numpy==1.24.3 scipy==1.10.1 pandas==1.4.4 scikit-learn==1.2.2 statsmodels==0.14.0 python-igraph==0.10.4 pynndescent==0.5.10

Hi, the error message is telling you that DefineClones.py is not being found - this is an executable file provided by changeo. As I'm not sure how you installed changeo, I can only suggest that you uninstall and install changeo again, perhaps from PyPI.

Ngort commented

This worked for me (though it may be exactly what you already did).
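Run "which DefineClones.py" in a shell, then append the directory it reports to the PATH seen by the Python interpreter through os.environ, e.g. os.environ["PATH"] += os.pathsep + "/directory/containing/DefineClones.py", before calling ddl.tl.define_clones.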