SchubertLab/mvTCR

02_training_analysis.ipynb error in "Calling the training functions"

HongjianSun opened this issue · 2 comments

from mvtcr.models.model_selection import run_model_selection

timeout = (20*60)
n_samples = 3
n_gpus = 1
seed = 42
run_model_selection(adata, params_experiment, params_optimization, n_samples, timeout, n_gpus, sampler_seed=seed)

[I 2024-07-25 10:51:06,698] A new study created in RDB with name: haniffa_tutorial
  0%|                                                                                           | 0/10 [00:12<?, ?it/s]
[W 2024-07-25 10:51:22,235] Trial 0 failed with parameters: {'dropout': 0.1, 'activation': 'linear', 'rna_hidden': 1500, 'hdim': 200, 'shared_hidden': 100, 'rna_num_layers': 1, 'tfmr_encoding_layers': 4, 'loss_weights_kl': 4.0428727350273357e-07, 'loss_weights_tcr': 0.034702669886504146, 'lr': 1.0994335574766187e-05, 'zdim': 50, 'tfmr_embedding_size': 16, 'tfmr_num_heads': 8, 'tfmr_dropout': 0.15000000000000002} because of the following error: AttributeError("'SparseCSRView' object has no attribute 'A'").
Traceback (most recent call last):
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py", line 152, in <lambda>
    study.optimize(lambda trial: objective(trial, adata, suggest_params, params_experiment, params_optimization),
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py", line 104, in objective
    model.train(params_experiment['n_epochs'], params_architecture['batch_size'], params_architecture['learning_rate'],
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py", line 228, in train
    self.additional_evaluation(epoch, save_path)
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py", line 328, in additional_evaluation
    score, relation = report_pseudo_metric(self.adata, self, self.optimization_mode_params,
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\optimization\pseudo_metric.py", line 19, in report_pseudo_metric
    summary = run_knn_within_set_evaluation(adata, test_embedding_func, labels, subset='val')
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\kNN.py", line 25, in run_knn_within_set_evaluation
    scores[f'weighted_f1_{prediction_label}'] = Metrics.get_knn_f1_within_set(latent_tmp, prediction_label)
  File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\Metrics.py", line 106, in get_knn_f1_within_set
    con = latent.obsp['connectivities'].A.astype(bool)
AttributeError: 'SparseCSRView' object has no attribute 'A'
[W 2024-07-25 10:51:22,235] Trial 0 failed with value None.
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[5], line 7
      5 n_gpus = 1
      6 seed = 42
----> 7 run_model_selection(adata, params_experiment, params_optimization, n_samples, timeout, n_gpus, sampler_seed=seed)

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\utils_preprocessing.py:115, in check_if_input_is_mudata.<locals>.wrapper(mudata_gex_key, mudata_airr_key, *args, **kwargs)
    113 	train, test = func(*args, **kwargs)
    114 else:
--> 115 	func(*args, **kwargs)
    116 #====================
    117 #updating mudata
    118 if input_is_mu and func_name in ("encode_clonotypes", "encode_tcr", "encode_conditional_var"):

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py:152, in run_model_selection(adata, params_experiment, params_optimization, num_samples, timeout, n_jobs, sampler_seed)
    150 suggest_params = get_parameter_functions(params_experiment['model_name'], params_optimization['name'])
    151 # study.enqueue_trial(init_params)
--> 152 study.optimize(lambda trial: objective(trial, adata, suggest_params, params_experiment, params_optimization),
    153                n_trials=num_samples, timeout=timeout, n_jobs=n_jobs)
    155 try:
    156     #optuna >=3.0
    157     pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]

File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\study.py:451, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
    348 def optimize(
    349     self,
    350     func: ObjectiveFuncType,
   (...)
    357     show_progress_bar: bool = False,
    358 ) -> None:
    359     """Optimize an objective function.
    360 
    361     Optimization is done by choosing a suitable set of hyperparameter values from a given
   (...)
    449             If nested invocation of this method occurs.
    450     """
--> 451     _optimize(
    452         study=self,
    453         func=func,
    454         n_trials=n_trials,
    455         timeout=timeout,
    456         n_jobs=n_jobs,
    457         catch=tuple(catch) if isinstance(catch, Iterable) else (catch,),
    458         callbacks=callbacks,
    459         gc_after_trial=gc_after_trial,
    460         show_progress_bar=show_progress_bar,
    461     )

File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:66, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
     64 try:
     65     if n_jobs == 1:
---> 66         _optimize_sequential(
     67             study,
     68             func,
     69             n_trials,
     70             timeout,
     71             catch,
     72             callbacks,
     73             gc_after_trial,
     74             reseed_sampler_rng=False,
     75             time_start=None,
     76             progress_bar=progress_bar,
     77         )
     78     else:
     79         if n_jobs == -1:

File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:163, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
    160         break
    162 try:
--> 163     frozen_trial = _run_trial(study, func, catch)
    164 finally:
    165     # The following line mitigates memory problems that can be occurred in some
    166     # environments (e.g., services that use computing containers such as GitHub Actions).
    167     # Please refer to the following PR for further details:
    168     # https://github.com/optuna/optuna/pull/325.
    169     if gc_after_trial:

File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:251, in _run_trial(study, func, catch)
    244         assert False, "Should not reach."
    246 if (
    247     frozen_trial.state == TrialState.FAIL
    248     and func_err is not None
    249     and not isinstance(func_err, catch)
    250 ):
--> 251     raise func_err
    252 return frozen_trial

File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:200, in _run_trial(study, func, catch)
    198 with get_heartbeat_thread(trial._trial_id, study._storage):
    199     try:
--> 200         value_or_values = func(trial)
    201     except exceptions.TrialPruned as e:
    202         # TODO(mamu): Handle multi-objective cases.
    203         state = TrialState.PRUNED

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py:152, in run_model_selection.<locals>.<lambda>(trial)
    150 suggest_params = get_parameter_functions(params_experiment['model_name'], params_optimization['name'])
    151 # study.enqueue_trial(init_params)
--> 152 study.optimize(lambda trial: objective(trial, adata, suggest_params, params_experiment, params_optimization),
    153                n_trials=num_samples, timeout=timeout, n_jobs=n_jobs)
    155 try:
    156     #optuna >=3.0
    157     pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py:104, in objective(trial, adata_tmp, suggest_params, params_experiment_base, optimization_mode_params)
     99 model = utils.select_model_by_name(params_experiment['model_name'])
    100 model = model(adata, params_architecture, params_experiment['balanced_sampling'], params_experiment['metadata'],
    101               params_experiment['conditional'], optimization_mode_params,
    102               params_experiment['label_key'], params_experiment['device'])
--> 104 model.train(params_experiment['n_epochs'], params_architecture['batch_size'], params_architecture['learning_rate'],
    105             params_architecture['loss_weights'], params_experiment['kl_annealing_epochs'],
    106             params_experiment['early_stop'], params_experiment['save_path'], comet)
    108 # plot UMAPs
    109 if comet is not None:

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py:228, in VAEBaseModel.train(self, n_epochs, batch_size, learning_rate, loss_weights, kl_annealing_epochs, early_stop, save_path, comet)
    226 	val_loss_summary = self.run_epoch(epoch, phase='val')
    227 	self.log_losses(val_loss_summary, epoch)
--> 228 	self.additional_evaluation(epoch, save_path)
    230 if self.do_early_stopping(val_loss_summary['val Loss'], early_stop, save_path, epoch):
    231 	break

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py:328, in VAEBaseModel.additional_evaluation(self, epoch, save_path)
    325 	score, relation = report_modulation_prediction(self.adata, self, self.optimization_mode_params,
    326 												   epoch, self.comet)
    327 elif name == 'pseudo_metric':
--> 328 	score, relation = report_pseudo_metric(self.adata, self, self.optimization_mode_params,
    329 										   epoch, self.comet)
    330 elif name == 'supervised':
    331 	score, relation = self.summary_losses['val CLS F1'], operator.gt

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\optimization\pseudo_metric.py:19, in report_pseudo_metric(adata, model, optimization_mode_params, epoch, comet)
     16     labels = list(prediction_label.keys())
     18 test_embedding_func = get_model_prediction_function(model, do_adata=True, metadata=labels)
---> 19 summary = run_knn_within_set_evaluation(adata, test_embedding_func, labels, subset='val')
     21 if isinstance(prediction_label, list):
     22     summary['pseudo_metric'] = sum(summary.values())

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\kNN.py:25, in run_knn_within_set_evaluation(data_full, embedding_function, prediction_labels, subset)
     23     latent_tmp = latent_tmp[(latent_tmp.obs[prediction_label] != -99).to_numpy()]
     24     #IDEA latent tmp for f1 or calc just a knn graph and eval with that --> speed and mem efficient?
---> 25     scores[f'weighted_f1_{prediction_label}'] = Metrics.get_knn_f1_within_set(latent_tmp, prediction_label)
     26 return scores

File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\Metrics.py:106, in get_knn_f1_within_set(latent, column_name)
    105 def get_knn_f1_within_set(latent, column_name):
--> 106     con = latent.obsp['connectivities'].A.astype(bool)
    107     nearest_neighbor_label = [latent.obs[column_name].values[row].tolist()[0] for row in con]
    108     labels_true = latent.obs[column_name].values

AttributeError: 'SparseCSRView' object has no attribute 'A'


P.S. I ran the pipeline in 01_preprocessing.ipynb and then tried 02_training_analysis.ipynb, which produced this error. Is there a solution? Thanks so much!
Nick

Just FYI, 02b_training_analysis_mudata faces the same issue.

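Hi, thanks for trying out mvTCR! #14 is the same issue. As pointed out there, a quick fix is to downgrade SciPy to 1.13 (SciPy 1.14 removed the `.A` attribute of sparse matrices, which is what raises the `AttributeError` above). If this doesn't work, please post your issues there.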
Cheers!