02_training_analysis.ipynb error in "Calling the training functions"
HongjianSun opened this issue · 2 comments
HongjianSun commented
from mvtcr.models.model_selection import run_model_selection
timeout = (20*60)
n_samples = 3
n_gpus = 1
seed = 42
run_model_selection(adata, params_experiment, params_optimization, n_samples, timeout, n_gpus, sampler_seed=seed)
[I 2024-07-25 10:51:06,698] A new study created in RDB with name: haniffa_tutorial
0%| | 0/10 [00:12<?, ?it/s]
[W 2024-07-25 10:51:22,235] Trial 0 failed with parameters: {'dropout': 0.1, 'activation': 'linear', 'rna_hidden': 1500, 'hdim': 200, 'shared_hidden': 100, 'rna_num_layers': 1, 'tfmr_encoding_layers': 4, 'loss_weights_kl': 4.0428727350273357e-07, 'loss_weights_tcr': 0.034702669886504146, 'lr': 1.0994335574766187e-05, 'zdim': 50, 'tfmr_embedding_size': 16, 'tfmr_num_heads': 8, 'tfmr_dropout': 0.15000000000000002} because of the following error: AttributeError("'SparseCSRView' object has no attribute 'A'").
Traceback (most recent call last):
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
value_or_values = func(trial)
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py", line 152, in <lambda>
study.optimize(lambda trial: objective(trial, adata, suggest_params, params_experiment, params_optimization),
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py", line 104, in objective
model.train(params_experiment['n_epochs'], params_architecture['batch_size'], params_architecture['learning_rate'],
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py", line 228, in train
self.additional_evaluation(epoch, save_path)
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py", line 328, in additional_evaluation
score, relation = report_pseudo_metric(self.adata, self, self.optimization_mode_params,
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\optimization\pseudo_metric.py", line 19, in report_pseudo_metric
summary = run_knn_within_set_evaluation(adata, test_embedding_func, labels, subset='val')
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\kNN.py", line 25, in run_knn_within_set_evaluation
scores[f'weighted_f1_{prediction_label}'] = Metrics.get_knn_f1_within_set(latent_tmp, prediction_label)
File "C:\Users\uqhsun9\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\Metrics.py", line 106, in get_knn_f1_within_set
con = latent.obsp['connectivities'].A.astype(bool)
AttributeError: 'SparseCSRView' object has no attribute 'A'
[W 2024-07-25 10:51:22,235] Trial 0 failed with value None.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[5], line 7
5 n_gpus = 1
6 seed = 42
----> 7 run_model_selection(adata, params_experiment, params_optimization, n_samples, timeout, n_gpus, sampler_seed=seed)
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\utils_preprocessing.py:115, in check_if_input_is_mudata.<locals>.wrapper(mudata_gex_key, mudata_airr_key, *args, **kwargs)
113 train, test = func(*args, **kwargs)
114 else:
--> 115 func(*args, **kwargs)
116 #====================
117 #updating mudata
118 if input_is_mu and func_name in ("encode_clonotypes", "encode_tcr", "encode_conditional_var"):
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py:152, in run_model_selection(adata, params_experiment, params_optimization, num_samples, timeout, n_jobs, sampler_seed)
150 suggest_params = get_parameter_functions(params_experiment['model_name'], params_optimization['name'])
151 # study.enqueue_trial(init_params)
--> 152 study.optimize(lambda trial: objective(trial, adata, suggest_params, params_experiment, params_optimization),
153 n_trials=num_samples, timeout=timeout, n_jobs=n_jobs)
155 try:
156 #optuna >=3.0
157 pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\study.py:451, in Study.optimize(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
348 def optimize(
349 self,
350 func: ObjectiveFuncType,
(...)
357 show_progress_bar: bool = False,
358 ) -> None:
359 """Optimize an objective function.
360
361 Optimization is done by choosing a suitable set of hyperparameter values from a given
(...)
449 If nested invocation of this method occurs.
450 """
--> 451 _optimize(
452 study=self,
453 func=func,
454 n_trials=n_trials,
455 timeout=timeout,
456 n_jobs=n_jobs,
457 catch=tuple(catch) if isinstance(catch, Iterable) else (catch,),
458 callbacks=callbacks,
459 gc_after_trial=gc_after_trial,
460 show_progress_bar=show_progress_bar,
461 )
File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:66, in _optimize(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)
64 try:
65 if n_jobs == 1:
---> 66 _optimize_sequential(
67 study,
68 func,
69 n_trials,
70 timeout,
71 catch,
72 callbacks,
73 gc_after_trial,
74 reseed_sampler_rng=False,
75 time_start=None,
76 progress_bar=progress_bar,
77 )
78 else:
79 if n_jobs == -1:
File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:163, in _optimize_sequential(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)
160 break
162 try:
--> 163 frozen_trial = _run_trial(study, func, catch)
164 finally:
165 # The following line mitigates memory problems that can be occurred in some
166 # environments (e.g., services that use computing containers such as GitHub Actions).
167 # Please refer to the following PR for further details:
168 # https://github.com/optuna/optuna/pull/325.
169 if gc_after_trial:
File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:251, in _run_trial(study, func, catch)
244 assert False, "Should not reach."
246 if (
247 frozen_trial.state == TrialState.FAIL
248 and func_err is not None
249 and not isinstance(func_err, catch)
250 ):
--> 251 raise func_err
252 return frozen_trial
File ~\Anaconda3\envs\mvtcr\lib\site-packages\optuna\study\_optimize.py:200, in _run_trial(study, func, catch)
198 with get_heartbeat_thread(trial._trial_id, study._storage):
199 try:
--> 200 value_or_values = func(trial)
201 except exceptions.TrialPruned as e:
202 # TODO(mamu): Handle multi-objective cases.
203 state = TrialState.PRUNED
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py:152, in run_model_selection.<locals>.<lambda>(trial)
150 suggest_params = get_parameter_functions(params_experiment['model_name'], params_optimization['name'])
151 # study.enqueue_trial(init_params)
--> 152 study.optimize(lambda trial: objective(trial, adata, suggest_params, params_experiment, params_optimization),
153 n_trials=num_samples, timeout=timeout, n_jobs=n_jobs)
155 try:
156 #optuna >=3.0
157 pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\model_selection.py:104, in objective(trial, adata_tmp, suggest_params, params_experiment_base, optimization_mode_params)
99 model = utils.select_model_by_name(params_experiment['model_name'])
100 model = model(adata, params_architecture, params_experiment['balanced_sampling'], params_experiment['metadata'],
101 params_experiment['conditional'], optimization_mode_params,
102 params_experiment['label_key'], params_experiment['device'])
--> 104 model.train(params_experiment['n_epochs'], params_architecture['batch_size'], params_architecture['learning_rate'],
105 params_architecture['loss_weights'], params_experiment['kl_annealing_epochs'],
106 params_experiment['early_stop'], params_experiment['save_path'], comet)
108 # plot UMAPs
109 if comet is not None:
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py:228, in VAEBaseModel.train(self, n_epochs, batch_size, learning_rate, loss_weights, kl_annealing_epochs, early_stop, save_path, comet)
226 val_loss_summary = self.run_epoch(epoch, phase='val')
227 self.log_losses(val_loss_summary, epoch)
--> 228 self.additional_evaluation(epoch, save_path)
230 if self.do_early_stopping(val_loss_summary['val Loss'], early_stop, save_path, epoch):
231 break
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\vae_base_model.py:328, in VAEBaseModel.additional_evaluation(self, epoch, save_path)
325 score, relation = report_modulation_prediction(self.adata, self, self.optimization_mode_params,
326 epoch, self.comet)
327 elif name == 'pseudo_metric':
--> 328 score, relation = report_pseudo_metric(self.adata, self, self.optimization_mode_params,
329 epoch, self.comet)
330 elif name == 'supervised':
331 score, relation = self.summary_losses['val CLS F1'], operator.gt
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\models\optimization\pseudo_metric.py:19, in report_pseudo_metric(adata, model, optimization_mode_params, epoch, comet)
16 labels = list(prediction_label.keys())
18 test_embedding_func = get_model_prediction_function(model, do_adata=True, metadata=labels)
---> 19 summary = run_knn_within_set_evaluation(adata, test_embedding_func, labels, subset='val')
21 if isinstance(prediction_label, list):
22 summary['pseudo_metric'] = sum(summary.values())
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\kNN.py:25, in run_knn_within_set_evaluation(data_full, embedding_function, prediction_labels, subset)
23 latent_tmp = latent_tmp[(latent_tmp.obs[prediction_label] != -99).to_numpy()]
24 #IDEA latent tmp for f1 or calc just a knn graph and eval with that --> speed and mem efficient?
---> 25 scores[f'weighted_f1_{prediction_label}'] = Metrics.get_knn_f1_within_set(latent_tmp, prediction_label)
26 return scores
File ~\Anaconda3\envs\mvtcr\lib\site-packages\mvtcr\evaluation\Metrics.py:106, in get_knn_f1_within_set(latent, column_name)
105 def get_knn_f1_within_set(latent, column_name):
--> 106 con = latent.obsp['connectivities'].A.astype(bool)
107 nearest_neighbor_label = [latent.obs[column_name].values[row].tolist()[0] for row in con]
108 labels_true = latent.obs[column_name].values
AttributeError: 'SparseCSRView' object has no attribute 'A'
P.S. I ran the pipeline in 01_preprocessing.ipynb and then tried 02_training_analysis.ipynb, where I got this error. Is there a solution? Thanks so much!
Nick
HongjianSun commented
Just FYI, 02b_training_analysis_mudata faces the same issue.
leuschjanphilipp commented
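Hi, thanks for trying out mvTCR! #14 is the same issue. As pointed out there, a quick fix is to downgrade scipy to 1.13 (the sparse-matrix `.A` attribute used in Metrics.py was removed in scipy 1.14; `.toarray()` is the equivalent). If this doesn't work, please post your issues there.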
Cheers!