How to do transfer learning on a pretrained downstream task model?
Closed this issue · 7 comments
Hi @ohmeow ,
Is it possible to use blurr to achieve this? I'm trying to do transfer learning from a pretrained NER task model with 39 labels (instead of a language model like in your example) to a smaller NER dataset with only 5 labels. Unfortunately, I'm getting stuck on how to do so.
I tried two different approaches, but both lead to errors:
- Fill in `config.num_labels`:
```python
task = HF_TASKS_AUTO.TokenClassification
pretrained_model_name = 'cahya/bert-base-indonesian-NER'

config = AutoConfig.from_pretrained(pretrained_model_name)
config.num_labels = len(labels)  # 5

hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name,
                                                                               task=task,
                                                                               config=config)
```
```
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-24-b2c1babcdb91> in <module>
      4 config.num_labels = len(labels) # 5
      5
----> 6 hf_arch, hf_config, hf_tokenizer, hf_model = BLURR_MODEL_HELPER.get_hf_objects(pretrained_model_name,
      7                                                                                task=task,
      8                                                                                config=config)

/opt/conda/envs/fastai/lib/python3.8/site-packages/blurr/utils.py in get_hf_objects(self, pretrained_model_name_or_path, task, config, tokenizer_cls, model_cls, config_kwargs, tokenizer_kwargs, model_kwargs, cache_dir)
    175         model_cls = self.get_models(arch="auto", task=task.name)[0]
    176
--> 177         model = model_cls.from_pretrained(pretrained_model_name_or_path,
    178                                           config=config,
    179                                           cache_dir=cache_dir,

/opt/conda/envs/fastai/lib/python3.8/site-packages/transformers/models/auto/modeling_auto.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
   1611
   1612         if type(config) in MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys():
-> 1613             return MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING[type(config)].from_pretrained(
   1614                 pretrained_model_name_or_path, *model_args, config=config, **kwargs
   1615             )

/opt/conda/envs/fastai/lib/python3.8/site-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
   1155             )
   1156         if len(error_msgs) > 0:
-> 1157             raise RuntimeError(
   1158                 "Error(s) in loading state_dict for {}:\n\t{}".format(
   1159                     model.__class__.__name__, "\n\t".join(error_msgs)

RuntimeError: Error(s) in loading state_dict for BertForTokenClassification:
	size mismatch for classifier.weight: copying a param with shape torch.Size([39, 768]) from checkpoint, the shape in current model is torch.Size([5, 768]).
	size mismatch for classifier.bias: copying a param with shape torch.Size([39]) from checkpoint, the shape in current model is torch.Size([5]).
```
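One workaround for the size mismatch might be to load the checkpoint with its original 39-label head (so the state_dict matches) and then swap the classifier afterwards. A rough sketch using plain transformers, bypassing `BLURR_MODEL_HELPER` (the 5-label head here is freshly initialized, not from the checkpoint):

```python
import torch.nn as nn
from transformers import AutoModelForTokenClassification

pretrained_model_name = 'cahya/bert-base-indonesian-NER'

# Load with the checkpoint's original 39-label head so all weights load cleanly.
hf_model = AutoModelForTokenClassification.from_pretrained(pretrained_model_name)

# Swap in a freshly initialized 5-label classifier; the encoder weights are kept.
hf_model.classifier = nn.Linear(hf_model.config.hidden_size, 5)
hf_model.num_labels = 5         # used by the model's built-in loss
hf_model.config.num_labels = 5  # keep the config consistent
```

The resulting `hf_model` should then be usable with `HF_BaseModelWrapper` as usual, since it is still a plain `BertForTokenClassification`.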
- Add in an extra `Linear(39, 5)` layer at the end:

```python
model = HF_BaseModelWrapper(nn.Sequential(hf_model, nn.Linear(39, 5)))
```
But when I call `learn.fit()`, it results in this:
```
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-16-8587f3539821> in <module>
----> 1 learn.fit(1)

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in fit(self, n_epoch, lr, wd, cbs, reset_opt)
    209             self.opt.set_hypers(lr=self.lr if lr is None else lr)
    210             self.n_epoch = n_epoch
--> 211             self._with_events(self._do_fit, 'fit', CancelFitException, self._end_cleanup)
    212
    213     def _end_cleanup(self): self.dl,self.xb,self.yb,self.pred,self.loss = None,(None,),(None,),None,None

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}'); f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}'); final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_fit(self)
    200         for epoch in range(self.n_epoch):
    201             self.epoch=epoch
--> 202             self._with_events(self._do_epoch, 'epoch', CancelEpochException)
    203
    204     def fit(self, n_epoch, lr=None, wd=None, cbs=None, reset_opt=False):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}'); f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}'); final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_epoch(self)
    194
    195     def _do_epoch(self):
--> 196         self._do_epoch_train()
    197         self._do_epoch_validate()
    198

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_epoch_train(self)
    186     def _do_epoch_train(self):
    187         self.dl = self.dls.train
--> 188         self._with_events(self.all_batches, 'train', CancelTrainException)
    189
    190     def _do_epoch_validate(self, ds_idx=1, dl=None):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}'); f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}'); final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in all_batches(self)
    164     def all_batches(self):
    165         self.n_iter = len(self.dl)
--> 166         for o in enumerate(self.dl): self.one_batch(*o)
    167
    168     def _do_one_batch(self):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in one_batch(self, i, b)
    182         self.iter = i
    183         self._split(b)
--> 184         self._with_events(self._do_one_batch, 'batch', CancelBatchException)
    185
    186     def _do_epoch_train(self):

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _with_events(self, f, event_type, ex, final)
    158
    159     def _with_events(self, f, event_type, ex, final=noop):
--> 160         try: self(f'before_{event_type}'); f()
    161         except ex: self(f'after_cancel_{event_type}')
    162         self(f'after_{event_type}'); final()

/opt/conda/envs/fastai/lib/python3.8/site-packages/fastai/learner.py in _do_one_batch(self)
    167
    168     def _do_one_batch(self):
--> 169         self.pred = self.model(*self.xb)
    170         self('after_pred')
    171         if len(self.yb):

/opt/conda/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

/opt/conda/envs/fastai/lib/python3.8/site-packages/blurr/modeling/core.py in forward(self, x)
     41             if k not in self.hf_model_fwd_args: del x[k]
     42
---> 43         return self.hf_model(**x,
     44                              output_hidden_states=self.output_hidden_states,
     45                              output_attentions=self.output_attentions,

/opt/conda/envs/fastai/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

TypeError: forward() got an unexpected keyword argument 'output_hidden_states'
```
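The `TypeError` itself isn't blurr-specific: per the traceback, `HF_BaseModelWrapper.forward` calls `self.hf_model(**x, output_hidden_states=..., output_attentions=...)`, and here `self.hf_model` is the `nn.Sequential`, whose `forward` takes a single positional input and no keyword arguments. A minimal reproduction, independent of blurr and transformers:

```python
import torch
import torch.nn as nn

seq = nn.Sequential(nn.Linear(768, 5))
seq(torch.zeros(1, 768))  # positional input: fine

try:
    # nn.Sequential.forward accepts no keyword arguments, so any HF-style
    # kwarg reproduces the error above:
    seq(torch.zeros(1, 768), output_hidden_states=False)
except TypeError as e:
    print(e)  # forward() got an unexpected keyword argument 'output_hidden_states'
```

So appending a `Linear(39, 5)` via `nn.Sequential` can't work inside the wrapper; replacing the model's own classifier head (as sketched earlier) sidesteps this entirely.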
Sure, this is the gist of the notebook I'm working on: https://gist.github.com/ncduy0303/cd53215ab27c3b870d92319deffb3dcf
@ohmeow, it worked when I used a pretrained language model, but not with a pretrained token classification model. I suspect that if I could fine-tune a pretrained NER model on my dataset instead, the performance would be better?
```python
pretrained_model_name = 'bert-base-multilingual-uncased'  # 'cahya/bert-base-indonesian-522M' # working
# pretrained_model_name = 'cahya/bert-base-indonesian-NER' # 'cahya/xlm-roberta-base-indonesian-NER' # pretrained NER models, not working
```
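For what it's worth, recent transformers releases can also reinitialize mismatched heads at load time via `ignore_mismatched_sizes` in `from_pretrained`. A sketch under that assumption (the flag may not exist in the older transformers version shown in the tracebacks above):

```python
from transformers import AutoTokenizer, AutoModelForTokenClassification

pretrained_model_name = 'cahya/bert-base-indonesian-NER'

hf_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)
hf_model = AutoModelForTokenClassification.from_pretrained(
    pretrained_model_name,
    num_labels=5,                  # new 5-label head for the smaller dataset
    ignore_mismatched_sizes=True,  # skip the checkpoint's 39-label classifier weights
)
```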
That is typically the case, yah.
@ncduy0303 : Can I close this issue out? Or are you still having issues addressed in this "issue"? Lmk.
Sure, thank you for your help!