massanishi/document_similarity_algorithms_experiments

TypeError: prepare_for_model() got an unexpected keyword argument 'padding'

eva806 opened this issue · 0 comments

While running the BERT model, I am getting an error on the line below:
base_embeddings_sentences = model.encode(sentences)

The error stack is shown below. Please help with a resolution.


TypeError Traceback (most recent call last)
in
1 model = SentenceTransformer('bert-base-nli-mean-tokens')
2 sentences = sent_tokenize(base_document)
----> 3 base_embeddings_sentences = model.encode(sentences)
4 base_embeddings = np.mean(np.array(base_embeddings_sentences), axis=0)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\SentenceTransformer.py in encode(self, sentences, batch_size, show_progress_bar, output_value, convert_to_numpy, convert_to_tensor, is_pretokenized, device, num_workers)
174 iterator = tqdm(inp_dataloader, desc="Batches")
175
--> 176 for features in iterator:
177 for feature_name in features:
178 features[feature_name] = features[feature_name].to(device)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\tqdm\notebook.py in __iter__(self, *args, **kwargs)
220 def __iter__(self, *args, **kwargs):
221 try:
--> 222 for obj in super(tqdm_notebook, self).__iter__(*args, **kwargs):
223 # return super(tqdm...) will not catch exception
224 yield obj

~\AppData\Local\Continuum\anaconda3\lib\site-packages\tqdm\std.py in __iter__(self)
1085 """), fp_write=getattr(self.fp, 'write', sys.stderr.write))
1086
-> 1087 for obj in iterable:
1088 yield obj
1089 # Update and possibly print the progressbar.

~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\data\dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \

~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\data\dataloader.py in _next_data(self)
473 def _next_data(self):
474 index = self._next_index() # may raise StopIteration
--> 475 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
476 if self._pin_memory:
477 data = _utils.pin_memory.pin_memory(data)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py in fetch(self, possibly_batched_index)
45 else:
46 data = self.dataset[possibly_batched_index]
---> 47 return self.collate_fn(data)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\SentenceTransformer.py in smart_batching_collate_text_only(self, batch)
428
429 for text in batch:
--> 430 sentence_features = self.get_sentence_features(text, max_seq_len)
431 for feature_name in sentence_features:
432 if feature_name not in feature_lists:

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\SentenceTransformer.py in get_sentence_features(self, *features)
327
328 def get_sentence_features(self, *features):
--> 329 return self._first_module().get_sentence_features(*features)
330
331 def get_sentence_embedding_dimension(self):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\sentence_transformers\models\Transformer.py in get_sentence_features(self, tokens, pad_seq_length)
75
76 if len(tokens) == 0 or isinstance(tokens[0], int):
---> 77 return self.tokenizer.prepare_for_model(tokens, max_length=pad_seq_length, padding='max_length', return_tensors='pt', truncation=True, prepend_batch_axis=True)
78 else:
79 return self.tokenizer.prepare_for_model(tokens[0], tokens[1], max_length=pad_seq_length, padding='max_length', return_tensors='pt', truncation='longest_first', prepend_batch_axis=True)

TypeError: prepare_for_model() got an unexpected keyword argument 'padding'