`ValueError: y_true must be of shape [batch_size, 1]` for summarisation NLP model.
ktunk opened this issue · 0 comments
ktunk commented
Hi, I am working on summarisation task and I have used hugging-face 'google/mt5-small'
pre-trained model for this. I am facing below ValueError for y_label shape while running below hyperparameter tunning code.
ValueError: y_true must be of shape [batch_size, 1]. Found shape: (None, 128)
Actually, my y_label data is summary text and I am passing its text-encodings to tuner.
code:
from kerastuner import HyperModel
import keras_nlp
import keras_tuner as kt
import transformers
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer
model_checkpoint = 'google/mt5-small'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
max_input_length = 1024
max_target_length = 128
train_texts, val_texts, train_labels, val_labels = train_test_split(data.document,
data.summary,
test_size=0.2,
random_state=42)
#create encodings
#encoding for x (document) data
x_train_encodings = tokenizer(train_texts.tolist(), truncation=True, max_length=max_input_length, padding="max_length", return_tensors='tf')
x_val_encodings = tokenizer(val_texts.tolist(), truncation=True, max_length=max_input_length, padding="max_length", return_tensors='tf')
print(x_train_encodings)
#encoding for y (summary) data
y_train_encodings = tokenizer(train_labels.tolist(), truncation=True, max_length=max_target_length, padding="max_length", return_tensors='tf')
y_val_encodings = tokenizer(val_labels.tolist(), truncation=True, max_length=max_target_length, padding="max_length", return_tensors='tf')
print(y_val_encodings)
BATCH_SIZE = 8
# create dataset
x_inputs = dict(x_train_encodings)
x_inputs['labels'] = y_train_encodings['input_ids']
train_tf_dataset = tf.data.Dataset.from_tensor_slices((x_inputs))
train_tf_dataset = train_tf_dataset.shuffle(len(x_train_encodings)).batch(BATCH_SIZE)
eval_inputs = dict(x_val_encodings)
eval_inputs['labels'] = y_val_encodings['input_ids']
eval_tf_dataset = tf.data.Dataset.from_tensor_slices(eval_inputs)
eval_tf_dataset = eval_tf_dataset.batch(BATCH_SIZE)
# tf_dataset will have this columns: 'input_ids', 'attention_mask', 'labels'
# x_input: 'input_ids', 'attention_mask',
# y_label: 'labels'
# HyperModel class
class MyHyperModel(HyperModel) :
def __init__(self, model_checkpoint):
self.model = TFAutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
def build(self, hp):
hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4, 2e-5])
hp_decay = hp.Int('decay', min_value=10, max_value=100, step=10 )
rouge_n = keras_nlp.metrics.RougeN()
self.model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate), #, decay=hp_decay),
metrics=['accuracy',rouge_n])
return self.model
hyperModel = MyHyperModel(model_checkpoint)
tuner = kt.Hyperband(hyperModel,
objective=kt.Objective("rouge", direction="max"),
max_epochs=20,
factor=3,
overwrite=True,
directory="my_dir",
project_name="tune_hypermodel")
tuner.search(train_tf_dataset,
epochs=25,
batch_size=BATCH_SIZE,
validation_data=eval_tf_dataset,
verbose=2)