Using [MASK] in a sentence.
bickleigh opened this issue
Here is a program that takes in a sentence with a [MASK] token standing in for a missing word, predicts the top candidates for that word, and finally outputs a heat-map .png graphic for every attention head (all 144 of them, lol). It would be nice to be able to see the final results through bertviz instead. I have had a play around with some of the notebooks and tried various builds, but I can't seem to get it to do what the notebooks so brilliantly show. The issue seems to be around
inputs = tokenizer(text, return_tensors="tf")
mask_token_index = get_mask_token_index(tokenizer.mask_token_id, inputs)
I can't seem to integrate this into one of the notebooks. Do you have an example notebook that demonstrates using and predicting a [MASK]?
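For reference, my (possibly wrong) reading of the bertviz README is that head_view() wants a list of per-layer attention tensors of shape (batch, heads, seq_len, seq_len) plus the token strings, so with TF outputs I imagine the glue code would look roughly like this; the torch conversion is my own untested assumption:

import torch
from bertviz import head_view

# result.attentions is a tuple of TF tensors, one per layer;
# head_view expects torch tensors, so convert each layer first
attention = [torch.from_numpy(layer.numpy()) for layer in result.attentions]
head_view(attention, inputs.tokens())  # renders inline in a Jupyter notebook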
This is the code I have:
import sys
import tensorflow as tf
from PIL import Image, ImageDraw, ImageFont
from transformers import AutoTokenizer, TFBertForMaskedLM
from bertviz import head_view

# Pre-trained masked language model
MODEL = "bert-base-uncased"

# Number of predictions to generate
K = 3

# Constants for generating attention diagrams
FONT = ImageFont.truetype("assets/fonts/OpenSans-Regular.ttf", 28)
GRID_SIZE = 40
PIXELS_PER_WORD = 200
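# Note: the font path above is specific to my project layout; if it's not
# available, PIL's built-in default font works as a drop-in fallback:
# FONT = ImageFont.load_default()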
def main():
    # text = input("Text: ")
    text = "Then I picked up a [MASK] from the table."

    # Tokenize input
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    inputs = tokenizer(text, return_tensors="tf")
    mask_token_index = get_mask_token_index(tokenizer.mask_token_id, inputs)
    if mask_token_index is None:
        sys.exit(f"Input must include mask token {tokenizer.mask_token}.")

    # Use model to process input
    model = TFBertForMaskedLM.from_pretrained(MODEL)
    result = model(**inputs, output_attentions=True)

    # Generate predictions for the masked word
    mask_token_logits = result.logits[0, mask_token_index]
    top_tokens = tf.math.top_k(mask_token_logits, K).indices.numpy()
    for token in top_tokens:
        print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))

    # Visualize attentions
    visualize_attentions(inputs.tokens(), result.attentions)
def get_mask_token_index(mask_token_id, inputs):
    """
    Return the index of the token with the specified mask_token_id, or
    None if it is not present in the inputs.
    """
    input_ids = inputs["input_ids"][0].numpy()
    for i, token_id in enumerate(input_ids):
        if token_id == mask_token_id:
            return i
    return None
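# (Aside: the loop above could also be done with tf.where -- my untested
# sketch, assuming at most one [MASK] token in the sentence:)
#
#     hits = tf.where(inputs["input_ids"][0] == mask_token_id)
#     mask_index = int(hits[0][0]) if int(tf.size(hits)) > 0 else None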
def get_color_for_attention_score(attention_score):
    """
    Return a tuple of three integers representing a shade of gray for the
    given attention_score. Each value should be in the range [0, 255],
    so 0.0 maps to black and 1.0 maps to white.
    """
    gray_value = int(attention_score * 255)
    return (gray_value, gray_value, gray_value)
def visualize_attentions(tokens, attentions):
    """
    Produce a graphical representation of self-attention scores for all
    layers and heads. For each attention layer, one diagram is generated
    per attention head, labelled with the `tokens` in the sentence. The
    filename for each diagram includes both the layer number and the head
    number (each counted from 1).
    """
    num_layers = len(attentions)
    num_heads = len(attentions[0][0])
    for layer_number in range(1, num_layers + 1):
        for head_number in range(1, num_heads + 1):
            generate_diagram(
                layer_number,
                head_number,
                tokens,
                attentions[layer_number - 1][0][head_number - 1]
            )
def generate_diagram(layer_number, head_number, tokens, attention_weights):
    """
    Generate a diagram representing the self-attention scores for a single
    attention head. The diagram shows one row and column for each of the
    tokens, and cells are shaded based on attention_weights, with lighter
    cells corresponding to higher attention scores. The diagram is saved
    with a filename that includes both the `layer_number` and `head_number`.
    """
    # Create new image
    image_size = GRID_SIZE * len(tokens) + PIXELS_PER_WORD
    img = Image.new("RGBA", (image_size, image_size), "black")
    draw = ImageDraw.Draw(img)

    # Draw each token onto the image
    for i, token in enumerate(tokens):
        # Draw token columns (rendered on a transparent layer, then rotated)
        token_image = Image.new("RGBA", (image_size, image_size), (0, 0, 0, 0))
        token_draw = ImageDraw.Draw(token_image)
        token_draw.text(
            (image_size - PIXELS_PER_WORD, PIXELS_PER_WORD + i * GRID_SIZE),
            token,
            fill="white",
            font=FONT
        )
        token_image = token_image.rotate(90)
        img.paste(token_image, mask=token_image)

        # Draw token rows (right-aligned against the grid)
        _, _, width, _ = draw.textbbox((0, 0), token, font=FONT)
        draw.text(
            (PIXELS_PER_WORD - width, PIXELS_PER_WORD + i * GRID_SIZE),
            token,
            fill="white",
            font=FONT
        )

    # Draw the attention grid, one shaded cell per token pair
    for i in range(len(tokens)):
        y = PIXELS_PER_WORD + i * GRID_SIZE
        for j in range(len(tokens)):
            x = PIXELS_PER_WORD + j * GRID_SIZE
            color = get_color_for_attention_score(attention_weights[i][j])
            draw.rectangle((x, y, x + GRID_SIZE, y + GRID_SIZE), fill=color)

    # Save image
    img.save(f"Attention_Layer{layer_number}_Head{head_number}.png")
if __name__ == "__main__":
    main()
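As a side note, in case it helps with a future example notebook: the prediction loop can also report probabilities instead of raw logits, something along these lines (an untested variation on the loop in main()):

probs = tf.nn.softmax(mask_token_logits)  # turn logits into probabilities
top = tf.math.top_k(probs, K)
for p, token_id in zip(top.values.numpy(), top.indices.numpy()):
    print(f"{tokenizer.decode([token_id])} ({p:.1%})")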
Any help or pointers would be gratefully received... :)