jessevig/bertviz

Using [MASK] in a sentence.

bickleigh opened this issue · 0 comments

Here is a program that takes in a sentence with a [MASK] token standing in for a missing word and finally outputs a heat-map .png graphic for every attention head (all 144 of them, lol). It would be nice to be able to see the final results through bertviz instead. I have had a play around with some of the notebooks and tried various builds, but I can't seem to get it to do what the notebooks so brilliantly show. The issue seems to be around:

```python
inputs = tokenizer(text, return_tensors="tf")
mask_token_index = get_mask_token_index(tokenizer.mask_token_id, inputs)
```
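For reference, this is roughly the pattern from the head_view notebook that I've been trying to adapt (PyTorch, written from memory, so the exact calls may be slightly off; the example sentence is just for illustration):

```python
from transformers import BertModel, BertTokenizer
from bertviz import head_view

model_version = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_version)
model = BertModel.from_pretrained(model_version, output_attentions=True)

# Example sentence just for illustration
inputs = tokenizer.encode_plus("Then I picked up a book from the table.", return_tensors="pt")
input_ids = inputs["input_ids"]

attention = model(input_ids).attentions                       # tuple of per-layer (batch, heads, seq, seq) tensors
tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
head_view(attention, tokens)                                  # interactive head view, rendered in a notebook
```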

I can't seem to integrate this into one of the notebooks. Do you have an example notebook that demonstrates using and predicting a [MASK] token?
This is the code I have:

```python
import sys
import tensorflow as tf
from PIL import Image, ImageDraw, ImageFont
from transformers import AutoTokenizer, AutoModel, TFBertForMaskedLM
from bertviz import head_view

# Pre-trained masked language model
MODEL = "bert-base-uncased"

# Number of predictions to generate
K = 3

# Constants for generating attention diagrams
FONT = ImageFont.truetype("assets/fonts/OpenSans-Regular.ttf", 28)
GRID_SIZE = 40
PIXELS_PER_WORD = 200

def main():
    # text = input("Text: ")
    text = "Then I picked up a [MASK] from the table."

    # Tokenize input
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    inputs = tokenizer(text, return_tensors="tf")
    mask_token_index = get_mask_token_index(tokenizer.mask_token_id, inputs)
    if mask_token_index is None:
        sys.exit(f"Input must include mask token {tokenizer.mask_token}.")

    # Use model to process input
    model = TFBertForMaskedLM.from_pretrained(MODEL)
    result = model(**inputs, output_attentions=True)

    # Generate predictions
    mask_token_logits = result.logits[0, mask_token_index]
    top_tokens = tf.math.top_k(mask_token_logits, K).indices.numpy()
    for token in top_tokens:
        print(text.replace(tokenizer.mask_token, tokenizer.decode([token])))

    # Visualize attentions
    visualize_attentions(inputs.tokens(), result.attentions)

def get_mask_token_index(mask_token_id, inputs):
    """
    Return the index of the token with the specified mask_token_id, or
    None if not present in the inputs.
    """
    input_ids = inputs['input_ids'][0].numpy()
    for i, token_id in enumerate(input_ids):
        if token_id == mask_token_id:
            return i
    return None

def get_color_for_attention_score(attention_score):
    """
    Return a tuple of three integers representing a shade of gray for the
    given attention_score. Each value should be in the range [0, 255].
    """
    gray_value = int(attention_score * 255)
    return (gray_value, gray_value, gray_value)

def visualize_attentions(tokens, attentions):
    """
    Produce a graphical representation of self-attention scores for all
    layers and heads.

    For each attention layer, one diagram is generated for each attention
    head in the layer. Each diagram includes the list of `tokens` in the
    sentence, and each filename includes both the layer number and the head
    number (both counted from 1).
    """
    num_layers = len(attentions)
    num_heads = len(attentions[0][0])

    for layer_number in range(1, num_layers + 1):
        for head_number in range(1, num_heads + 1):
            generate_diagram(
                layer_number,
                head_number,
                tokens,
                attentions[layer_number - 1][0][head_number - 1]
            )

def generate_diagram(layer_number, head_number, tokens, attention_weights):
    """
    Generate a diagram representing the self-attention scores for a single
    attention head. The diagram shows one row and column for each of the
    tokens, and cells are shaded based on attention_weights, with lighter
    cells corresponding to higher attention scores.

    The diagram is saved with a filename that includes both the
    `layer_number` and `head_number`.
    """
    # Create new image
    image_size = GRID_SIZE * len(tokens) + PIXELS_PER_WORD
    img = Image.new("RGBA", (image_size, image_size), "black")
    draw = ImageDraw.Draw(img)

    # Draw each token onto the image
    for i, token in enumerate(tokens):
        # Draw token columns
        token_image = Image.new("RGBA", (image_size, image_size), (0, 0, 0, 0))
        token_draw = ImageDraw.Draw(token_image)
        token_draw.text(
            (image_size - PIXELS_PER_WORD, PIXELS_PER_WORD + i * GRID_SIZE),
            token,
            fill="white",
            font=FONT
        )
        token_image = token_image.rotate(90)
        img.paste(token_image, mask=token_image)

        # Draw token rows
        _, _, width, _ = draw.textbbox((0, 0), token, font=FONT)
        draw.text(
            (PIXELS_PER_WORD - width, PIXELS_PER_WORD + i * GRID_SIZE),
            token,
            fill="white",
            font=FONT
        )

    # Draw each cell of the attention grid
    for i in range(len(tokens)):
        y = PIXELS_PER_WORD + i * GRID_SIZE
        for j in range(len(tokens)):
            x = PIXELS_PER_WORD + j * GRID_SIZE
            color = get_color_for_attention_score(attention_weights[i][j])
            draw.rectangle((x, y, x + GRID_SIZE, y + GRID_SIZE), fill=color)

    # Save image
    img.save(f"Attention_Layer{layer_number}_Head{head_number}.png")

if name == "main":
main()
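And this is the direction I've been experimenting with to hook the TF model up to head_view (just a sketch; the helper name and the TF-to-torch conversion are my own guesses, so it may well be the wrong approach):

```python
import torch
from bertviz import head_view

def show_head_view(inputs, result):
    """Sketch: try to feed the TF model's attentions into bertviz's head_view.

    Assumes head_view wants a list of per-layer attention tensors of shape
    (batch, heads, seq_len, seq_len) plus the token strings, so the TF
    tensors in result.attentions are converted to torch via numpy first.
    """
    attention = [torch.from_numpy(layer.numpy()) for layer in result.attentions]
    tokens = inputs.tokens()          # token strings from the BatchEncoding
    head_view(attention, tokens)      # renders the interactive view in a notebook cell
```

I'd call it right after `result = model(**inputs, output_attentions=True)` in `main()`, but nothing renders when I run it as a plain script, which is probably part of my confusion.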
Any help or pointers would be gratefully received... :)