cassiebreviu/StableDiffusion

How do I get cliptokenizer.onnx


How do I get cliptokenizer.onnx? I am converting the 512-resolution Stable Diffusion 2.1 model, but this is not supported.

This is created with ONNX Runtime Extensions. Learn more here: https://onnxruntime.ai/docs/extensions/
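
Newer releases of onnxruntime-extensions also ship a one-call helper that converts a Hugging Face tokenizer directly. A minimal sketch, assuming gen_processing_models is available in your installed version:

  import onnx
  from transformers import CLIPTokenizerFast
  from onnxruntime_extensions import gen_processing_models

  # gen_processing_models returns (pre-processing, post-processing) models;
  # pre_kwargs={} requests just the tokenizer (pre-processing) graph.
  tokenizer = CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch32")
  pre_model = gen_processing_models(tokenizer, pre_kwargs={})[0]
  onnx.save(pre_model, 'cliptokenizer.onnx')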

What I mean is: where can I download the original PyTorch model? I now want to convert the Stable Diffusion tokenizer vocabulary. Thanks.

You are referencing this model in the repo, right? cliptokenizer.onnx

That model is generated with ORT Extensions.

Here is how to generate cliptokenizer.onnx for Stable Diffusion 2.1:

  import onnx
  from pathlib import Path
  from onnx import helper, onnx_pb as onnx_proto
  from transformers import CLIPTokenizerFast
  from onnxruntime_extensions import make_onnx_model
  
  def _get_file_content(path):
      # Read the vocab/merges file as raw bytes for the node attributes.
      with open(path, "rb") as file:
          return file.read()
  
  def _create_test_model(**kwargs):
      # Build a one-node ONNX graph around the CLIPTokenizer custom op
      # from ORT Extensions (domain 'ai.onnx.contrib').
      vocab_file = kwargs["vocab_file"]
      merges_file = kwargs["merges_file"]
      max_length = kwargs["max_length"]

      input1 = helper.make_tensor_value_info(
          'string_input', onnx_proto.TensorProto.STRING, [None])
      output1 = helper.make_tensor_value_info(
          'input_ids', onnx_proto.TensorProto.INT64, ["batch_size", "num_input_ids"])
      output2 = helper.make_tensor_value_info(
          'attention_mask', onnx_proto.TensorProto.INT64, ["batch_size", "num_attention_masks"])
      output3 = helper.make_tensor_value_info(
          'offset_mapping', onnx_proto.TensorProto.INT64, ["batch_size", "num_offsets", 2])
  
      if kwargs["attention_mask"]:
          if kwargs["offset_map"]:
              node = [helper.make_node(
                  'CLIPTokenizer', ['string_input'],
                  ['input_ids', 'attention_mask', 'offset_mapping'], vocab=_get_file_content(vocab_file),
                  merges=_get_file_content(merges_file), name='bpetok', padding_length=max_length,
                  domain='ai.onnx.contrib')]
  
              graph = helper.make_graph(node, 'test0', [input1], [output1, output2, output3])
              model = make_onnx_model(graph)
          else:
              node = [helper.make_node(
                  'CLIPTokenizer', ['string_input'], ['input_ids', 'attention_mask'], vocab=_get_file_content(vocab_file),
                  merges=_get_file_content(merges_file), name='bpetok', padding_length=max_length,
                  domain='ai.onnx.contrib')]
  
              graph = helper.make_graph(node, 'test0', [input1], [output1, output2])
              model = make_onnx_model(graph)
      else:
          node = [helper.make_node(
              'CLIPTokenizer', ['string_input'], ['input_ids'], vocab=_get_file_content(vocab_file),
              merges=_get_file_content(merges_file), name='bpetok', padding_length=max_length,
              domain='ai.onnx.contrib')]
  
          graph = helper.make_graph(node, 'test0', [input1], [output1])
          model = make_onnx_model(graph)
  
      return model
  
  
  # Load a CLIP tokenizer; Stable Diffusion uses the same CLIP BPE vocabulary.
  tokenizer = CLIPTokenizerFast.from_pretrained("openai/clip-vit-base-patch32")
  temp_dir = Path('./temp_onnxclip')
  temp_dir.mkdir(parents=True, exist_ok=True)
  # save_vocabulary() writes vocab.json and merges.txt and returns their paths.
  files = tokenizer.save_vocabulary(str(temp_dir))
  vocab_file = files[0]
  merges_file = files[1]
  # max_length=-1 means no fixed-length padding is applied.
  model = _create_test_model(vocab_file=vocab_file, merges_file=merges_file,
                             max_length=-1, attention_mask=True, offset_map=False)
  onnx.save(model, 'test.onnx')
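
To verify the exported tokenizer, run it with onnxruntime after registering the ORT Extensions custom-op library, since the CLIPTokenizer node lives in the ai.onnx.contrib domain. A minimal sketch (the prompt is just an example string):

  import numpy as np
  import onnxruntime as ort
  from onnxruntime_extensions import get_library_path

  # Register the custom-op library so ai.onnx.contrib ops can be resolved.
  so = ort.SessionOptions()
  so.register_custom_ops_library(get_library_path())
  sess = ort.InferenceSession('test.onnx', so)

  text = np.array(["a photo of an astronaut riding a horse"])
  input_ids, attention_mask = sess.run(None, {'string_input': text})
  print(input_ids, attention_mask)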