google-research/bert

Use BERT fine-tuned model for Tensorflow serving

gkovacslmi opened this issue · 72 comments

I'd like to publish an HTTP REST or GRPC API with TF serving for a BERT model fine-tuned on the SQuAD dataset. I tried two approaches but unfortunately both of them failed.

1.) Convert the files in the output directory (ckpt files, graph.pbtxt...) to the file structure that TF serving expects - saved_model.pb and the data/index files in the variables folder. This resulted in a model loading error when I tried to load it into a Docker container with the 'tensorflow/serving' image.

2.) The other approach was to create a model export after fine-tuning by calling TPUEstimator.export_savedmodel(...). This resulted in the following issue - an internal implementation detail in TF:

NotImplementedError: Operation of type AssignVariableOp (AssignVariableOp) is not supported on the TPU for inference. Execution will fail if this op is used in the graph. Make sure your variables are using variable_scope.

The way I tried to export the fine-tuned model after training is the following:

feature_spec = {
    "unique_ids": tf.FixedLenFeature([], tf.int64),
    "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
    "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
    "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
}

def serving_input_receiver_fn():
    serialized_tf_example = tf.placeholder(dtype=tf.string, shape=None,
                                           name='input_example_tensor')
    
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)


estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, strip_default_attrs=True)

If my issue is unrelated to this repo, can you please redirect it to a place where model checkpoint <--> SavedModel conversion issues are considered on-topic?

Thanks in advance for all the help.

I'm getting the same error!

Can anyone help? The detailed error is below:

NotImplementedError Traceback (most recent call last)
in
----> 1 estimator.export_savedmodel('export', serving_input_receiver_fn)

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in export_savedmodel(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs)
635 checkpoint_path=checkpoint_path,
636 strip_default_attrs=strip_default_attrs,
--> 637 mode=model_fn_lib.ModeKeys.PREDICT)
638
639 def export_saved_model(

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _export_saved_model_for_mode(self, export_dir_base, input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs, mode)
761 as_text=as_text,
762 checkpoint_path=checkpoint_path,
--> 763 strip_default_attrs=strip_default_attrs)
764
765 def _export_all_saved_models(

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _export_all_saved_models(self, export_dir_base, input_receiver_fn_map, assets_extra, as_text, checkpoint_path, strip_default_attrs)
879 builder, input_receiver_fn_map, checkpoint_path,
880 strip_default_attrs, save_variables,
--> 881 mode=model_fn_lib.ModeKeys.PREDICT)
882 save_variables = False
883

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _add_meta_graph_for_mode(self, builder, input_receiver_fn_map, checkpoint_path, strip_default_attrs, save_variables, mode, export_tags, check_variables)
2177 mode=mode,
2178 export_tags=export_tags,
-> 2179 check_variables=False))
2180
2181 def _call_model_fn(self, features, labels, mode, config):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _add_meta_graph_for_mode(self, builder, input_receiver_fn_map, checkpoint_path, strip_default_attrs, save_variables, mode, export_tags, check_variables)
956 labels=getattr(input_receiver, 'labels', None),
957 mode=mode,
--> 958 config=self.config)
959
960 export_outputs = model_fn_lib.export_outputs_for_mode(

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn(self, features, labels, mode, config)
2181 def _call_model_fn(self, features, labels, mode, config):
2182 if mode == _REWRITE_FOR_INFERENCE_MODE:
-> 2183 return self._call_model_fn_for_inference(features, labels, mode, config)
2184 else:
2185 return super(TPUEstimator, self)._call_model_fn(

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn_for_inference(self, features, labels, mode, config)
2224 return tpu_tensors
2225
-> 2226 tpu_tensors_on_cpu = tpu.rewrite_for_inference(computation)
2227 estimator_spec, tensors_dict, tensors = capture.get()
2228

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/framework/python/framework/experimental.py in new_func(*args, **kwargs)
62 'any time, and without warning.',
63 decorator_utils.get_qualified_name(func), func.module)
---> 64 return func(*args, **kwargs)
65 new_func.doc = _add_experimental_function_notice_to_docstring(
66 func.doc)

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in rewrite_for_inference(computation, inputs, infeed_queue, device_assignment, name)
1155 infeed_queue=infeed_queue,
1156 device_assignment=device_assignment,
-> 1157 name=name)
1158 # pylint: enable=undefined-variable

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in rewrite(computation, inputs, infeed_queue, device_assignment, name)
994 infeed_queue=infeed_queue,
995 device_assignment=device_assignment,
--> 996 name=name)[0]
997 # pylint: enable=indexing-exception
998

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in replicate(computation, inputs, infeed_queue, device_assignment, name)
506 """
507 return split_compile_and_replicate(computation, inputs, infeed_queue,
--> 508 device_assignment, name)[1]
509
510

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in split_compile_and_replicate(failed resolving arguments)
657 vscope.set_use_resource(True)
658
--> 659 outputs = computation(*computation_inputs)
660
661 vscope.set_use_resource(saved_use_resource)

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in wrapped_computation(*args, **kwargs)
1141 vscope.set_caching_device(lambda op: op.device)
1142
-> 1143 result = computation(*args, **kwargs)
1144
1145 vscope.set_custom_getter(prev_custom_getter)

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in computation()
2208 # so that building the graph will happen under rewrite_for_inference.
2209 mode = model_fn_lib.ModeKeys.PREDICT
-> 2210 estimator_spec = self._call_model_fn(features, labels, mode, config)
2211
2212 # We pick the TPU tensors out from export_output and later return them

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn(self, features, labels, mode, config)
2184 else:
2185 return super(TPUEstimator, self)._call_model_fn(
-> 2186 features, labels, mode, config)
2187
2188 def _call_model_fn_for_inference(self, features, labels, mode, config):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1167
1168 logging.info('Calling model_fn.')
-> 1169 model_fn_results = self._model_fn(features=features, **kwargs)
1170 logging.info('Done calling model_fn.')
1171

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _model_fn(features, labels, mode, config, params)
2468 logging.info('Running %s on CPU', mode)
2469 estimator_spec = model_fn_wrapper.call_without_tpu(
-> 2470 features, labels, is_export_mode=is_export_mode)
2471 estimator_spec = estimator_spec._replace(
2472 training_hooks=estimator_spec.training_hooks + (examples_hook,))

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in call_without_tpu(self, features, labels, is_export_mode)
1248
1249 def call_without_tpu(self, features, labels, is_export_mode):
-> 1250 return self._call_model_fn(features, labels, is_export_mode=is_export_mode)
1251
1252 def convert_to_single_tpu_train_step(self, dequeue_fn):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn(self, features, labels, is_export_mode)
1522 _add_item_to_params(params, _CTX_KEY, user_context)
1523
-> 1524 estimator_spec = self._model_fn(features=features, **kwargs)
1525 if (running_on_cpu and
1526 isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)): # pylint: disable=protected-access

~/deploy/BERT-NER/model.py in model_fn(features, labels, mode, params)
81 (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
82 init_checkpoint)
---> 83 tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
84 if use_tpu:
85 def tpu_scaffold():

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in init_from_checkpoint(ckpt_dir_or_file, assignment_map)
185 else:
186 distribution_strategy_context.get_tower_context().merge_call(
--> 187 _init_from_checkpoint, ckpt_dir_or_file, assignment_map)
188
189

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/distribute.py in merge_call(self, merge_fn, *args, **kwargs)
1038 """
1039 require_tower_context(self)
-> 1040 return self._merge_call(merge_fn, *args, **kwargs)
1041
1042 def _merge_call(self, merge_fn, *args, **kwargs):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/distribute.py in _merge_call(self, merge_fn, *args, **kwargs)
1046 self._distribution_strategy))
1047 try:
-> 1048 return merge_fn(self._distribution_strategy, *args, **kwargs)
1049 finally:
1050 _pop_per_thread_mode()

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in init_from_checkpoint(, ckpt_dir_or_file, assignment_map)
229 else:
230 var_name = ",".join([v.name for v in var])
--> 231 _set_variable_or_list_initializer(var, ckpt_file, tensor_name_in_ckpt)
232 logging.debug("Initialize variable %s from checkpoint %s with %s",
233 var_name, ckpt_dir_or_file, tensor_name_in_ckpt)

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in _set_variable_or_list_initializer(variable_or_list, ckpt_file, tensor_name)
353 _set_checkpoint_initializer(v, ckpt_file, tensor_name, slice_info.spec)
354 else:
--> 355 _set_checkpoint_initializer(variable_or_list, ckpt_file, tensor_name, "")
356
357

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in _set_checkpoint_initializer(variable, ckpt_file, tensor_name, slice_spec, name)
316
317 assert len(saveable_objects) == 1 # Should be only one variable.
--> 318 init_op = saveable_objects[0].restore([restore_op], restored_shapes=None)
319
320 # pylint:disable=protected-access

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, restored_tensors, restored_shapes)
159 restored_tensor = array_ops.identity(restored_tensor)
160 return resource_variable_ops.shape_safe_assign_variable_handle(
--> 161 self.handle_op, self._var_shape, restored_tensor)
162
163 def init(self, write_version=saver_pb2.SaverDef.V2):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py in shape_safe_assign_variable_handle(handle, shape, value, name)
161 return gen_resource_variable_ops.assign_variable_op(handle,
162 value_tensor,
--> 163 name=name)
164
165

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/gen_resource_variable_ops.py in assign_variable_op(resource, value, name)
156 if _ctx is None or not _ctx._eager_context.is_eager:
157 _, _, _op = _op_def_lib._apply_op_helper(
--> 158 "AssignVariableOp", resource=resource, value=value, name=name)
159 return _op
160 _result = None

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
789

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
486 'in a future version' if date is None else ('after %s' % date),
487 instructions)
--> 488 return func(*args, **kwargs)
489 return tf_decorator.make_decorator(func, new_func, 'deprecated',
490 _add_deprecated_arg_notice_to_docstring(

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in create_op(failed resolving arguments)
3270 input_types=input_types,
3271 original_op=self._default_original_op,
-> 3272 op_def=op_def)
3273 self._create_op_helper(ret, compute_device=compute_device)
3274 return ret

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in init(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1803
1804 if not c_op:
-> 1805 self._control_flow_post_processing()
1806
1807 def _control_flow_post_processing(self):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _control_flow_post_processing(self)
1814 control_flow_util.CheckInputFromValidContext(self, input_tensor.op)
1815 if self._control_flow_context is not None:
-> 1816 self._control_flow_context.AddOp(self)
1817
1818 def _reconstruct_sequence_inputs(self, op_def, inputs, attrs):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in AddOp(self, op)
1034
1035 def AddOp(self, op):
-> 1036 self._AddOpInternal(op)
1037
1038 def _AddOpInternal(self, op):

~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in _AddOpInternal(self, op)
1042 "Operation of type %s (%s) is not supported on the TPU for inference."
1043 " Execution will fail if this op is used in the graph. Make sure your"
-> 1044 " variables are using variable_scope." % (op.type, op.name))
1045 if self._outer_context:
1046 self._outer_context.AddInnerOp(op)

NotImplementedError: Operation of type AssignVariableOp (AssignVariableOp) is not supported on the TPU for inference. Execution will fail if this op is used in the graph. Make sure your variables are using variable_scope.

wazzy commented
def serving_input_fn():
  with tf.variable_scope("foo"):
    feature_spec = {
      "unique_ids": tf.FixedLenFeature([], tf.int64),
      "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
      "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
      "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
    }
    serialized_tf_example = tf.placeholder(shape=[None], dtype=tf.string)
    serialized_tf_example_1 = tf.placeholder(shape=[None], dtype=tf.string)
    serialized_tf_example_2 = tf.placeholder(shape=[None], dtype=tf.string)
    serialized_tf_example_3 = tf.placeholder(shape=[None], dtype=tf.string)

    received_tensors = { 
        'unique_ids': serialized_tf_example,
        'input_ids': serialized_tf_example_1,
        'input_mask': serialized_tf_example_2,
        'segment_ids': serialized_tf_example_3,
    }
    def _decode_record(record):
      example = tf.parse_single_example(record, feature_spec)
      for name in list(example.keys()):
        t = example[name]
        if t.dtype == tf.int64:
          t = tf.to_int32(t)
      return t
    features = {}
    feature_spec = { "unique_ids": tf.FixedLenFeature([], tf.int64), }
    features['unique_ids'] = tf.map_fn(_decode_record, serialized_tf_example, dtype=tf.int32)
    feature_spec = { "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), }
    features['input_ids'] = tf.map_fn(_decode_record, serialized_tf_example_1, dtype=tf.int32)
    feature_spec = { "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), }
    features['input_mask'] = tf.map_fn(_decode_record, serialized_tf_example_2, dtype=tf.int32)
    feature_spec = { "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), }
    features['segment_ids'] = tf.map_fn(_decode_record, serialized_tf_example_3, dtype=tf.int32)
    return tf.estimator.export.ServingInputReceiver(features, received_tensors)

NotImplementedError: Operation of type AssignVariableOp (AssignVariableOp) is not supported on the TPU for inference. Execution will fail if this op is used in the graph. Make sure your variables are using variable_scope.

I found my mistake: tf.train.init_from_checkpoint should not be called when exporting the model.

wazzy commented

@sminder Is my serving_input_fn function correct?
How did you change the code so that export happens without calling tf.train.init_from_checkpoint, i.e. how do I stop tf.train.init_from_checkpoint from being called at export time?

@wazzy I have tested your serving_input_fn function; it works.
I found the init_from_checkpoint call in the model_fn used by the estimator, like this:

tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

The export code looks like this:

estimator._export_to_tpu = False  # this is important
estimator.export_savedmodel('export_t', serving_input_fn)
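For reference, the fix works because tf.train.init_from_checkpoint must not run when the graph is built for export. A minimal sketch of the guard, assuming a run_classifier.py-style model_fn (the helper name is hypothetical):

import tensorflow as tf
import modeling  # from this repo

def maybe_init_from_checkpoint(tvars, init_checkpoint, mode):
    """Only wire up checkpoint initialization while training, so the
    exported inference graph contains no AssignVariableOp."""
    if not init_checkpoint or mode != tf.estimator.ModeKeys.TRAIN:
        return
    assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)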

@sminder thanks for the input. I think your fix worked, because saved_model_cli is able to read the saved model.

saved_model_cli show --all --dir $PROJECT_HOME/export_t/1544203049

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_ids'] tensor_info:
        dtype: DT_STRING
        shape: (-1)
        name: foo/Placeholder_1:0
    inputs['input_mask'] tensor_info:
        dtype: DT_STRING
        shape: (-1)
        name: foo/Placeholder_2:0
    inputs['label_ids'] tensor_info:
        dtype: DT_STRING
        shape: (-1)
        name: foo/Placeholder:0
    inputs['segment_ids'] tensor_info:
        dtype: DT_STRING
        shape: (-1)
        name: foo/Placeholder_3:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['output'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 935)
        name: loss/Softmax:0
  Method name is: tensorflow/serving/predict

The issue I am having now is actually getting predictions from the saved model. Can you provide an example using input_exprs? The following attempt, based on the example,

saved_model_cli run --dir $PROJECT_HOME/export_t/1544203049 --tag_set serve --signature_def serving_default \
--input_exprs 'input_ids=np.zeros((64));input_mask=np.zeros((64));label_ids=[0];segment_ids=np.zeros((64))'

gives

tensorflow.python.framework.errors_impl.InternalError: Unable to get element as bytes.

Check out https://github.com/bigboNed3/bert_serving.
I use tf.estimator.export.build_raw_serving_input_receiver_fn.
@lapolonio @wazzy

Hi @bigboNed3

Have you ever hit this bug when running a saved_model_cli command like this:

saved_model_cli run --dir /path/to/your_model/your_model_version --tag_set serve --signature_def serving_default \
--input_exprs 'input_ids=np.zeros((1,128));input_mask=np.zeros((1,128));label_ids=[0];segment_ids=np.zeros((1,128))'

File "/Users/cdong/miniconda3/bin/saved_model_cli", line 11, in <module>
    sys.exit(main())
  File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 802, in main
    args.func(args)
  File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 635, in run
    args.overwrite, tf_debug=args.tf_debug)
  File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 337, in run_saved_model_with_feed_dict
    outputs = sess.run(output_tensor_names_sorted, feed_dict=inputs_feed_dict)
  File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
    run_metadata_ptr)
  File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run
    feed_dict_tensor, options, run_metadata)
  File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
    run_metadata)
  File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits must be 2-dimensional
         [[Node: bert/encoder/layer_0/attention/self/Softmax = Softmax[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](bert/encoder/layer_0/attention/self/add)]]

and the model's structure is

saved_model_cli show --all --dir serving/tensorflow_serving/servables/tensorflow/testdata/bert_sentiment/1544601708

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 128)
        name: input_ids_1:0
    inputs['input_mask'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 128)
        name: input_mask_1:0
    inputs['label_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1)
        name: label_ids_1:0
    inputs['segment_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 128)
        name: segment_ids_1:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['output'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 3)
        name: loss/Softmax:0
  Method name is: tensorflow/serving/predict

@neutronest @wazzy it looks like in https://github.com/bigboNed3/bert_serving run_classifier forgot to skip init_from_checkpoint when exporting the model.

I'm working on an end-to-end example.
My input_fn:

def serving_input_fn():
    with tf.variable_scope("foo"):
      feature_spec = {
          "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
          "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
          "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
          "label_ids": tf.FixedLenFeature([], tf.int64),
        }
      serialized_tf_example = tf.placeholder(dtype=tf.string,
                                             shape=[None],
                                             name='input_example_tensor')
      receiver_tensors = {'examples': serialized_tf_example}
      features = tf.parse_example(serialized_tf_example, feature_spec)
      return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

which creates a model that lets you do inference like:

saved_model_cli run --dir $PROJECT_HOME/export_t/1544223413 --tag_set serve --signature_def serving_default \
--input_examples 'examples=[{"input_ids":np.zeros((64), dtype=int).tolist(),"input_mask":np.zeros((64), dtype=int).tolist(),"label_ids":[0],"segment_ids":np.zeros((64), dtype=int).tolist()}]'
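For completeness, a gRPC client sketch for the same 'examples' signature, assuming the SavedModel is served on localhost:8500 under the model name 'bert' and max_seq_length is 64 as in the command above (the output key depends on your model_fn):

import grpc
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

def make_example(input_ids, input_mask, segment_ids, label_id):
    def int_feat(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
    return tf.train.Example(features=tf.train.Features(feature={
        "input_ids": int_feat(input_ids),
        "input_mask": int_feat(input_mask),
        "segment_ids": int_feat(segment_ids),
        "label_ids": int_feat([label_id]),
    })).SerializeToString()

serialized = make_example([0] * 64, [0] * 64, [0] * 64, 0)

channel = grpc.insecure_channel('localhost:8500')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
request = predict_pb2.PredictRequest()
request.model_spec.name = 'bert'                      # assumed model name
request.model_spec.signature_name = 'serving_default'
request.inputs['examples'].CopyFrom(
    tf.contrib.util.make_tensor_proto([serialized], dtype=tf.string, shape=[1]))
result = stub.Predict(request, 10.0)                  # 10 second timeout
print(result.outputs)                                 # output key ('output'/'probabilities') depends on the model_fn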

> (quoting the earlier "logits must be 2-dimensional" report and its saved_model_cli output)

Yes, I met that bug and couldn't solve it,
so I used tf.estimator.export.build_raw_serving_input_receiver_fn instead.
Hope @lapolonio's method will work.

@lapolonio my export function is like this:

def export_model(dir_path):
    MAX_SEQ_LEN = 128
    def serving_input_receiver_fn():
        """An input receiver that expects a serialized tf.Example."""
        reciever_tensors = {
            "input_ids": tf.placeholder(dtype=tf.int64,
                                        shape=[1, MAX_SEQ_LEN])
        }
        features = {
            "input_ids": reciever_tensors['input_ids'],
            "input_mask": 1 - tf.cast(tf.equal(reciever_tensors['input_ids'], 0), dtype=tf.int64),
            "segment_ids": tf.zeros(dtype=tf.int64,
                                    shape=[1, MAX_SEQ_LEN])
        }
        return tf.estimator.export.ServingInputReceiver(features, reciever_tensors)

    estimator._export_to_tpu = False
    estimator.export_savedmodel(dir_path, serving_input_receiver_fn)

When exporting the model I also deleted all the training- and evaluation-related functions that are not needed for serving, and it works.
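A possible REST call against this export, assuming tensorflow_model_server is running with --rest_api_port=8501 and the model name 'bert' (both assumptions); only input_ids needs to be sent, since the other features are derived inside the graph:

import requests

MAX_SEQ_LEN = 128
body = {"instances": [{"input_ids": [0] * MAX_SEQ_LEN}]}  # one row per instance
resp = requests.post('http://localhost:8501/v1/models/bert:predict', json=body)
print(resp.json())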

> (quoting the earlier "logits must be 2-dimensional" report and @bigboNed3's reply above)

For more information: I have hit similar errors such as "logits must be 2-dimensional" when requesting the service with TensorFlow Serving. The root cause was that the TensorFlow Serving version was too low; upgrading to 1.12.0 solved the error.

When I export my model as follows, how can I send batch requests to the server?
Thanks in advance for all the help.

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['examples'] tensor_info:
        dtype: DT_STRING
        shape: unknown_rank
        name: input_example_tensor:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['end_logits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 384)
        name: unstack:1
    outputs['start_logits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 384)
        name: unstack:0
    outputs['unique_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1)
        name: ParseExample/ParseExample:4
  Method name is: tensorflow/serving/predict
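One way to send a batch to this 'examples' signature is a 1-D string tensor containing N serialized tf.train.Example protos in a single PredictRequest. A sketch, with the server address and model name as assumptions:

import grpc
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

def squad_example(unique_id, input_ids, input_mask, segment_ids):
    def int_feat(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
    return tf.train.Example(features=tf.train.Features(feature={
        "unique_ids": int_feat([unique_id]),
        "input_ids": int_feat(input_ids),
        "input_mask": int_feat(input_mask),
        "segment_ids": int_feat(segment_ids),
    })).SerializeToString()

batch = [squad_example(i, [0] * 384, [0] * 384, [0] * 384) for i in range(8)]

stub = prediction_service_pb2_grpc.PredictionServiceStub(grpc.insecure_channel('localhost:8500'))
request = predict_pb2.PredictRequest()
request.model_spec.name = 'squad'                    # assumed model name
request.model_spec.signature_name = 'serving_default'
request.inputs['examples'].CopyFrom(
    tf.contrib.util.make_tensor_proto(batch, dtype=tf.string, shape=[len(batch)]))
result = stub.Predict(request, 30.0)
# start_logits/end_logits come back with shape [N, 384], unique_ids with shape [N]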

@sminder @bigboNed3 @wazzy @lapolonio

For my application, I would like to take sentences as inputs and extract word embeddings from layer -2 (as described here: https://github.com/google-research/bert#using-bert-to-extract-fixed-feature-vectors-like-elmo). Any guidance on what I need to do to export a TensorFlow Serving model that can do that?
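A rough sketch of such an export, loosely following extract_features.py from this repo; the config/checkpoint paths, MAX_SEQ_LEN and the output key are assumptions, and tokenization still has to happen on the client side:

import tensorflow as tf
import modeling  # from this repo

MAX_SEQ_LEN = 128
bert_config = modeling.BertConfig.from_json_file('/path/to/bert_config.json')  # assumed path

def model_fn(features, labels, mode, params):
    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=features['input_ids'],
        input_mask=features['input_mask'],
        token_type_ids=features['segment_ids'],
        use_one_hot_embeddings=False)
    # Second-to-last encoder layer, shape [batch, seq_len, hidden_size].
    layer_minus_2 = model.get_all_encoder_layers()[-2]
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={'layer_minus_2': layer_minus_2})

def serving_input_fn():
    receiver = {
        'input_ids': tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name='input_ids'),
        'input_mask': tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name='input_mask'),
        'segment_ids': tf.placeholder(tf.int32, [None, MAX_SEQ_LEN], name='segment_ids'),
    }
    return tf.estimator.export.ServingInputReceiver(receiver, receiver)

# Variables are restored from the latest checkpoint in model_dir at export time,
# so no init_from_checkpoint call is needed here.
estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir='/path/to/fine_tuned_checkpoint_dir')  # assumed
estimator.export_savedmodel('/path/to/export', serving_input_fn)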

@sminder do you mind sharing the entire code to export the BERT model?

> (quoting the export_model function posted above)

Hi @lapolonio,
Your code (quoted below) works, but I have an issue using your serving_input_fn to call the service through a gRPC client; it does not seem possible to pass in a serialized Example over gRPC. Have you tried that?

> (quoting @lapolonio's serving_input_fn and saved_model_cli example above)

@gkovacslmi
Hi, I am trying solution 1: convert the ckpt to a pb file and serve it with Java. The code is below:

import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.python.saved_model import tag_constants
from tensorflow.core.protobuf import saver_pb2

freeze_graph.freeze_graph(input_graph=MODEL_DIR + SEP + 'graph.pbtxt', input_saver='', input_binary=False, input_checkpoint=MODEL_DIR + SEP + 'model.ckpt-'+version, output_node_names='loss/Softmax', restore_op_name=None, filename_tensor_name=None, output_graph=PB_MODEL_FILE, clear_devices=True, initializer_nodes=None, variable_names_whitelist="", variable_names_blacklist="", input_meta_graph=None, input_saved_model_dir=None, saved_model_tags=tag_constants.SERVING, checkpoint_version=saver_pb2.SaverDef.V2)

but it throws an out of range exception:

Traceback (most recent call last):
  File "fileconverter.py", line 29, in <module>
    convert(str(2504))
  File "fileconverter.py", line 24, in convert
    checkpoint_version=saver_pb2.SaverDef.V2)
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/tools/freeze_graph.py", line 363, in freeze_graph
    checkpoint_version=checkpoint_version)
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/tools/freeze_graph.py", line 190, in freeze_graph_with_def_protos
    var_list=var_list, write_version=checkpoint_version)
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1102, in __init__
    self.build()
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1114, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1151, in _build
    build_save=build_save, build_restore=build_restore)
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 773, in _build_internal
    saveables = self._ValidateAndSliceInputs(names_to_saveables)
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 680, in _ValidateAndSliceInputs
    for converted_saveable_object in self.SaveableObjectsForOp(op, name):
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 654, in SaveableObjectsForOp
    variable, "", name)
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 128, in __init__
    self.handle_op = var.op.inputs[0]
  File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2128, in __getitem__
    return self._inputs[i]
IndexError: list index out of range

I tried Python 2.7, 3.5, and 3.6, and TF 1.11 and 1.12, but it doesn't work.
Can you share your freeze_graph call?
Thanks in advance for all the help.

> (quoting the freeze_graph question above)

I used export_savedmodel
https://github.com/bigboNed3/bert_serving
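If a frozen GraphDef is still needed for the Java side, freeze_graph can also start from the SavedModel produced by export_savedmodel rather than from graph.pbtxt plus a checkpoint. A hedged sketch (the export directory is the one mentioned earlier in the thread and is an assumption; adjust output_node_names to your graph):

from tensorflow.python.tools import freeze_graph
from tensorflow.python.saved_model import tag_constants

freeze_graph.freeze_graph(
    input_graph=None,
    input_saver=None,
    input_binary=False,
    input_checkpoint=None,
    output_node_names='loss/Softmax',   # output node used elsewhere in this thread
    restore_op_name=None,
    filename_tensor_name=None,
    output_graph='frozen_bert.pb',
    clear_devices=True,
    initializer_nodes='',
    input_saved_model_dir='export_t/1544203049',   # assumed export path
    saved_model_tags=tag_constants.SERVING)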

@gkovacslmi @wazzy @fuxia0425 @sminder
Has anyone written code to serve the trained model for SQuAD? I am new to TensorFlow, and it would be a great help if you could share your code.
Pointing me to a resource that explains how to write a serving function and then use the saved model to make predictions for BERT question answering (SQuAD) would be a great help too.

> (quoting @lapolonio's serving_input_fn and saved_model_cli example above)

Hello, I followed your guide to export the model, but I get the same results (output) every time I try to predict with it. I verified that the inputs change before every Session.run(), yet the outputs fetched by name (Softmax:0 or per_example_loss:0) are always the same. Please help. Thanks!

@ahzz1207 I created an example of serving on Google Cloud Platform using Kubernetes. https://github.com/lapolonio/bert/blob/classify_ag_news/make_containers.sh has the commands. I'm writing a post for Medium: https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!

> (quoting @lapolonio's serving_input_fn and saved_model_cli example above)

Hi, I used your code to export the BERT model, but when I use tf.parse_example I encounter an error. Did you hit this problem?

@Olivia-xu No. What is your error?
I have a walkthrough here: https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!

@lapolonio Your solution fails on TensorFlow 1.13.0rc; I have tried running the Colab:

---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
in ()
20 export_dir_base = 'gs://{}/bert/{}'.format(BUCKET,EXPORT_MODEL_DIR)
21 estimator._export_to_tpu = False # this is important
---> 22 path = estimator.export_saved_model(export_dir_base,serving_input_fn)
23 print(path)

/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in export_saved_model(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path)
708 as_text=as_text,
709 checkpoint_path=checkpoint_path,
--> 710 strip_default_attrs=True)
711
712 def _export_saved_model_for_mode(

/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in export_savedmodel(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs)
637 checkpoint_path=checkpoint_path,
638 strip_default_attrs=strip_default_attrs,
--> 639 mode=model_fn_lib.ModeKeys.PREDICT)
640
641 def export_saved_model(

/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _export_saved_model_for_mode(self, export_dir_base, input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs, mode)
763 as_text=as_text,
764 checkpoint_path=checkpoint_path,
--> 765 strip_default_attrs=strip_default_attrs)
766
767 def _export_all_saved_models(

/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _export_all_saved_models(self, export_dir_base, input_receiver_fn_map, assets_extra, as_text, checkpoint_path, strip_default_attrs)
881 builder, input_receiver_fn_map, checkpoint_path,
882 strip_default_attrs, save_variables,
--> 883 mode=model_fn_lib.ModeKeys.PREDICT)
884 save_variables = False
885

TypeError: _add_meta_graph_for_mode() got multiple values for argument 'mode' 

@shawei3000 Thanks for the feedback! I just forced the version to be Tensorflow==1.12 and it's working again.

P.S. if you could give feedback in the medium post that would help clean up this thread.

@lapolonio

So how do I write a Python client?

Here is my code

import grpc
import tensorflow as tf
from tensorflow_serving.apis import classification_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

channel = grpc.insecure_channel('127.0.0.1:9000')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

import numpy as np

examples = [{"input_ids": np.zeros((128), dtype=int).tolist(),
             "input_mask": np.zeros((128), dtype=int).tolist(),
             "label_ids": [0],
             "segment_ids": np.zeros((128), dtype=int).tolist()}]
# ###########################################
example = tf.train.Example()
for key, value in examples[0].items():
    for v in value:
        example.features.feature[key].int64_list.value.append(v)

request = classification_pb2.ClassificationRequest()
request.model_spec.name = 'default'
request.model_spec.signature_name = 'serving_default'
request.input.example_list.examples.extend(examples)
response = stub.Classify(request, 10.0)

the ERROR:

 from ._conv import register_converters as _register_converters
Traceback (most recent call last):
  File "/home/xzc/PycharmProjects/TF_Serving/bert_client.py", line 27, in <module>
    request.input.example_list.examples.extend(examples)
TypeError: Not a cmessage

> (quoting the "TypeError: _add_meta_graph_for_mode() got multiple values for argument 'mode'" traceback above)

I got the same error. Did you fix it?

@yajian How did you get that error? At the top of the script, I install tensorflow==1.12:
https://colab.research.google.com/gist/lapolonio/5ac6552fa4d58ea1419cc444d42ff5ff/bert-finetuning-with-cloud-tpu-sentence-and-sentence-pair-classification-tasks.ipynb#scrollTo=Ns3IlhryMY-f

@lapolonio Thanks for the reply. I think it really is a version problem. I can run your demo on 1.10 but get errors on 1.12.

Finally, I figured it out.
######################################## save pb model:

def serving_input_fn():
    label_ids = tf.placeholder(tf.int32, [None], name='label_ids')
    input_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_ids')
    input_mask = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_mask')
    segment_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='segment_ids')
    input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
        'label_ids': label_ids,
        'input_ids': input_ids,
        'input_mask': input_mask,
        'segment_ids': segment_ids,
    })()
    return input_fn


...

estimator._export_to_tpu = False  # this is important
estimator.export_savedmodel(FLAGS.output_dir, serving_input_fn)

###############################################client

channel = grpc.insecure_channel('IP:PORT')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
data = [{"input_ids": np.zeros((128), dtype=int).tolist(), "input_mask": np.zeros((128), dtype=int).tolist(),
         "label_ids": [0], "segment_ids": np.zeros((128), dtype=int).tolist()}]
request = predict_pb2.PredictRequest()
request.model_spec.name = 'bert'
request.model_spec.signature_name = 'serving_default'

request.inputs['input_ids'].CopyFrom(
    tf.contrib.util.make_tensor_proto(np.zeros((128), dtype=int).tolist(), shape=[1, 128]))
request.inputs['input_mask'].CopyFrom(
    tf.contrib.util.make_tensor_proto(np.zeros((128), dtype=int).tolist(), shape=[1, 128]))
request.inputs['label_ids'].CopyFrom(tf.contrib.util.make_tensor_proto([0], shape=[1, 1]))
request.inputs['segment_ids'].CopyFrom(
    tf.contrib.util.make_tensor_proto(np.zeros((128), dtype=int).tolist(), shape=[1, 128]))

import time
begin = time.time()
result = stub.Predict(request, 10.0)  # 10 secs timeout
end = time.time() - begin
print('time {}'.format(end))
print('length of probabilities:{}'.format(len(result.outputs['probabilities'].float_val)))
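For reference, a REST equivalent of the gRPC request above, assuming the server was started with --rest_api_port=8501 and the model name 'bert'; the "inputs" key uses the columnar format of the TensorFlow Serving REST API:

import requests

body = {
    "signature_name": "serving_default",
    "inputs": {
        "input_ids": [[0] * 128],
        "input_mask": [[0] * 128],
        "segment_ids": [[0] * 128],
        "label_ids": [0],
    },
}
resp = requests.post('http://localhost:8501/v1/models/bert:predict', json=body)
print(resp.json())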

@Gpwner you didn't use a TensorProto to make the message: https://github.com/lapolonio/bert/blob/classify_ag_news/run_app.py#L48

@lapolonio thanks !

@Olivia-xu No. What is your error?
I have a walkthrough here: https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!

I am very sorry for the late reply. I think this code is right; my environment was set up incorrectly. After I switched to a Python 3 environment, it runs correctly. Sorry.

@ahzz1207 I created an example of serving in google cloud platform using Kubernetes. https://github.com/lapolonio/bert/blob/classify_ag_news/make_containers.sh are the commands. I'm writing a post for medium. https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!

Hi, I corrected this problem a day later but forgot to reply.
You can see my text multi-label classification with BERT served as a service here. I hope I can get your advice!

@lapolonio @Gpwner @bigboNed3

finally๏ผŒI figure it out.
########################################save pb model:

def serving_input_fn():
    label_ids = tf.placeholder(tf.int32, [None], name='label_ids')
    input_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_ids')
    input_mask = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_mask')
    segment_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='segment_ids')
    input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
        'label_ids': label_ids,
        'input_ids': input_ids,
        'input_mask': input_mask,
        'segment_ids': segment_ids,
    })()
    return input_fn


...

estimator._export_to_tpu = False  # this is important
estimator.export_savedmodel(FLAGS.output_dir, serving_input_fn)

###############################################client

channel = grpc.insecure_channel('IP:PORT')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
data = [{"input_ids": np.zeros((128), dtype=int).tolist(),
         "input_mask": np.zeros((128), dtype=int).tolist(),
         "label_ids": [0],
         "segment_ids": np.zeros((128), dtype=int).tolist()}]
request = predict_pb2.PredictRequest()
request.model_spec.name = 'bert'
request.model_spec.signature_name = 'serving_default'

request.inputs['input_ids'].CopyFrom(
    tf.contrib.util.make_tensor_proto(np.zeros((128), dtype=int).tolist(), shape=[1, 128]))
request.inputs['input_mask'].CopyFrom(
    tf.contrib.util.make_tensor_proto(np.zeros((128), dtype=int).tolist(), shape=[1, 128]))
request.inputs['label_ids'].CopyFrom(
    tf.contrib.util.make_tensor_proto([0], shape=[1, 1]))
request.inputs['segment_ids'].CopyFrom(
    tf.contrib.util.make_tensor_proto(np.zeros((128), dtype=int).tolist(), shape=[1, 128]))

import time
begin = time.time()
result = stub.Predict(request, 10.0)  # 10 secs timeout
end = time.time() - begin
print('time {}'.format(end))
print('length of probabilities: {}'.format(len(result.outputs['probabilities'].float_val)))
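
For anyone who prefers the REST endpoint (port 8501) over gRPC, the same raw signature can be exercised with plain JSON. A rough sketch, assuming the model is served under the name 'bert', max_seq_length is 128, and the requests library is installed:

import json
import requests

# One instance per example; keys must match the serving signature's inputs.
instance = {
    "input_ids": [0] * 128,
    "input_mask": [0] * 128,
    "segment_ids": [0] * 128,
    "label_ids": 0,
}
body = {"signature_name": "serving_default", "instances": [instance]}
resp = requests.post("http://localhost:8501/v1/models/bert:predict",
                     data=json.dumps(body))
print(resp.json()["predictions"])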

Hi, I am trying to build a tensorflow-serving client for the SQuAD task. However, I get the error: grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with: status = StatusCode.INVALID_ARGUMENT, details = "unique_ids_1_1:0 is both fed and fetched."
This is the output of my saved_model_cli

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1, 384)
        name: input_ids_1:0
    inputs['input_mask'] tensor_info:
        dtype: DT_INT64
        shape: (-1, 384)
        name: input_mask_1:0
    inputs['segment_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1, 384)
        name: segment_ids_1:0
    inputs['unique_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1)
        name: unique_ids_1_1:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['end_logits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 384)
        name: unstack:1
    outputs['start_logits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 384)
        name: unstack:0
    outputs['unique_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1)
        name: unique_ids_1_1:0
  Method name is: tensorflow/serving/predict

From what I understand, I can't have the same tensor as both an input and an output. Is there any workaround for this? Should I just remove unique_ids from the output tensors when exporting the model?

@ronalddas Yeah, I think you can take it out. I think the order of predictions in a batch is stable; you can test locally and verify.
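
If you do take it out, the change would live in the PREDICT branch of model_fn in run_squad.py. A sketch of what that might look like (not tested on the TPU path, and note that the local prediction loop keys its results by unique_id, so only do this for the serving export):

# In model_fn (mode == PREDICT) of run_squad.py: stop echoing the fed
# unique_ids back as an output so the tensor is no longer both fed and fetched.
predictions = {
    "start_logits": start_logits,
    "end_logits": end_logits,
}
output_spec = tf.contrib.tpu.TPUEstimatorSpec(
    mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)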

Hi guys, first off I have to say I am new to TensorFlow and machine learning.
I have been trying to get this fine-tuned classification working for the last 2-3 weeks, and I'm getting closer thanks to all the useful posts on here.
@lapolonio I have built a classifier similar to your AgnewsProcessor, but instead it classifies emotions ('anger', 'joy', etc.). It's called IsearProcessor and I have fine-tuned on the Isear.csv dataset. Thanks for your samples, they have really helped!
I'm at the point now where I have a fine-tuned model 'model_6' and am posting to Flask locally using run_app.py. I have tensorflow/serving running locally with "docker run -p 8500:8500 -p 8501:8501 --mount type=bind,source=c:\models\model_6,target=/models/model_6 -e MODEL_NAME=model_6 -t tensorflow/serving"

It gets a prediction from the model every time, but it's always the same prediction: joy!
I think there might be something missing in my serving_input_receiver_fn(), but I've tried a lot of variations and they all seem to lead back to the one posted above.

I would really appreciate some guidance on this. I'm stuck in a rut at the moment, but I feel it may be something small I am missing! The code for the serving_input_receiver_fn() and IsearProcessor is below.
As I mentioned, my problem is that the same prediction is returned each time ("joy"), and the float_val values are always around the same:
float_val: 0.14242129027843475
float_val: 0.1477668285369873
float_val: 0.13308830559253693
float_val: 0.1376849114894867
float_val: 0.15062256157398224
float_val: 0.1419164538383484
float_val: 0.14649958908557892

Thanks,

  • Actually, I forgot to mention: when I run the test data from test.tsv with do_predict=True, the predictions work OK!

if FLAGS.do_serve:

feature_spec = {
    "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
    "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
    "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
    "label_ids": tf.FixedLenFeature([], tf.int64),
    "is_real_example": tf.FixedLenFeature([], tf.int64)
  }

def serving_input_receiver_fn():      
  #with tf.variable_scope("foo"):
  serialized_tf_example = tf.placeholder(dtype=tf.string,
                                          shape=[None],
                                          name='input_example_tensor')
  receiver_tensors = {'examples': serialized_tf_example}
  features = tf.parse_example(serialized_tf_example, feature_spec)

  return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
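
For reference, a receiver like this expects the client to send serialized tf.train.Example protos under the 'examples' key; a request that doesn't match the feature_spec is one possible cause of constant predictions (it may or may not be the issue here). A client-side sketch of building such an example (the helper name is just for illustration):

import tensorflow as tf

def make_serialized_example(input_ids, input_mask, segment_ids, label_id=0):
    # Keys and dtypes must line up with the feature_spec above.
    def int64_list(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
    example = tf.train.Example(features=tf.train.Features(feature={
        "input_ids": int64_list(input_ids),
        "input_mask": int64_list(input_mask),
        "segment_ids": int64_list(segment_ids),
        "label_ids": int64_list([label_id]),
        "is_real_example": int64_list([1]),
    }))
    return example.SerializeToString()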

.......

class IsearProcessor(DataProcessor):
  """Processor for the ISEAR emotion data set."""

  def get_train_examples(self, data_dir):
    """See base class."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

  def get_dev_examples(self, data_dir):
    """See base class."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev_matched")

  def get_test_examples(self, data_dir):
    """See base class."""
    return self._create_examples(
        self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

  def get_labels(self):
    """See base class."""
    return [
        "anger",
        "disgust",
        "fear",
        "guilt",
        "joy",
        "sadness",
        "shame"
    ]

  def _create_examples(self, lines, set_type):
    """Creates examples for the training and dev sets."""
    examples = []
    for (i, line) in enumerate(lines):
      if i == 0:  # skip header
        continue
      single_example = self._create_example(line, set_type)
      examples.append(single_example)
    return examples

  def _create_example(self, line, set_type):
    guid = "%s-%s" % (set_type, tokenization.convert_to_unicode(line[0]))
    text_a = tokenization.convert_to_unicode(line[1])
    if set_type == "test":
      label = "joy"
    else:
      label = tokenization.convert_to_unicode(line[-1])
    single_example = InputExample(guid=guid, text_a=text_a, label=label)
    return single_example

@tomaryancit Can you create a pull request? It's hard to figure out what's going on in the code you posted.

General suggestions:
How is the performance during training? Is it increasing?
Try testing examples where you know the class.

@lapolonio many thanks for the reply.
I have created a pull request and uploaded run_classifier.py.
I am new to GitHub pull requests also, so I hope I've done that OK and you can see the file!

I'm happy with my testing on the model; it's just the serving that I am struggling with.

The idea of this fine-tuned BERT model is that I could feed in a sentence such as 'I am really happy' or 'I am feeling sad' and the model would classify the sentence under one of the 7 emotion labels specified in the ISEAR dataset - https://www.unige.ch/cisa/research/materials-and-online-research/research-material/ .

However, I am getting the same prediction every time, regardless of the sentence that I serve to the model.

I will upload the test.tsv and test_results.tsv in a while as they are on another PC.

Thanks again.

Hi, I ran into the same error as you (getting the same prediction every time) when I followed their methods three months ago. You can see https://github.com/ahzz1207/Bert-with-text-classification-and-service; I used my run_classifier file to correct this error. I hope that helps you.

@ahzz1207 thanks for the reply.
When I click that link you provided I get "No results matched your search".

Or you can just go to my GitHub page; you will find it easily by the name.

@ahzz1207 I got it here, thank you: https://github.com/ahzz1207/Bert-with-text-classification-and-service.
So I see you have test_serving_model.py as a version of run_classifier.py.
The first thing I notice is that you don't have a serving_input_receiver_fn(), which is the suggested method for TensorFlow Serving.
Have you taken a different approach to serving the model? I'm not sure I understand how you are serving it.

I'm not sure I follow, but I think what you are referring to is the code at lines 615~650, the input_fn method.

@gkovacslmi @wazzy @fuxia0425 @sminder
Has anyone written code to serve the trained model for SQuAD? I am new to TensorFlow and it would be a great help if you could share your code.
Pointing me to a resource that teaches how to write a serving function and then use the saved model to make predictions for BERT-for-QuestionAnswering (SQuAD) would be a great help too.

I have the same puzzle; have you solved the problem?

What is the size of your fine-tuned BERT model?

Mine is 680 MB (the model file is 900 KB and the variables are 678.47 MB), which is much larger than the 250 MB limit for Google's AI Platform. I presume my model size is typical, or is there something I can do to cut it down? (I have already asked Google to increase this quota for me.)

estimator._export_to_tpu = False

This single line saved my life. Shouldn't it be in the documentation, or did I just miss it?

And how do I run prediction on a text input through tensorflow-serving once I have the BERT pb model? Thanks.

I've been playing around with the IMDB classifier example, and to finish up have been trying to write a serving_input_fn which will take plain text as input and calculate the features to pass to the model, to save a client needing to calculate them. I've managed to write one that saves happily without errors, but when I reload and try to predict, I keep getting an error of the form:

~/ml_exp/test_env/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1368 pass
1369 message = error_interpolation.interpolate(message, self._graph)
-> 1370 raise type(e)(node_def, op, message)
1371
1372 def _extend_graph(self):
InvalidArgumentError: Could not parse example input, value: '[{"input_ids": [101, 23435, 1006, 1000, 7953, 1035, 5164, 1035, 3793, 1024, 1014, 1000, 1010, 26718, 18863, 1027, 5164, 1007, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "input_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "segment_ids": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "label_ids": 0}, {"input_ids": [101, 23435, 1006, 1000, 7953, 1035, 5164, 1035, 3793, 1024, 1014, 1000, 1010, 26718, 18863, 1027, 5164, 1007, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "input_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "segment_ids": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "label_ids": 0}]'
[[node ParseExample/ParseExample (defined at :7) ]]

My code is below. I've been hitting my head against this for days and not getting anywhere. The example looks to be of the right format to me. Am I doing something stupid, or is this an issue?

import json
import base64

def plain_text_serving_input_fn():
    
    input_string = tf.placeholder(dtype=tf.string, shape=None, name='input_string_text')

    # What format to expect input in.
    receiver_tensors = {'input_text': input_string}
    
    input_examples = [run_classifier.InputExample(guid="", text_a = str(input_string), text_b = None, label = 0)] # here, "" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

    variables = {}
    for i in input_features:
        variables["input_ids"] = i.input_ids
        variables["input_mask"] = i.input_mask
        variables["segment_ids"] = i.segment_ids
        variables["label_ids"] = i.label_id

    feature_spec = {
      "input_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
      "input_mask" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
      "segment_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
      "label_ids" :  tf.FixedLenFeature([], tf.int64)
    }
    
    string_variables = json.dumps(variables)
    encode_input = base64.b64encode(string_variables.encode('utf-8'))
    encode_string = base64.decodestring(encode_input)
    
    
    features_to_input = tf.parse_example([encode_string], feature_spec)

    return tf.estimator.export.ServingInputReceiver(features_to_input, receiver_tensors)




estimator._export_to_tpu = False

estimator.export_saved_model(
    export_dir_base,
    plain_text_serving_input_fn
)

predict_fn = predictor.from_saved_model(export_dir)
predictions = predict_fn({'input_text': "This is some example text" })

I'm pretty new to TF and I could not figure out what is missing in this. Did anyone get this to work?

Mark

One thing that I don't understand in the code snippets in this issue/thread: if this is for serving/predicting, not for training, then what's the need for label_ids in build_raw_serving_input_receiver_fn and similar functions?

That's what the model input function specifies, but depending on the mode it might not be used.

@AndrewMcDowell were you able to resolve the issue of InvalidArgumentError?

@pcnfernando No, unfortunately not. I spent a while trying different things, but nothing worked for me.

@AndrewMcDowell It looks like your features_to_input and receiver_tensors don't match. Check out https://stackoverflow.com/questions/53410469/tensorflow-estimator-servinginputreceiver-features-vs-receiver-tensors-when-and

Your concrete next step is checking whether parse_example returns a SparseTensor.


I also encountered this problem, and it exhausted me.
But when I checked my environments, I found that they were not the same. After I reinstalled the environment, the problem was solved.

Problematic setup:
serving env:
TensorFlow ModelServer: 1.10.0-dev
TensorFlow Library: 1.9.0
export env:
TensorFlow: 1.12.0

Working setup:
serving env:
TensorFlow ModelServer: 1.10.0-dev
TensorFlow Library: 1.9.0
export env:
TensorFlow: 1.10.0

@lapolonio The intention is to take text as input, have the model handle feature extraction itself, and then return the result.

Following is the process of saving the model. Can you give some insight on what needs to be changed here?

 import json
 import base64

 def plain_text_serving_input_fn():
    
    input_string = tf.placeholder(dtype=tf.string, shape=None, name='input_string_text')

    # What format to expect input in.
    receiver_tensors = {'input_text': input_string}
    
    input_examples = [run_classifier.InputExample(guid="", text_a = str(input_string), text_b = None, label = 0)] # here, "" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

    variables = {}
    for i in input_features:
        variables["input_ids"] = i.input_ids
        variables["input_mask"] = i.input_mask
        variables["segment_ids"] = i.segment_ids
        variables["label_ids"] = i.label_id

    feature_spec = {
      "input_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
      "input_mask" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
      "segment_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
      "label_ids" :  tf.FixedLenFeature([], tf.int64)
    }
    
    string_variables = json.dumps(variables)
    encode_input = base64.b64encode(string_variables.encode('utf-8'))
    encode_string = base64.decodestring(encode_input)
    
    features_to_input = tf.parse_example([encode_string], feature_spec)

    return tf.estimator.export.ServingInputReceiver(features_to_input, receiver_tensors)

 estimator._export_to_tpu = False

 estimator.export_saved_model('/anaconda3/envs/tf/exported_with_txt',plain_text_serving_input_fn)

@pcnfernando You don't need to use parse_example; refer to build_raw_serving_input_receiver_fn above.

@lapolonio

I exported using build_raw_serving_input_receiver_fn and below is my signature def.

(base) Chirans-MacBook-Pro:bin chiranfernando$ ./saved_model_cli show --all --dir /anaconda3/envs/tf/text_input_model/1569807768 --tag_set serve

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 128)
        name: input_ids_1:0
    inputs['input_mask'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 128)
        name: input_mask_1:0
    inputs['label_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1)
        name: label_ids_1:0
    inputs['segment_ids'] tensor_info:
        dtype: DT_INT32
        shape: (-1, 128)
        name: segment_ids_1:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['labels'] tensor_info:
        dtype: DT_INT32
        shape: unknown_rank
        name: loss/Squeeze:0
    outputs['probabilities'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 2)
        name: loss/LogSoftmax:0
  Method name is: tensorflow/serving/predict

My confusion is how I can pass the sentence to it. Can you please point me to a sample input?

I'm trying to use the model with tensorflow java. I'm not using tensorflow serving.

@pcnfernando
You have to tokenize the string first, which turns the words into numbers; those numbers are then fed into the model.
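
Roughly, that tokenization step mirrors what convert_examples_to_features does in run_classifier.py. A minimal single-sentence sketch, assuming the repo's tokenization module and a vocab.txt on disk (paths and max_seq_length are placeholders):

import tokenization  # from this repo

tokenizer = tokenization.FullTokenizer(vocab_file="vocab.txt", do_lower_case=True)

def text_to_features(text, max_seq_length=128):
    # WordPiece-tokenize, add the special tokens, then map tokens to vocab ids.
    tokens = ["[CLS]"] + tokenizer.tokenize(text)[:max_seq_length - 2] + ["[SEP]"]
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_ids)
    segment_ids = [0] * len(input_ids)
    # Zero-pad everything out to max_seq_length.
    pad = [0] * (max_seq_length - len(input_ids))
    return input_ids + pad, input_mask + pad, segment_ids + pad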

As a summary and simplification of all the comments above, I found the following method useful for generating a SavedModel. It bypasses the serialization and deserialization associated with tf.InputExample, and the generated SavedModel accepts numpy arrays as inputs, so for benchmarking purposes it suffices to feed synthetic arrays (e.g., np.zeros); a quick local smoke test is sketched after the steps below. You may edit the estimator.export_saved_model call to write the SavedModel to another path.

  1. Apply the following patch to this repo
diff --git a/run_classifier.py b/run_classifier.py
index 817b147..c9426bc 100644
--- a/run_classifier.py
+++ b/run_classifier.py
@@ -955,6 +955,18 @@ def main(_):
         drop_remainder=predict_drop_remainder)
 
     result = estimator.predict(input_fn=predict_input_fn)
+    features = {
+        "input_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_ids'),
+        "input_mask": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_mask'),
+        "segment_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='segment_ids'),
+        "label_ids": tf.placeholder(shape=[None], dtype=tf.int32, name='label_ids'),
+        "is_real_example": tf.placeholder(shape=[None], dtype=tf.int32, name='is_real_example'),
+    }
+    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(features)
+    estimator._export_to_tpu = False  ## !!important to add this
+    estimator.export_saved_model(
+        export_dir_base='./bert_classifier_saved_model',
+        serving_input_receiver_fn=serving_input_fn)
 
     output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
     with tf.gfile.GFile(output_predict_file, "w") as writer:
diff --git a/run_squad.py b/run_squad.py
index edd4c3e..7b49d9b 100644
--- a/run_squad.py
+++ b/run_squad.py
@@ -1250,6 +1250,18 @@ def main(_):
         is_training=False,
         drop_remainder=False)
 
+    features = {
+        "input_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_ids'),
+        "input_mask": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_mask'),
+        "segment_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='segment_ids'),
+        "unique_ids": tf.placeholder(shape=[None], dtype=tf.int32, name='unique_ids'),
+    }
+    serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(features)
+    estimator._export_to_tpu = False  ## !!important to add this
+    estimator.export_saved_model(
+        export_dir_base='./bert_squad_saved_model',
+        serving_input_receiver_fn=serving_input_fn)
+
     # If running eval on the TPU, you will need to specify the number of
     # steps.
     all_results = []
  2. Execute run_classifier.py or run_squad.py with --do_train=true --do_predict=true, then find the SavedModel in ./bert_classifier_saved_model or ./bert_squad_saved_model.
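
To sanity-check the exported classifier SavedModel locally with synthetic inputs, something along these lines should work in TF 1.x (export_dir is the timestamped directory created under the export base, and the 'probabilities' output key is taken from the classifier signature shown earlier in this thread):

import numpy as np
from tensorflow.contrib import predictor

predict_fn = predictor.from_saved_model(export_dir)
batch, seq_len = 8, 128  # seq_len must match --max_seq_length used at export time

outputs = predict_fn({
    "input_ids": np.zeros((batch, seq_len), dtype=np.int32),
    "input_mask": np.zeros((batch, seq_len), dtype=np.int32),
    "segment_ids": np.zeros((batch, seq_len), dtype=np.int32),
    "label_ids": np.zeros((batch,), dtype=np.int32),
    "is_real_example": np.ones((batch,), dtype=np.int32),
})
print(outputs["probabilities"].shape)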

The serialization was unnecessarily complicating the process for me. Following your advice I got rid of it and things went much more smoothly. Thank you.

Maybe check this out if you are looking to serve a fine-tuned BERT model:
BERT Serving and Inferencing from fine-tuned

When I export my model as follows, how can I send batch requests to the server?
Thanks in advance for all the help.

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['examples'] tensor_info:
        dtype: DT_STRING
        shape: unknown_rank
        name: input_example_tensor:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['end_logits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 384)
        name: unstack:1
    outputs['start_logits'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 384)
        name: unstack:0
    outputs['unique_ids'] tensor_info:
        dtype: DT_INT64
        shape: (-1)
        name: ParseExample/ParseExample:4
  Method name is: tensorflow/serving/predict

Hi, did you figure out how to do batch prediction?

I am training a BERT model using a Google Cloud Storage bucket with the given dataset. For some reason, at the final step, I get this error:
Tensor conversion requested dtype string for Tensor with dtype float32.
At first, I thought it was a problem with my dataset, but after some alterations it turned out not to be that. Does anyone have any ideas on why it is raising this exception?

2020-06-13 02:50:45,710 : Calling model_fn.
2020-06-13 02:50:45,723 : Error recorded from training_loop: in converted code:
relative to /usr/local/lib/python3.6/dist-packages/tensorflow_core/python:

data/ops/readers.py:336 __init__
    filenames, compression_type, buffer_size, num_parallel_reads)
data/ops/readers.py:296 __init__
    filenames = _create_or_validate_filenames_dataset(filenames)
data/ops/readers.py:56 _create_or_validate_filenames_dataset
    filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
framework/ops.py:1184 convert_to_tensor
    return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
framework/ops.py:1242 convert_to_tensor_v2
    as_ref=False)
framework/ops.py:1273 internal_convert_to_tensor
    (dtype.name, value.dtype.name, value))

ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: <tf.Tensor 'args_0:0' shape=() dtype=float32>

2020-06-13 02:50:45,724 : training_loop marked as finished
2020-06-13 02:50:45,725 : Reraising captured error

ValueError Traceback (most recent call last)
in ()
----> 1 estimator.train(input_fn=train_input_fn, max_steps=TRAIN_STEPS)

27 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise

ValueError: in converted code:
relative to /usr/local/lib/python3.6/dist-packages/tensorflow_core/python:

data/ops/readers.py:336 __init__
    filenames, compression_type, buffer_size, num_parallel_reads)
data/ops/readers.py:296 __init__
    filenames = _create_or_validate_filenames_dataset(filenames)
data/ops/readers.py:56 _create_or_validate_filenames_dataset
    filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
framework/ops.py:1184 convert_to_tensor
    return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
framework/ops.py:1242 convert_to_tensor_v2
    as_ref=False)
framework/ops.py:1273 internal_convert_to_tensor
    (dtype.name, value.dtype.name, value))

ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: <tf.Tensor 'args_0:0' shape=() dtype=float32>

Hi,

I'm trying to serve BERT-squad model on docker with REST

  1. I export the BERT-SQuAD model with the code below and get the export_model files (saved_model.pb, variables, etc.):
    def serving_input_receiver_fn():
        feature_spec = {
            "unique_ids": tf.FixedLenFeature([], tf.int64),
            "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
            "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
            "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
        }
        serialized_tf_example = tf.placeholder(dtype=tf.string,
                                               shape=[1],
                                               name='input_example_tensor')
        receiver_tensors = {'examples': serialized_tf_example}
        features = tf.parse_example(serialized_tf_example, feature_spec)

        return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

    estimator._export_to_tpu = False
    estimator.export_savedmodel(os.path.join(FLAGS.output_dir, "export_model"), serving_input_receiver_fn)
  2. I run Docker to serve export_model over REST (gRPC: 8500, REST: 8501):
docker run -t --restart always -p 8501:8501 \
  -v "${QA_PATH}:/models/squad" \
  -e MODEL_NAME=squad \
  tensorflow/serving &

And then, I can't figure out how to build the input for a request to the BERT-SQuAD model.

I tried the read_squad_examples(), FeatureWriter(), and convert_examples_to_features() functions to build an input, but all of them failed (reference: https://medium.com/@joyceye04/deploy-a-servable-bert-qa-model-using-tensorflow-serving-d848f9797d9).

In the BERT-classify case, you just embed the text into numbers using the vocab, but for BERT-SQuAD it's not that easy, I think.

How can I build the input for a REST request?
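
Since this export's signature takes serialized tf.train.Example strings under the single 'examples' input, the TF Serving REST API wants those bytes base64-encoded as a {"b64": ...} value. A hedged sketch (host, port, and the zero-filled features are placeholders; the keys must match the feature_spec above, and the requests library is assumed):

import base64
import json
import requests
import tensorflow as tf

def serialized_example(unique_id, input_ids, input_mask, segment_ids):
    def int64_list(values):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
    return tf.train.Example(features=tf.train.Features(feature={
        "unique_ids": int64_list([unique_id]),
        "input_ids": int64_list(input_ids),
        "input_mask": int64_list(input_mask),
        "segment_ids": int64_list(segment_ids),
    })).SerializeToString()

example = serialized_example(1000, [0] * 384, [0] * 384, [0] * 384)
body = {"instances": [{"examples": {"b64": base64.b64encode(example).decode("utf-8")}}]}
resp = requests.post("http://localhost:8501/v1/models/squad:predict", data=json.dumps(body))
print(resp.json())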

Okay, I finally figured out how to do this on Vertex AI with a SQuAD2 fine-tuned model, using TensorFlow 1.15.

First of all, per this comment on issue deepjavalibrary/djl#152,
we need to change the following around line 672 in run_squad.py:

...
predictions = {
          "unique_ids": unique_ids,
          "start_logits": start_logits,
          "end_logits": end_logits,
      }
...

to

...
predictions = {
          "unique_ids": tf.identity(unique_ids),
          "start_logits": start_logits,
          "end_logits": end_logits,
      }
...

to avoid "unique_ids is both fed and fetched" errors.
Then we can export the saved_model.pb by adding the following to run_squad.py (I add it just after if FLAGS.do_predict:):

    def serving_input_receiver_fn():
        unique_ids = tf.placeholder(tf.int32, [None], name='unique_ids')
        input_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_ids')
        input_mask = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_mask')
        segment_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='segment_ids')
        input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({
            'unique_ids': unique_ids,
            'input_ids': input_ids,
            'input_mask': input_mask,
            'segment_ids': segment_ids,
        })()
        return input_fn

    estimator._export_to_tpu = False
    estimator.export_savedmodel(FLAGS.output_dir, serving_input_receiver_fn)

it is important to use raw_serving since "[...] the HTTP server for pre-built Vertex AI deployment containers does not support the HTTP request body for TensorFlow 1.x estimators. Using raw predict, one can send raw content through the HTTP server that is presented to the model input as-is -- no canonical processing." as per this guide: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage6/get_started_with_raw_predict.ipynb

The usual predict script can then be run, which exports a saved_model.pb file.
This model file can be manually imported and deployed in Vertex AI using the Vertex AI GUI.
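
If you'd rather script that step than use the GUI, the google-cloud-aiplatform SDK can usually do the import and deploy. A sketch with placeholder bucket path, container tag, and machine type (double-check the prebuilt container name against the current Vertex AI docs):

from google.cloud import aiplatform

aiplatform.init(project=PROJECT_ID, location=REGION)

model = aiplatform.Model.upload(
    display_name="bert-squad",
    artifact_uri="gs://YOUR_BUCKET/path/to/export",  # directory holding saved_model.pb
    # Assumed prebuilt TF 1.15 CPU serving container; verify the exact URI.
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/tf-cpu.1-15:latest",
)
endpoint = model.deploy(machine_type="n1-standard-4")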

To make a prediction, read the input data like this:

input_data = "./dev-v2.0.json"
tokenizer = tokenization.FullTokenizer(vocab_file="./vocab.txt", do_lower_case=True)
eval_examples = read_squad_examples(input_data, is_training=False)
eval_example = [eval_examples[0]]

feature,input_feature = convert_examples_to_features(
            examples=eval_example,
            tokenizer=tokenizer,
            max_seq_length=384,
            doc_stride=128,
            max_query_length=64,
            is_training=False,
            output_fn=[])

eval_features = [input_feature]

import json

from google.api import httpbody_pb2
from google.cloud import aiplatform_v1

ENDPOINT_ID = "ENDPOINT_ID"
PROJECT_ID = "PROJECT_ID"
REGION = "us-central1"

DATA = {
    "signature_name": "serving_default",
    "instances": [
        {
            "unique_ids": feature["unique_id"],
            "input_ids": feature["input_ids"],
            "input_mask": feature["input_mask"],
            "segment_ids": feature["segment_ids"]
        }
    ]
}

http_body = httpbody_pb2.HttpBody(
    data=json.dumps(DATA).encode("utf-8"),
    content_type="application/json",
)

req = aiplatform_v1.RawPredictRequest(
    http_body=http_body,
    endpoint=f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{ENDPOINT_ID}",
)

API_ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)
client_options = {"api_endpoint": API_ENDPOINT}

# The GAPIC prediction client from aiplatform_v1 exposes raw_predict.
pred_client = aiplatform_v1.PredictionServiceClient(client_options=client_options)

response = pred_client.raw_predict(req)
print(response)

Then the write_predictions function in run_squad.py can be used to write out the results:

data = json.loads(response.data)

RawResult = collections.namedtuple("RawResult",
                                   ["unique_id", "start_logits", "end_logits"])


unique_id = data["predictions"][0]["unique_ids"]
start_logits = data["predictions"][0]["start_logits"]
end_logits = data["predictions"][0]["end_logits"]

formatted_result = RawResult(
        unique_id = unique_id,
        start_logits = start_logits,
        end_logits = end_logits)

all_results = [formatted_result]

write_predictions(eval_example, eval_features, all_results, n_best_size=3, max_answer_length=64, do_lower_case=True, output_prediction_file="./results/preds.json",output_nbest_file="./results/nbest.json", output_null_log_odds_file="./results/null_log_odds.json",version_2_with_negative=True)