Use BERT fine-tuned model for Tensorflow serving
gkovacslmi opened this issue · 72 comments
I'd like to publish an HTTP REST or GRPC API with TF serving for a BERT model fine-tuned on the SQuAD dataset. I tried two approaches but unfortunately both of them failed.
1.) Convert the files in the output directory (ckpt files, graph.pbtxt...) to the file structure that TF serving expects - saved_model.pb and the data/index files in the variables folder. This resulted in a model loading error when I tried to load it into a Docker container with the 'tensorflow/serving' image.
2.) The other approach was to create a model export after fine-tuning by calling TPUEstimator.export_savedmodel(...). This resulted in the following issue - an internal implementation detail in TF:
NotImplementedError: Operation of type AssignVariableOp (AssignVariableOp) is not supported on the TPU for inference. Execution will fail if this op is used in the graph. Make sure your variables are using variable_scope.
The way I tried to export the fine-tuned model after training is the following:
# Parsing schema for the serialized tf.Example protos: a scalar int64
# "unique_ids" plus three int64 vectors of length FLAGS.max_seq_length.
feature_spec = {"unique_ids": tf.FixedLenFeature([], tf.int64)}
feature_spec.update({
    key: tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64)
    for key in ("input_ids", "input_mask", "segment_ids")
})


def serving_input_receiver_fn():
    """Return a ServingInputReceiver fed through a single string placeholder.

    The placeholder is named 'input_example_tensor', is exposed to callers
    under the 'examples' key, and is parsed with ``feature_spec`` to
    produce the model features.
    """
    examples = tf.placeholder(
        tf.string, shape=None, name='input_example_tensor')
    return tf.estimator.export.ServingInputReceiver(
        features=tf.parse_example(examples, feature_spec),
        receiver_tensors={'examples': examples},
    )


estimator.export_savedmodel(
    export_dir_base,
    serving_input_receiver_fn,
    strip_default_attrs=True,
)
If my issue is unrelated to this repo, can you please redirect it to a place where model checkpoint <--> SavedModel conversion issues are considered to be on-topic?
Thanks in advance for all the help.
the same error!
anyone help...
The detailed error is below:
NotImplementedError Traceback (most recent call last)
in
----> 1 estimator.export_savedmodel('export', serving_input_receiver_fn)
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in export_savedmodel(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs)
635 checkpoint_path=checkpoint_path,
636 strip_default_attrs=strip_default_attrs,
--> 637 mode=model_fn_lib.ModeKeys.PREDICT)
638
639 def export_saved_model(
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _export_saved_model_for_mode(self, export_dir_base, input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs, mode)
761 as_text=as_text,
762 checkpoint_path=checkpoint_path,
--> 763 strip_default_attrs=strip_default_attrs)
764
765 def _export_all_saved_models(
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _export_all_saved_models(self, export_dir_base, input_receiver_fn_map, assets_extra, as_text, checkpoint_path, strip_default_attrs)
879 builder, input_receiver_fn_map, checkpoint_path,
880 strip_default_attrs, save_variables,
--> 881 mode=model_fn_lib.ModeKeys.PREDICT)
882 save_variables = False
883
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _add_meta_graph_for_mode(self, builder, input_receiver_fn_map, checkpoint_path, strip_default_attrs, save_variables, mode, export_tags, check_variables)
2177 mode=mode,
2178 export_tags=export_tags,
-> 2179 check_variables=False))
2180
2181 def _call_model_fn(self, features, labels, mode, config):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _add_meta_graph_for_mode(self, builder, input_receiver_fn_map, checkpoint_path, strip_default_attrs, save_variables, mode, export_tags, check_variables)
956 labels=getattr(input_receiver, 'labels', None),
957 mode=mode,
--> 958 config=self.config)
959
960 export_outputs = model_fn_lib.export_outputs_for_mode(
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn(self, features, labels, mode, config)
2181 def _call_model_fn(self, features, labels, mode, config):
2182 if mode == _REWRITE_FOR_INFERENCE_MODE:
-> 2183 return self._call_model_fn_for_inference(features, labels, mode, config)
2184 else:
2185 return super(TPUEstimator, self)._call_model_fn(
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn_for_inference(self, features, labels, mode, config)
2224 return tpu_tensors
2225
-> 2226 tpu_tensors_on_cpu = tpu.rewrite_for_inference(computation)
2227 estimator_spec, tensors_dict, tensors = capture.get()
2228
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/framework/python/framework/experimental.py in new_func(*args, **kwargs)
62 'any time, and without warning.',
63 decorator_utils.get_qualified_name(func), func.module)
---> 64 return func(*args, **kwargs)
65 new_func.doc = _add_experimental_function_notice_to_docstring(
66 func.doc)
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in rewrite_for_inference(computation, inputs, infeed_queue, device_assignment, name)
1155 infeed_queue=infeed_queue,
1156 device_assignment=device_assignment,
-> 1157 name=name)
1158 # pylint: enable=undefined-variable
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in rewrite(computation, inputs, infeed_queue, device_assignment, name)
994 infeed_queue=infeed_queue,
995 device_assignment=device_assignment,
--> 996 name=name)[0]
997 # pylint: enable=indexing-exception
998
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in replicate(computation, inputs, infeed_queue, device_assignment, name)
506 """
507 return split_compile_and_replicate(computation, inputs, infeed_queue,
--> 508 device_assignment, name)[1]
509
510
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in split_compile_and_replicate(failed resolving arguments)
657 vscope.set_use_resource(True)
658
--> 659 outputs = computation(*computation_inputs)
660
661 vscope.set_use_resource(saved_use_resource)
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in wrapped_computation(*args, **kwargs)
1141 vscope.set_caching_device(lambda op: op.device)
1142
-> 1143 result = computation(*args, **kwargs)
1144
1145 vscope.set_custom_getter(prev_custom_getter)
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in computation()
2208 # so that building the graph will happen under rewrite_for_inference
.
2209 mode = model_fn_lib.ModeKeys.PREDICT
-> 2210 estimator_spec = self._call_model_fn(features, labels, mode, config)
2211
2212 # We pick the TPU tensors out from export_output
and later return them
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn(self, features, labels, mode, config)
2184 else:
2185 return super(TPUEstimator, self)._call_model_fn(
-> 2186 features, labels, mode, config)
2187
2188 def _call_model_fn_for_inference(self, features, labels, mode, config):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/estimator/estimator.py in _call_model_fn(self, features, labels, mode, config)
1167
1168 logging.info('Calling model_fn.')
-> 1169 model_fn_results = self._model_fn(features=features, **kwargs)
1170 logging.info('Done calling model_fn.')
1171
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _model_fn(features, labels, mode, config, params)
2468 logging.info('Running %s on CPU', mode)
2469 estimator_spec = model_fn_wrapper.call_without_tpu(
-> 2470 features, labels, is_export_mode=is_export_mode)
2471 estimator_spec = estimator_spec._replace(
2472 training_hooks=estimator_spec.training_hooks + (examples_hook,))
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in call_without_tpu(self, features, labels, is_export_mode)
1248
1249 def call_without_tpu(self, features, labels, is_export_mode):
-> 1250 return self._call_model_fn(features, labels, is_export_mode=is_export_mode)
1251
1252 def convert_to_single_tpu_train_step(self, dequeue_fn):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py in _call_model_fn(self, features, labels, is_export_mode)
1522 _add_item_to_params(params, _CTX_KEY, user_context)
1523
-> 1524 estimator_spec = self._model_fn(features=features, **kwargs)
1525 if (running_on_cpu and
1526 isinstance(estimator_spec, model_fn_lib._TPUEstimatorSpec)): # pylint: disable=protected-access
~/deploy/BERT-NER/model.py in model_fn(features, labels, mode, params)
81 (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars,
82 init_checkpoint)
---> 83 tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
84 if use_tpu:
85 def tpu_scaffold():
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in init_from_checkpoint(ckpt_dir_or_file, assignment_map)
185 else:
186 distribution_strategy_context.get_tower_context().merge_call(
--> 187 _init_from_checkpoint, ckpt_dir_or_file, assignment_map)
188
189
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/distribute.py in merge_call(self, merge_fn, *args, **kwargs)
1038 """
1039 require_tower_context(self)
-> 1040 return self._merge_call(merge_fn, *args, **kwargs)
1041
1042 def _merge_call(self, merge_fn, *args, **kwargs):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/distribute.py in _merge_call(self, merge_fn, *args, **kwargs)
1046 self._distribution_strategy))
1047 try:
-> 1048 return merge_fn(self._distribution_strategy, *args, **kwargs)
1049 finally:
1050 _pop_per_thread_mode()
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in init_from_checkpoint(, ckpt_dir_or_file, assignment_map)
229 else:
230 var_name = ",".join([v.name for v in var])
--> 231 _set_variable_or_list_initializer(var, ckpt_file, tensor_name_in_ckpt)
232 logging.debug("Initialize variable %s from checkpoint %s with %s",
233 var_name, ckpt_dir_or_file, tensor_name_in_ckpt)
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in _set_variable_or_list_initializer(variable_or_list, ckpt_file, tensor_name)
353 _set_checkpoint_initializer(v, ckpt_file, tensor_name, slice_info.spec)
354 else:
--> 355 _set_checkpoint_initializer(variable_or_list, ckpt_file, tensor_name, "")
356
357
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py in _set_checkpoint_initializer(variable, ckpt_file, tensor_name, slice_spec, name)
316
317 assert len(saveable_objects) == 1 # Should be only one variable.
--> 318 init_op = saveable_objects[0].restore([restore_op], restored_shapes=None)
319
320 # pylint:disable=protected-access
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, restored_tensors, restored_shapes)
159 restored_tensor = array_ops.identity(restored_tensor)
160 return resource_variable_ops.shape_safe_assign_variable_handle(
--> 161 self.handle_op, self._var_shape, restored_tensor)
162
163 def init(self, write_version=saver_pb2.SaverDef.V2):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py in shape_safe_assign_variable_handle(handle, shape, value, name)
161 return gen_resource_variable_ops.assign_variable_op(handle,
162 value_tensor,
--> 163 name=name)
164
165
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/ops/gen_resource_variable_ops.py in assign_variable_op(resource, value, name)
156 if _ctx is None or not _ctx._eager_context.is_eager:
157 _, _, _op = _op_def_lib._apply_op_helper(
--> 158 "AssignVariableOp", resource=resource, value=value, name=name)
159 return _op
160 _result = None
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
789
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
486 'in a future version' if date is None else ('after %s' % date),
487 instructions)
--> 488 return func(*args, **kwargs)
489 return tf_decorator.make_decorator(func, new_func, 'deprecated',
490 _add_deprecated_arg_notice_to_docstring(
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in create_op(failed resolving arguments)
3270 input_types=input_types,
3271 original_op=self._default_original_op,
-> 3272 op_def=op_def)
3273 self._create_op_helper(ret, compute_device=compute_device)
3274 return ret
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in init(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1803
1804 if not c_op:
-> 1805 self._control_flow_post_processing()
1806
1807 def _control_flow_post_processing(self):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/python/framework/ops.py in _control_flow_post_processing(self)
1814 control_flow_util.CheckInputFromValidContext(self, input_tensor.op)
1815 if self._control_flow_context is not None:
-> 1816 self._control_flow_context.AddOp(self)
1817
1818 def _reconstruct_sequence_inputs(self, op_def, inputs, attrs):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in AddOp(self, op)
1034
1035 def AddOp(self, op):
-> 1036 self._AddOpInternal(op)
1037
1038 def _AddOpInternal(self, op):
~/.conda/envs/tf/lib/python3.6/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py in _AddOpInternal(self, op)
1042 "Operation of type %s (%s) is not supported on the TPU for inference."
1043 " Execution will fail if this op is used in the graph. Make sure your"
-> 1044 " variables are using variable_scope." % (op.type, op.name))
1045 if self._outer_context:
1046 self._outer_context.AddInnerOp(op)
NotImplementedError: Operation of type AssignVariableOp (AssignVariableOp) is not supported on the TPU for inference. Execution will fail if this op is used in the graph. Make sure your variables are using variable_scope.
def serving_input_fn():
    """Return a ServingInputReceiver with one string placeholder per feature.

    Each of the four receiver tensors is a 1-D string placeholder holding
    serialized records. Every record is parsed on its own with
    ``tf.parse_single_example`` (driven by ``tf.map_fn``) against a spec
    containing only that feature, and the parsed int64 value is converted
    to int32 before being handed to the model.
    """
    with tf.variable_scope("foo"):
        # (feature name, per-example shape). The order is kept because the
        # placeholders are unnamed and receive auto-generated names in
        # creation order.
        feature_shapes = (
            ("unique_ids", []),
            ("input_ids", [FLAGS.max_seq_length]),
            ("input_mask", [FLAGS.max_seq_length]),
            ("segment_ids", [FLAGS.max_seq_length]),
        )

        # Create every placeholder up front, before any parsing op.
        received_tensors = {}
        for key, _ in feature_shapes:
            received_tensors[key] = tf.placeholder(
                shape=[None], dtype=tf.string)

        def _make_decoder(key, shape):
            """Return a per-record parser bound to a one-feature spec."""
            spec = {key: tf.FixedLenFeature(shape, tf.int64)}

            def _decode_record(record):
                value = tf.parse_single_example(record, spec)[key]
                if value.dtype == tf.int64:
                    value = tf.to_int32(value)
                return value

            return _decode_record

        features = {}
        for key, shape in feature_shapes:
            features[key] = tf.map_fn(
                _make_decoder(key, shape),
                received_tensors[key],
                dtype=tf.int32,
            )

        return tf.estimator.export.ServingInputReceiver(
            features, received_tensors)
NotImplementedError: Operation of type AssignVariableOp (AssignVariableOp) is not supported on the TPU for inference. Execution will fail if this op is used in the graph. Make sure your variables are using variable_scope.
I found my mistake: the tf.train.init_from_checkpoint function should not be called while exporting the model.
@sminder Is my serving_input_fn function correct?
How did you change the code so that export happens before the tf.train.init_from_checkpoint call, or how do I stop tf.train.init_from_checkpoint from being called at export time?
@wazzy I have tested your serving_input_fn function, it's ok.
I found the init_from_checkpoint call in the model_fn used for the estimator, like this:
tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
export code like this:
estimator._export_to_tpu = False # this is important
estimator.export_savedmodel('export_t', serving_input_fn)
@sminder thanks for the input. I think your feedback worked because the saved_model_cli
is able to read the saved model.
saved_model_cli show --all --dir $PROJECT_HOME/export_t/1544203049
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['input_ids'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: foo/Placeholder_1:0
inputs['input_mask'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: foo/Placeholder_2:0
inputs['label_ids'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: foo/Placeholder:0
inputs['segment_ids'] tensor_info:
dtype: DT_STRING
shape: (-1)
name: foo/Placeholder_3:0
The given SavedModel SignatureDef contains the following output(s):
outputs['output'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 935)
name: loss/Softmax:0
Method name is: tensorflow/serving/predict
The issue I am having now is actually getting predictions from the saved model. Can you provide an example using input_exprs? The following command, based on the example,
saved_model_cli run --dir $PROJECT_HOME/export_t/1544203049 --tag_set serve --signature_def serving_default \
--input_exprs 'input_ids=np.zeros((64));input_mask=np.zeros((64));label_ids=[0];segment_ids=np.zeros((64))'
gives
tensorflow.python.framework.errors_impl.InternalError: Unable to get element as bytes.
checkout https://github.com/bigboNed3/bert_serving.
i use tf.estimator.export.build_raw_serving_input_receiver_fn
@lapolonio @wazzy
Hi @bigboNed3
Have you ever met this bug when you tried saved_model_cli command like this:
saved_model_cli run --dir /path/to/your_model/your_model_version --tag_set serve --signature_def serving_default \
--input_exprs 'input_ids=np.zeros((1,128));input_mask=np.zeros((1,128));label_ids=[0];segment_ids=np.zeros((1,128))'
File "/Users/cdong/miniconda3/bin/saved_model_cli", line 11, in <module>
sys.exit(main())
File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 802, in main
args.func(args)
File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 635, in run
args.overwrite, tf_debug=args.tf_debug)
File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 337, in run_saved_model_with_feed_dict
outputs = sess.run(output_tensor_names_sorted, feed_dict=inputs_feed_dict)
File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run
run_metadata_ptr)
File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run
run_metadata)
File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits must be 2-dimensional
[[Node: bert/encoder/layer_0/attention/self/Softmax = Softmax[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](bert/encoder/layer_0/attention/self/$
dd)]]
and the model's structure is
saved_model_cli show --all --dir serving/tensorflow_serving/servables/tensorflow/testdata/bert_sentiment/1544601708
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['input_ids'] tensor_info:
dtype: DT_INT32
shape: (-1, 128)
name: input_ids_1:0
inputs['input_mask'] tensor_info:
dtype: DT_INT32
shape: (-1, 128)
name: input_mask_1:0
inputs['label_ids'] tensor_info:
dtype: DT_INT32
shape: (-1)
name: label_ids_1:0
inputs['segment_ids'] tensor_info:
dtype: DT_INT32
shape: (-1, 128)
name: segment_ids_1:0
The given SavedModel SignatureDef contains the following output(s):
outputs['output'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 3)
name: loss/Softmax:0
Method name is: tensorflow/serving/predict
@neutronest @wazzy it looks like in https://github.com/bigboNed3/bert_serving run_classifier forgot to skip init_from_checkpoint
when exporting the model.
i'm working on an end to end example.
my input_fn:
def serving_input_fn():
    """Return a ServingInputReceiver that parses serialized tf.Example protos.

    A 1-D string placeholder named 'input_example_tensor' is exposed to
    callers under the 'examples' key. It is parsed with
    ``tf.parse_example`` into three int64 vectors of length
    FLAGS.max_seq_length and a scalar int64 "label_ids".
    """
    with tf.variable_scope("foo"):
        parse_spec = {
            key: tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64)
            for key in ("input_ids", "input_mask", "segment_ids")
        }
        parse_spec["label_ids"] = tf.FixedLenFeature([], tf.int64)

        examples = tf.placeholder(
            dtype=tf.string, shape=[None], name='input_example_tensor')
        return tf.estimator.export.ServingInputReceiver(
            features=tf.parse_example(examples, parse_spec),
            receiver_tensors={'examples': examples},
        )
which creates a model that lets you do inference like:
saved_model_cli run --dir $PROJECT_HOME/export_t/1544223413 --tag_set serve --signature_def serving_default \
--input_examples 'examples=[{"input_ids":np.zeros((64), dtype=int).tolist(),"input_mask":np.zeros((64), dtype=int).tolist(),"label_ids":[0],"segment_ids":np.zeros((64), dtype=int).tolist()}]'
Hi @bigboNed3
Have you ever met this bug when you tried saved_model_cli command like this:
saved_model_cli run --dir /path/to/your_model/your_model_version --tag_set serve --signature_def serving_default \ --input_exprs 'input_ids=np.zeros((1,128));input_mask=np.zeros((1,128));label_ids=[0];segment_ids=np.zeros((1,128))' File "/Users/cdong/miniconda3/bin/saved_model_cli", line 11, in <module> sys.exit(main()) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 802, in main args.func(args) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 635, in run args.overwrite, tf_debug=args.tf_debug) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 337, in run_saved_model_with_feed_dict outputs = sess.run(output_tensor_names_sorted, feed_dict=inputs_feed_dict) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run run_metadata_ptr) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run feed_dict_tensor, options, run_metadata) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run run_metadata) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.InvalidArgumentError: logits must be 2-dimensional [[Node: bert/encoder/layer_0/attention/self/Softmax = Softmax[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](bert/encoder/layer_0/attention/self/$ dd)]]
and the model's structure is
saved_model_cli show --all --dir serving/tensorflow_serving/servables/tensorflow/testdata/bert_sentiment/1544601708 MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: signature_def['serving_default']: The given SavedModel SignatureDef contains the following input(s): inputs['input_ids'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: input_ids_1:0 inputs['input_mask'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: input_mask_1:0 inputs['label_ids'] tensor_info: dtype: DT_INT32 shape: (-1) name: label_ids_1:0 inputs['segment_ids'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: segment_ids_1:0 The given SavedModel SignatureDef contains the following output(s): outputs['output'] tensor_info: dtype: DT_FLOAT shape: (-1, 3) name: loss/Softmax:0 Method name is: tensorflow/serving/predict
Yes, I met that bug. I couldn't solve it,
so I use tf.estimator.export.build_raw_serving_input_receiver_fn instead.
hope @lapolonio 's method will work
@lapolonio my export function is like this:
def export_model(dir_path):
    """Export ``estimator`` (taken from the enclosing scope) to ``dir_path``.

    TPU export is switched off on the estimator before
    ``export_savedmodel`` is called.
    """
    max_seq_len = 128
    input_shape = [1, max_seq_len]

    def serving_input_receiver_fn():
        """Return a receiver fed with raw int64 ``input_ids``.

        Only ``input_ids`` is supplied by the caller. ``input_mask`` is
        derived from it (0 where the id equals 0, 1 elsewhere) and
        ``segment_ids`` is all zeros.
        """
        input_ids = tf.placeholder(dtype=tf.int64, shape=input_shape)
        is_zero_id = tf.cast(tf.equal(input_ids, 0), dtype=tf.int64)
        input_mask = 1 - is_zero_id
        segment_ids = tf.zeros(dtype=tf.int64, shape=input_shape)
        return tf.estimator.export.ServingInputReceiver(
            features={
                "input_ids": input_ids,
                "input_mask": input_mask,
                "segment_ids": segment_ids,
            },
            receiver_tensors={"input_ids": input_ids},
        )

    estimator._export_to_tpu = False
    estimator.export_savedmodel(dir_path, serving_input_receiver_fn)
And when exporting the model, I deleted all the functions used only for training and evaluation, which are unrelated to export, and it works.
Hi @bigboNed3
Have you ever met this bug when you tried saved_model_cli command like this:
saved_model_cli run --dir /path/to/your_model/your_model_version --tag_set serve --signature_def serving_default \ --input_exprs 'input_ids=np.zeros((1,128));input_mask=np.zeros((1,128));label_ids=[0];segment_ids=np.zeros((1,128))' File "/Users/cdong/miniconda3/bin/saved_model_cli", line 11, in <module> sys.exit(main()) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 802, in main args.func(args) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 635, in run args.overwrite, tf_debug=args.tf_debug) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 337, in run_saved_model_with_feed_dict outputs = sess.run(output_tensor_names_sorted, feed_dict=inputs_feed_dict) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run run_metadata_ptr) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run feed_dict_tensor, options, run_metadata) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run run_metadata) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.InvalidArgumentError: logits must be 2-dimensional [[Node: bert/encoder/layer_0/attention/self/Softmax = Softmax[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](bert/encoder/layer_0/attention/self/$ dd)]]
and the model's structure is
saved_model_cli show --all --dir serving/tensorflow_serving/servables/tensorflow/testdata/bert_sentiment/1544601708 MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: signature_def['serving_default']: The given SavedModel SignatureDef contains the following input(s): inputs['input_ids'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: input_ids_1:0 inputs['input_mask'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: input_mask_1:0 inputs['label_ids'] tensor_info: dtype: DT_INT32 shape: (-1) name: label_ids_1:0 inputs['segment_ids'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: segment_ids_1:0 The given SavedModel SignatureDef contains the following output(s): outputs['output'] tensor_info: dtype: DT_FLOAT shape: (-1, 3) name: loss/Softmax:0 Method name is: tensorflow/serving/predict
Hi @bigboNed3
Have you ever met this bug when you tried saved_model_cli command like this:saved_model_cli run --dir /path/to/your_model/your_model_version --tag_set serve --signature_def serving_default \ --input_exprs 'input_ids=np.zeros((1,128));input_mask=np.zeros((1,128));label_ids=[0];segment_ids=np.zeros((1,128))' File "/Users/cdong/miniconda3/bin/saved_model_cli", line 11, in <module> sys.exit(main()) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 802, in main args.func(args) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 635, in run args.overwrite, tf_debug=args.tf_debug) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/tools/saved_model_cli.py", line 337, in run_saved_model_with_feed_dict outputs = sess.run(output_tensor_names_sorted, feed_dict=inputs_feed_dict) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 900, in run run_metadata_ptr) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1135, in _run feed_dict_tensor, options, run_metadata) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1316, in _do_run run_metadata) File "/Users/cdong/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1335, in _do_call raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.InvalidArgumentError: logits must be 2-dimensional [[Node: bert/encoder/layer_0/attention/self/Softmax = Softmax[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](bert/encoder/layer_0/attention/self/$ dd)]]
and the model's structure is
saved_model_cli show --all --dir serving/tensorflow_serving/servables/tensorflow/testdata/bert_sentiment/1544601708 MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: signature_def['serving_default']: The given SavedModel SignatureDef contains the following input(s): inputs['input_ids'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: input_ids_1:0 inputs['input_mask'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: input_mask_1:0 inputs['label_ids'] tensor_info: dtype: DT_INT32 shape: (-1) name: label_ids_1:0 inputs['segment_ids'] tensor_info: dtype: DT_INT32 shape: (-1, 128) name: segment_ids_1:0 The given SavedModel SignatureDef contains the following output(s): outputs['output'] tensor_info: dtype: DT_FLOAT shape: (-1, 3) name: loss/Softmax:0 Method name is: tensorflow/serving/predict
Yes, I met that bug. I couldn't solve it,
so I use tf.estimator.export.build_raw_serving_input_receiver_fn instead.
hope @lapolonio 's method will work
For more information: I have met similar errors, such as "logits must be 2-dimensional", while requesting the service with TensorFlow Serving. The root cause was that the version of TensorFlow Serving was too low; changing to a higher version (1.12.0) solved the error.
When I export my model as follows, how can I send batch requests to the server?
Thanks in advance for all the help.
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['examples'] tensor_info:
dtype: DT_STRING
shape: unknown_rank
name: input_example_tensor:0
The given SavedModel SignatureDef contains the following output(s):
outputs['end_logits'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 384)
name: unstack:1
outputs['start_logits'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 384)
name: unstack:0
outputs['unique_ids'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: ParseExample/ParseExample:4
Method name is: tensorflow/serving/predict
@sminder @bigboNed3 @wazzy @lapolonio
For my application, I would like to take sentences as inputs and extract word embeddings from layer -2 (as described here: https://github.com/google-research/bert#using-bert-to-extract-fixed-feature-vectors-like-elmo). Any guidance on what I need to do to export a TensorFlow Serving model that can do that?
@sminder do you mind sharing the entire code to export the BERT model?
@lapolonio my export function is like this:
def export_model(dir_path):
    """Export the estimator as a SavedModel whose only input is ``input_ids``.

    The attention mask and segment ids are computed inside the serving graph,
    so a client only sends the token ids. The placeholder shape is fixed to
    ``[1, MAX_SEQ_LEN]``, i.e. one sequence per request.

    Args:
        dir_path: Base directory handed to ``estimator.export_savedmodel``.
    """
    MAX_SEQ_LEN = 128

    def serving_input_receiver_fn():
        """Build a receiver fed with raw ``input_ids`` (not a serialized tf.Example)."""
        receiver_tensors = {
            "input_ids": tf.placeholder(dtype=tf.int64, shape=[1, MAX_SEQ_LEN])
        }
        input_ids = receiver_tensors["input_ids"]
        features = {
            "input_ids": input_ids,
            # Positions whose id equals 0 get mask 0, every other position 1.
            "input_mask": 1 - tf.cast(tf.equal(input_ids, 0), dtype=tf.int64),
            # Segment ids are all zeros for the single sequence.
            "segment_ids": tf.zeros(dtype=tf.int64, shape=[1, MAX_SEQ_LEN]),
        }
        return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)

    # NOTE(review): presumably switches off the TPU export path of the
    # estimator before exporting -- confirm against the estimator in use.
    estimator._export_to_tpu = False
    estimator.export_savedmodel(dir_path, serving_input_receiver_fn)
and during export the model, I deleted all the unrelated functions for training and evaluation, and it's ok.
Hi, @lapolonio :
Your code works as shown below, but I have an issue when I use your serving_input_fn to call the service through a gRPC client: I don't seem to be able to pass an example in over gRPC. Have you tried that?
@neutronest @wazzy it looks like in https://github.com/bigboNed3/bert_serving run_classifier forgot to skip
init_from_checkpoint
when exporting the model.i'm working on an end to end example.
my input_fn:def serving_input_fn(): with tf.variable_scope("foo"): feature_spec = { "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "label_ids": tf.FixedLenFeature([], tf.int64), } serialized_tf_example = tf.placeholder(dtype=tf.string, shape=[None], name='input_example_tensor') receiver_tensors = {'examples': serialized_tf_example} features = tf.parse_example(serialized_tf_example, feature_spec) return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
which creates a model that lets you do inference like:
saved_model_cli run --dir $PROJECT_HOME/export_t/1544223413 --tag_set serve --signature_def serving_default \ --input_examples 'examples=[{"input_ids":np.zeros((64), dtype=int).tolist(),"input_mask":np.zeros((64), dtype=int).tolist(),"label_ids":[0],"segment_ids":np.zeros((64), dtype=int).tolist()}]'
@gkovacslmi
Hi, I am trying solution1: convert ckpt to pb file and use Java to serve. The code is as below:
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.python.saved_model import tag_constants
from tensorflow.core.protobuf import saver_pb2
# Freeze the text-format graph plus the given checkpoint into one .pb file
# whose output node is 'loss/Softmax'.
freeze_graph.freeze_graph(
    # Inputs: graph definition (text format) and the checkpoint to restore.
    input_graph=MODEL_DIR + SEP + 'graph.pbtxt',
    input_saver='',
    input_binary=False,
    input_checkpoint=MODEL_DIR + SEP + 'model.ckpt-' + version,
    # Node(s) to keep as outputs of the frozen graph.
    output_node_names='loss/Softmax',
    restore_op_name=None,
    filename_tensor_name=None,
    # Destination of the frozen graph.
    output_graph=PB_MODEL_FILE,
    clear_devices=True,
    initializer_nodes=None,
    variable_names_whitelist="",
    variable_names_blacklist="",
    input_meta_graph=None,
    input_saved_model_dir=None,
    saved_model_tags=tag_constants.SERVING,
    checkpoint_version=saver_pb2.SaverDef.V2,
)
but it throws an out of range exception:
Traceback (most recent call last):
File "fileconverter.py", line 29, in <module>
convert(str(2504))
File "fileconverter.py", line 24, in convert
checkpoint_version=saver_pb2.SaverDef.V2)
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/tools/freeze_graph.py", line 363, in freeze_graph
checkpoint_version=checkpoint_version)
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/tools/freeze_graph.py", line 190, in freeze_graph_with_def_protos
var_list=var_list, write_version=checkpoint_version)
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1102, in __init__
self.build()
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1114, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1151, in _build
build_save=build_save, build_restore=build_restore)
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 773, in _build_internal
saveables = self._ValidateAndSliceInputs(names_to_saveables)
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 680, in _ValidateAndSliceInputs
for converted_saveable_object in self.SaveableObjectsForOp(op, name):
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 654, in SaveableObjectsForOp
variable, "", name)
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 128, in __init__
self.handle_op = var.op.inputs[0]
File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2128, in __getitem__
return self._inputs[i]
IndexError: list index out of range
I tried python version 2.7 3.5 and 3.6, tf version 1.11 and 1.12 but doesn't work.
Can you share your freeze_graph?
Thanks in advance for all the help.
@gkovacslmi
Hi, I am trying solution1: convert ckpt to pb file and use Java to serve. The code is as below:import tensorflow as tf from tensorflow.python.tools import freeze_graph from tensorflow.python.saved_model import tag_constants from tensorflow.core.protobuf import saver_pb2 freeze_graph.freeze_graph(input_graph=MODEL_DIR + SEP + 'graph.pbtxt', input_saver='', input_binary=False, input_checkpoint=MODEL_DIR + SEP + 'model.ckpt-'+version, output_node_names='loss/Softmax', restore_op_name=None, filename_tensor_name=None, output_graph=PB_MODEL_FILE, clear_devices=True, initializer_nodes=None, variable_names_whitelist="", variable_names_blacklist="", input_meta_graph=None, input_saved_model_dir=None, saved_model_tags=tag_constants.SERVING, checkpoint_version=saver_pb2.SaverDef.V2)
but it throws an out of range exception:
Traceback (most recent call last): File "fileconverter.py", line 29, in <module> convert(str(2504)) File "fileconverter.py", line 24, in convert checkpoint_version=saver_pb2.SaverDef.V2) File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/tools/freeze_graph.py", line 363, in freeze_graph checkpoint_version=checkpoint_version) File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/tools/freeze_graph.py", line 190, in freeze_graph_with_def_protos var_list=var_list, write_version=checkpoint_version) File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1102, in __init__ self.build() File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1114, in build self._build(self._filename, build_save=True, build_restore=True) File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1151, in _build build_save=build_save, build_restore=build_restore) File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 773, in _build_internal saveables = self._ValidateAndSliceInputs(names_to_saveables) File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 680, in _ValidateAndSliceInputs for converted_saveable_object in self.SaveableObjectsForOp(op, name): File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 654, in SaveableObjectsForOp variable, "", name) File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 128, in __init__ self.handle_op = var.op.inputs[0] File "/data/anaconda3/envs/py35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2128, in __getitem__ return self._inputs[i] IndexError: list index out of range
I tried python version 2.7 3.5 and 3.6, tf version 1.11 and 1.12 but doesn't work.
Can you share your freeze_graph?
Thanks in advance for all the help.
I used export_savedmodel
https://github.com/bigboNed3/bert_serving
@gkovacslmi @wazzy @fuxia0425 @sminder
Anyone has written code to serve the trained model for SQUAD? I am new to tensorflow and it'll be a great help if you could share your code.
Pointing me to some resource that can teach me how to write a serving_function and then use the saved model to make predictions for BERT-for-QuestionAnswering(SQUAD) will be a great help too.
@neutronest @wazzy it looks like in https://github.com/bigboNed3/bert_serving run_classifier forgot to skip
init_from_checkpoint
when exporting the model.i'm working on an end to end example.
my input_fn:def serving_input_fn(): with tf.variable_scope("foo"): feature_spec = { "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "label_ids": tf.FixedLenFeature([], tf.int64), } serialized_tf_example = tf.placeholder(dtype=tf.string, shape=[None], name='input_example_tensor') receiver_tensors = {'examples': serialized_tf_example} features = tf.parse_example(serialized_tf_example, feature_spec) return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
which creates a model that lets you do inference like:
saved_model_cli run --dir $PROJECT_HOME/export_t/1544223413 --tag_set serve --signature_def serving_default \ --input_examples 'examples=[{"input_ids":np.zeros((64), dtype=int).tolist(),"input_mask":np.zeros((64), dtype=int).tolist(),"label_ids":[0],"segment_ids":np.zeros((64), dtype=int).tolist()}]'
Hello, I followed your guide to export the model, but I get the same results (output) every time I use it to predict. I verified that the inputs change before every Session.run() call, but the outputs fetched by name (Softmax:0 or per_example_loss:0) are always the same. Please help me, thanks!
@ahzz1207 I created an example of serving in google cloud platform using Kubernetes. https://github.com/lapolonio/bert/blob/classify_ag_news/make_containers.sh are the commands. I'm writing a post for medium. https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!
@neutronest @wazzy it looks like in https://github.com/bigboNed3/bert_serving run_classifier forgot to skip
init_from_checkpoint
when exporting the model.i'm working on an end to end example.
my input_fn:def serving_input_fn(): with tf.variable_scope("foo"): feature_spec = { "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64), "label_ids": tf.FixedLenFeature([], tf.int64), } serialized_tf_example = tf.placeholder(dtype=tf.string, shape=[None], name='input_example_tensor') receiver_tensors = {'examples': serialized_tf_example} features = tf.parse_example(serialized_tf_example, feature_spec) return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
which creates a model that lets you do inference like:
saved_model_cli run --dir $PROJECT_HOME/export_t/1544223413 --tag_set serve --signature_def serving_default \ --input_examples 'examples=[{"input_ids":np.zeros((64), dtype=int).tolist(),"input_mask":np.zeros((64), dtype=int).tolist(),"label_ids":[0],"segment_ids":np.zeros((64), dtype=int).tolist()}]'
hi, i use your code to export the bert model, but when I use tf.parse_example, I have encountered an error. do you meet this problem?
@Olivia-xu No. What is your error?
I have a walkthrough here: https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!
@lapolonio solution fails on tensorflow 1.13.0rc, I have tried running the colab
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
in ()
20 export_dir_base = 'gs://{}/bert/{}'.format(BUCKET,EXPORT_MODEL_DIR)
21 estimator._export_to_tpu = False # this is important
---> 22 path = estimator.export_saved_model(export_dir_base,serving_input_fn)
23 print(path)
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in export_saved_model(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path)
708 as_text=as_text,
709 checkpoint_path=checkpoint_path,
--> 710 strip_default_attrs=True)
711
712 def _export_saved_model_for_mode(
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in export_savedmodel(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs)
637 checkpoint_path=checkpoint_path,
638 strip_default_attrs=strip_default_attrs,
--> 639 mode=model_fn_lib.ModeKeys.PREDICT)
640
641 def export_saved_model(
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _export_saved_model_for_mode(self, export_dir_base, input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs, mode)
763 as_text=as_text,
764 checkpoint_path=checkpoint_path,
--> 765 strip_default_attrs=strip_default_attrs)
766
767 def _export_all_saved_models(
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _export_all_saved_models(self, export_dir_base, input_receiver_fn_map, assets_extra, as_text, checkpoint_path, strip_default_attrs)
881 builder, input_receiver_fn_map, checkpoint_path,
882 strip_default_attrs, save_variables,
--> 883 mode=model_fn_lib.ModeKeys.PREDICT)
884 save_variables = False
885
TypeError: _add_meta_graph_for_mode() got multiple values for argument 'mode'
@shawei3000 Thanks for the feedback! I just forced the version to be Tensorflow==1.12 and it's working again.
P.S. if you could give feedback in the medium post that would help clean up this thread.
So how to write a python client ?
Here is my code
# gRPC client script: builds one all-zero example and sends it to the
# Classify endpoint of a TensorFlow Serving instance at 127.0.0.1:9000.
import grpc
import tensorflow as tf
from tensorflow_serving.apis import classification_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
channel = grpc.insecure_channel('127.0.0.1:9000')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
import numpy as np
# One feature dict: three 128-long zero vectors plus a single label id.
examples = [{"input_ids": np.zeros((128), dtype=int).tolist(),
"input_mask": np.zeros((128), dtype=int).tolist(),
"label_ids": [0],
"segment_ids": np.zeros((128), dtype=int).tolist()}]
# ###########################################
# Copy every value of the first dict into int64 features of a tf.train.Example.
# NOTE(review): the two loop bodies below lost their indentation in this paste.
example = tf.train.Example()
for key, value in examples[0].items():
for v in value:
example.features.feature[key].int64_list.value.append(v)
request = classification_pb2.ClassificationRequest()
request.model_spec.name = 'default'
request.model_spec.signature_name = 'serving_default'
# NOTE(review): this extends the proto field with `examples` (the list of
# plain dicts), not with the `example` proto built above; this is the line
# the traceback quoted after this snippet points at.
request.input.example_list.examples.extend(examples)
# NOTE(review): 10.0 is presumably the call timeout in seconds -- confirm.
response = stub.Classify(request, 10.0)
the ERROR:
from ._conv import register_converters as _register_converters
Traceback (most recent call last):
File "/home/xzc/PycharmProjects/TF_Serving/bert_client.py", line 27, in <module>
request.input.example_list.examples.extend(examples)
TypeError: Not a cmessage
@Gpwner you didn't use tensorproto to make the message https://github.com/lapolonio/bert/blob/classify_ag_news/run_app.py#L48
@lapolonio solution fails on tensorflow 1.13.0rc, I have tried running the colab
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) in () 20 export_dir_base = 'gs://{}/bert/{}'.format(BUCKET,EXPORT_MODEL_DIR) 21 estimator._export_to_tpu = False # this is important ---> 22 path = estimator.export_saved_model(export_dir_base,serving_input_fn) 23 print(path) /usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in export_saved_model(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path) 708 as_text=as_text, 709 checkpoint_path=checkpoint_path, --> 710 strip_default_attrs=True) 711 712 def _export_saved_model_for_mode( /usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in export_savedmodel(self, export_dir_base, serving_input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs) 637 checkpoint_path=checkpoint_path, 638 strip_default_attrs=strip_default_attrs, --> 639 mode=model_fn_lib.ModeKeys.PREDICT) 640 641 def export_saved_model( /usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _export_saved_model_for_mode(self, export_dir_base, input_receiver_fn, assets_extra, as_text, checkpoint_path, strip_default_attrs, mode) 763 as_text=as_text, 764 checkpoint_path=checkpoint_path, --> 765 strip_default_attrs=strip_default_attrs) 766 767 def _export_all_saved_models( /usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _export_all_saved_models(self, export_dir_base, input_receiver_fn_map, assets_extra, as_text, checkpoint_path, strip_default_attrs) 881 builder, input_receiver_fn_map, checkpoint_path, 882 strip_default_attrs, save_variables, --> 883 mode=model_fn_lib.ModeKeys.PREDICT) 884 save_variables = False 885 TypeError: _add_meta_graph_for_mode() got multiple values for argument 'mode'
i got the same error, do you fix it ?
@yajian how did you get that error? at the top of the script, I install tensorflow==1.12
https://colab.research.google.com/gist/lapolonio/5ac6552fa4d58ea1419cc444d42ff5ff/bert-finetuning-with-cloud-tpu-sentence-and-sentence-pair-classification-tasks.ipynb#scrollTo=Ns3IlhryMY-f
@yajian how did you get that error? at the top of the script, I install tensorflow==1.12
https://colab.research.google.com/gist/lapolonio/5ac6552fa4d58ea1419cc444d42ff5ff/bert-finetuning-with-cloud-tpu-sentence-and-sentence-pair-classification-tasks.ipynb#scrollTo=Ns3IlhryMY-f
@lapolonio Thanks for the reply. I think it really is a version problem. I can run your demo on 1.10, but I get errors on 1.12.
Finally, I figured it out.
########################################save pb model:
def serving_input_fn():
    """Return a raw-tensor ``ServingInputReceiver`` for the exported model.

    Four int32 placeholders are created: ``label_ids`` with shape ``[None]``
    and ``input_ids`` / ``input_mask`` / ``segment_ids`` with shape
    ``[None, FLAGS.max_seq_length]``. They are passed straight through as
    features, so a client sends plain tensors instead of serialized
    tf.Example protos.

    Returns:
        The object produced by calling the function returned from
        ``tf.estimator.export.build_raw_serving_input_receiver_fn``.
    """
    seq_len = FLAGS.max_seq_length
    placeholders = {
        'label_ids': tf.placeholder(tf.int32, [None], name='label_ids'),
        'input_ids': tf.placeholder(tf.int32, [None, seq_len], name='input_ids'),
        'input_mask': tf.placeholder(tf.int32, [None, seq_len], name='input_mask'),
        'segment_ids': tf.placeholder(tf.int32, [None, seq_len], name='segment_ids'),
    }
    # The builder returns a function; it is invoked immediately so that this
    # function itself returns the receiver, as export_savedmodel expects.
    build_receiver = tf.estimator.export.build_raw_serving_input_receiver_fn(
        placeholders)
    return build_receiver()
...
# NOTE(review): presumably disables the TPU export path; the opening post of
# this thread reports a TPU-inference NotImplementedError from
# export_savedmodel on a TPUEstimator -- confirm against the estimator source.
estimator._export_to_tpu = False # this is important
# Write the SavedModel under FLAGS.output_dir using the receiver defined above.
estimator.export_savedmodel(FLAGS.output_dir, serving_input_fn)
###############################################client
import time

# Connect to the TensorFlow Serving gRPC endpoint.
channel = grpc.insecure_channel('IP:PORT')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

# Dict form of the same all-zero example (not used by the request below).
data = [{
    "input_ids": np.zeros((128), dtype=int).tolist(),
    "input_mask": np.zeros((128), dtype=int).tolist(),
    "label_ids": [0],
    "segment_ids": np.zeros((128), dtype=int).tolist(),
}]

# Build a Predict request against the 'serving_default' signature of 'bert'.
request = predict_pb2.PredictRequest()
request.model_spec.name = 'bert'
request.model_spec.signature_name = 'serving_default'

# The three sequence inputs are each one row of 128 zeros.
for name in ('input_ids', 'input_mask', 'segment_ids'):
    request.inputs[name].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            np.zeros((128), dtype=int).tolist(), shape=[1, 128]))
request.inputs['label_ids'].CopyFrom(
    tf.contrib.util.make_tensor_proto([0], shape=[1, 1]))

# Time a single round trip.
begin = time.time()
result = stub.Predict(request, 10.0)  # 10 secs timeout
end = time.time() - begin
print('time {}'.format(end))
print('length of probabilities:{}'.format(
    len(result.outputs['probabilities'].float_val)))
@Gpwner you didn't use tensorproto to make the message https://github.com/lapolonio/bert/blob/classify_ag_news/run_app.py#L48
@lapolonio thanks !
@Olivia-xu No. What is your error?
I have a walkthrough here: https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!
I am very sorry about the reply so late. I think this code is right. My environment is set up incorrectly. After I changed to the python3 environment, it runs correctly. Sorry.
export bert with tf.Saver, just like https://github.com/yajian/bert/blob/master/model_exporter.py
@ahzz1207 I created an example of serving in google cloud platform using Kubernetes. https://github.com/lapolonio/bert/blob/classify_ag_news/make_containers.sh are the commands. I'm writing a post for medium. https://medium.com/@lapolonio/enterprise-solution-for-text-classification-using-bert-9fe2b7234c46 Feedback appreciated!
Hi, i corrected this problem one day after but forgot to reply.
You can see this Text multi-label classification with Bert as service in here.I hoping can get your advise!
@lapolonio @Gpwner @bigboNed3
Finally, I figured it out.
########################################save pb model:def serving_input_fn(): label_ids = tf.placeholder(tf.int32, [None], name='label_ids') input_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_ids') input_mask = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='input_mask') segment_ids = tf.placeholder(tf.int32, [None, FLAGS.max_seq_length], name='segment_ids') input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn({ 'label_ids': label_ids, 'input_ids': input_ids, 'input_mask': input_mask, 'segment_ids': segment_ids, })() return input_fn ... estimator._export_to_tpu = False # this is important estimator.export_savedmodel(FLAGS.output_dir, serving_input_fn)
###############################################client
import time

# Connect to the TensorFlow Serving gRPC endpoint.
channel = grpc.insecure_channel('IP:PORT')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

# Dict form of the same all-zero example (not used by the request below).
data = [{
    "input_ids": np.zeros((128), dtype=int).tolist(),
    "input_mask": np.zeros((128), dtype=int).tolist(),
    "label_ids": [0],
    "segment_ids": np.zeros((128), dtype=int).tolist(),
}]

# Build a Predict request against the 'serving_default' signature of 'bert'.
request = predict_pb2.PredictRequest()
request.model_spec.name = 'bert'
request.model_spec.signature_name = 'serving_default'

# The three sequence inputs are each one row of 128 zeros.
for name in ('input_ids', 'input_mask', 'segment_ids'):
    request.inputs[name].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            np.zeros((128), dtype=int).tolist(), shape=[1, 128]))
request.inputs['label_ids'].CopyFrom(
    tf.contrib.util.make_tensor_proto([0], shape=[1, 1]))

# Time a single round trip.
begin = time.time()
result = stub.Predict(request, 10.0)  # 10 secs timeout
end = time.time() - begin
print('time {}'.format(end))
print('length of probabilities:{}'.format(
    len(result.outputs['probabilities'].float_val)))
Hi, I am trying to make a tensorflow-serving client for the SQUAD task. However, I get the error grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with: status = StatusCode.INVALID_ARGUMENT details = "unique_ids_1_1:0 is both fed and fetched."
This is the output of my saved_model_cli
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['input_ids'] tensor_info:
dtype: DT_INT64
shape: (-1, 384)
name: input_ids_1:0
inputs['input_mask'] tensor_info:
dtype: DT_INT64
shape: (-1, 384)
name: input_mask_1:0
inputs['segment_ids'] tensor_info:
dtype: DT_INT64
shape: (-1, 384)
name: segment_ids_1:0
inputs['unique_ids'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: unique_ids_1_1:0
The given SavedModel SignatureDef contains the following output(s):
outputs['end_logits'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 384)
name: unstack:1
outputs['start_logits'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 384)
name: unstack:0
outputs['unique_ids'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: unique_ids_1_1:0
Method name is: tensorflow/serving/predict
From what I understand, I can't have the same tensor in both the inputs and the outputs. Is there any workaround for this? Should I just remove unique_ids from the output tensors when exporting the model?
@ronalddas yeah I think you can take it out. i think that the order of predictions in a batch is stable. You can test locally and verify.
Hi guys first off I have to say I am new to tensorflow and machine learning.
I have been trying to get this fine-tuned classification working for the last 2-3 weeks and I'm getting closer thanks to all the useful posts on here.
@lapolonio I have done a classifier similar to your AgnewsProcessor but instead it classifies emotions - 'anger', 'joy' etc... It's called the IsearProcessor and I have fine-tuned on the Isear.csv dataset. Thanks for your samples they have really helped!
I'm at the point now where I have a fine-tuned model 'model_6' and am posting to Flask locally using run_app.py. I have tensorflow/serving running locally with "docker run -p 8500:8500 -p 8501:8501 --mount type=bind,source=c:\models\model_6,target=/models/model_6 -e MODEL_NAME=model_6 -t tensorflow/serving"
It is getting a prediction from the model every time but it's always the same prediction - joy !!
I think there might be something missing in my serving_input_receiver_fn() but I've tried a lot of variations and they all seem to lead back to this one which has been posted above.
I would really appreciate some guidance on this - I'm stuck in a rut at the moment but I feel it may be something small I am missing! The code for the serving_input_receiver_fn() and IsearProcessor are below.
As I mentioned, my problem is that the same prediction is being returned each time - "joy" and/or the float_val are always around the same:
float_val: 0.14242129027843475
float_val: 0.1477668285369873
float_val: 0.13308830559253693
float_val: 0.1376849114894867
float_val: 0.15062256157398224
float_val: 0.1419164538383484
float_val: 0.14649958908557892
Thanks,
- Actually, forgot to mention - when I run the test data from test.tsv and do-predict=True then the predictions work ok!
if FLAGS.do_serve:
    # Schema used to parse the serialized tf.Example protos sent by a client.
    # NOTE(review): none of the entries has a default value, so presumably a
    # request must carry all five features (including label_ids and
    # is_real_example) -- confirm against the FixedLenFeature documentation.
    feature_spec = {
        "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
        "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
        "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
        "label_ids": tf.FixedLenFeature([], tf.int64),
        "is_real_example": tf.FixedLenFeature([], tf.int64),
    }

    def serving_input_receiver_fn():
        """Build a receiver that parses a batch of serialized tf.Example strings.

        Returns:
            A ``ServingInputReceiver`` whose receiver tensor is the string
            placeholder ``'examples'`` and whose features are the tensors
            parsed from it according to ``feature_spec``.
        """
        #with tf.variable_scope("foo"):
        serialized_tf_example = tf.placeholder(
            dtype=tf.string, shape=[None], name='input_example_tensor')
        receiver_tensors = {'examples': serialized_tf_example}
        features = tf.parse_example(serialized_tf_example, feature_spec)
        return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
.......
class IsearProcessor(DataProcessor):
    """Processor for the ISEAR emotion data set (seven emotion labels)."""

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        # The set type is only used to build the guid and to detect "test".
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")),
            "dev_matched")

    def get_test_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_labels(self):
        """See base class."""
        return [
            "anger",
            "disgust",
            "fear",
            "guilt",
            "joy",
            "sadness",
            "shame",
        ]

    def _create_examples(self, lines, set_type):
        """Creates one example per row for the train, dev and test sets.

        Args:
            lines: Rows as returned by ``_read_tsv``; the first row is a
                header and is skipped.
            set_type: Name of the split, forwarded to ``create_example``.

        Returns:
            A list with one example per non-header row.
        """
        examples = []
        for (i, line) in enumerate(lines):
            if i == 0:  # for header
                continue
            # The per-row builder is defined as `create_example`; calling
            # `_create_example` here raised AttributeError.
            examples.append(self.create_example(line, set_type))
        return examples

    def create_example(self, line, set_type):
        """Convert one row into an ``InputExample``.

        Args:
            line: One row; column 0 is used for the guid, column 1 is the
                text and the last column is the label.
            set_type: Name of the split; "test" rows get a fixed label.

        Returns:
            An ``InputExample`` with ``guid``, ``text_a`` and ``label`` set.
        """
        guid = "%s-%s" % (set_type, tokenization.convert_to_unicode(line[0]))
        text_a = tokenization.convert_to_unicode(line[1])
        if set_type == "test":
            # Test rows are given the constant label "joy"
            # (presumably a placeholder because test rows carry no label).
            label = "joy"
        else:
            label = tokenization.convert_to_unicode(line[-1])
        return InputExample(guid=guid, text_a=text_a, label=label)
@tomaryancit can you create a pull request? it's hard to figure out what's going on in the code you posted.
general suggestions:
how is the performance? during training is it increasing?
try testing examples where you know the class
@lapolonio many thanks for the reply.
I have created a pull request and uploaded run_classifier.py.
I am new to the github pull requests also, so hope I've done that Ok and you can see the file!
I'm happy with my testing on the model - it's just the serving that i am struggling with.
The idea of this fine-tuned BERT model is that I could feed in a sentence such as 'I am really happy' or 'I am feeling sad' and the model would classify the sentence under one of the 7 emotion labels that are specified the ISEAR dataset - https://www.unige.ch/cisa/research/materials-and-online-research/research-material/ .
However I am getting the same prediction every time. regardless of the sentence that i serve to the model.
I will upload the test.tsv and test_results.tsv in a while as they are on another pc.
Thanks again.
@lapolonio many thanks for the reply.
I have created a pull request and uploaded run_classifier.py.
I am new to the github pull requests also, so hope I've done that Ok and you can see the file! I'm happy with my testing on the model - it's just the serving that i am struggling with.
The idea of this fine-tuned BERT model is that I could feed in a sentence such as 'I am really happy' or 'I am feeling sad' and the model would classify the sentence under one of the 7 emotion labels that are specified the ISEAR dataset - https://www.unige.ch/cisa/research/materials-and-online-research/research-material/ .
However I am getting the same prediction every time. regardless of the sentence that i serve to the model.
I will upload the test.tsv and test_results.tsv in a while as they are on another pc.
Thanks again.
Hi, I met the same error as you - getting the same prediction every time - when I followed their methods three months ago. You can see this: https://github.com/ahzz1207/Bert-with-text-classification-and-service . I used my run_classifier file to correct this error. Hope that helps you.
@lapolonio many thanks for the reply.
I have created a pull request and uploaded run_classifier.py.
I am new to the github pull requests also, so hope I've done that Ok and you can see the file!
I'm happy with my testing on the model - it's just the serving that i am struggling with.
The idea of this fine-tuned BERT model is that I could feed in a sentence such as 'I am really happy' or 'I am feeling sad' and the model would classify the sentence under one of the 7 emotion labels that are specified the ISEAR dataset - https://www.unige.ch/cisa/research/materials-and-online-research/research-material/ .
However I am getting the same prediction every time. regardless of the sentence that i serve to the model.
I will upload the test.tsv and test_results.tsv in a while as they are on another pc.
Thanks again. Hi, I met the same error as you - getting the same prediction every time - when I followed their methods three months ago. You can see this: https://github.com/ahzz1207/Bert-with-text-classification-and-service . I used my run_classifier file to correct this error. Hope that helps you.
@ahzz1207 thanks for the reply.
When I click that link you provided I get "No results matched your search".
@lapolonio many thanks for the reply.
I have created a pull request and uploaded run_classifier.py.
I am new to the github pull requests also, so hope I've done that Ok and you can see the file!
I'm happy with my testing on the model - it's just the serving that i am struggling with.
The idea of this fine-tuned BERT model is that I could feed in a sentence such as 'I am really happy' or 'I am feeling sad' and the model would classify the sentence under one of the 7 emotion labels that are specified the ISEAR dataset - https://www.unige.ch/cisa/research/materials-and-online-research/research-material/ .
However I am getting the same prediction every time. regardless of the sentence that i serve to the model.
I will upload the test.tsv and test_results.tsv in a while as they are on another pc.
Thanks again.Hi, I meet the same error like you that getting the same prediction every time when I followed their methods three months ago. You can see thesehttps://github.com/ahzz1207/Bert-with-text-classification-and-service, I used my run_classifier file to correct this error.Hoping that would help you.
@ahzz1207 thanks for the reply.
When I click that link you provided I get "No results matched your search".
Or you can just go to my github page, you will find it easily with the name.
@ahzz1207 I got it here thank you https://github.com/ahzz1207/Bert-with-text-classification-and-service.
So I see you have test_serving_model.py as a version of run_classify.py .
First thing I notice is that you dont have a serving_input_receiver_fn() which is the suggested method for tensorflow serving.
Have you taken a different approach to serving the model? Not sure I understand how you are serving.
@ahzz1207 I got it here thank you https://github.com/ahzz1207/Bert-with-text-classification-and-service.
So I see you have test_serving_model.py as a version of run_classify.py .
First thing I notice is that you dont have a serving_input_receiver_fn() which is the suggested method for tensorflow serving.
Have you taken a different approach to serving the model? Not sure I understand how you are serving.
I'm not sure, but I think what you are referring to is the code at lines 615~650, in the method called input_fn.
@gkovacslmi @wazzy @fuxia0425 @sminder
Anyone has written code to serve the trained model for SQUAD? I am new to tensorflow and it'll be a great help if you could share your code.
Pointing me to some resource that can teach me how to write a serving_function and then use the saved model to make predictions for BERT-for-QuestionAnswering(SQUAD) will be a great help too.
I have the same question. Have you solved the problem?
What is the size of your fine-tuned bert model?
Mine is 680MB (model is 900KB and variables is 678.47 MB), which is much larger than the 250MB limit for Google's AI Platform. I presume my model size is typical, or is there something I can do to cut down on the model size? (I have already requested Google to increase this quota for me)
estimator._export_to_tpu = False
This single line saved my life. Shouldn't it be in the documentation, or did I just miss it?
And how do I run a prediction on a text input through tensorflow-serving once I have the BERT pb model? Thanks.
I've been playing around with the IMDB classifier example, and to finish up have been trying to write a serving_input_fn
which will take plain text as input and calculate the features to pass to the model, to save a client needing to calculate them. I've managed to write one that saves happily without errors, but when I reload and try to predict, I keep getting an error of the form:
~/ml_exp/test_env/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1368 pass
1369 message = error_interpolation.interpolate(message, self._graph)
-> 1370 raise type(e)(node_def, op, message)
1371
1372 def _extend_graph(self):
InvalidArgumentError: Could not parse example input, value: '[{"input_ids": [101, 23435, 1006, 1000, 7953, 1035, 5164, 1035, 3793, 1024, 1014, 1000, 1010, 26718, 18863, 1027, 5164, 1007, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "input_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "segment_ids": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "label_ids": 0}, {"input_ids": [101, 23435, 1006, 1000, 7953, 1035, 5164, 1035, 3793, 1024, 1014, 1000, 1010, 26718, 18863, 1027, 5164, 1007, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "input_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "segment_ids": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "label_ids": 0}]'
[[node ParseExample/ParseExample (defined at :7) ]]
My code is below. I've been hitting my head against this for days and not getting anywhere. The example looks to be of the right format to me. Am I doing something stupid, or is this an issue?
import json
import base64


def plain_text_serving_input_fn():
    """Build a ServingInputReceiver whose only receiver tensor is 'input_text'.

    The features returned to the estimator come from tf.parse_example applied
    to a JSON string that is assembled in plain Python below.

    NOTE(review): InputExample / convert_examples_to_features are ordinary
    Python calls and never read the value that is later fed to the
    placeholder -- presumably they execute only once, when the export calls
    this function. TODO confirm.
    """
    input_string = tf.placeholder(dtype=tf.string, shape=None, name='input_string_text')
    # What format to expect input in.
    receiver_tensors = {'input_text': input_string}
    # NOTE(review): str() is applied to the placeholder object itself, so the
    # text being tokenized is presumably the tensor's repr rather than the
    # request text; the token ids shown in the reported error look consistent
    # with that -- confirm.
    input_examples = [run_classifier.InputExample(guid="", text_a = str(input_string), text_b = None, label = 0)] # here, "" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    variables = {}
    # Each iteration overwrites the same four keys, so only the last feature
    # is kept.
    for i in input_features:
        variables["input_ids"] = i.input_ids
        variables["input_mask"] = i.input_mask
        variables["segment_ids"] = i.segment_ids
        variables["label_ids"] = i.label_id
    feature_spec = {
        "input_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "input_mask" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "segment_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "label_ids" : tf.FixedLenFeature([], tf.int64)
    }
    string_variables = json.dumps(variables)
    # Encoding to base64 and decoding straight away yields the same UTF-8
    # JSON bytes again.
    encode_input = base64.b64encode(string_variables.encode('utf-8'))
    encode_string = base64.decodestring(encode_input)
    # NOTE(review): tf.parse_example expects serialized tf.train.Example
    # protos; passing JSON bytes would explain the reported
    # "Could not parse example input" error -- confirm.
    features_to_input = tf.parse_example([encode_string], feature_spec)
    return tf.estimator.export.ServingInputReceiver(features_to_input, receiver_tensors)


# Turn off the estimator's TPU export before exporting.
estimator._export_to_tpu = False
estimator.export_saved_model(
export_dir_base,
plain_text_serving_input_fn
)
# NOTE(review): the export is written under export_dir_base, but the model is
# loaded from export_dir, which is not defined in this snippet -- confirm it
# points at the exported model directory.
predict_fn = predictor.from_saved_model(export_dir)
predictions = predict_fn({'input_text': "This is some example text" })
I've been playing around with the IMDB classifier example, and to finish up have been trying to write a
serving_input_fn
which will take plain text as input and calculate the features to pass to the model, to save a client needing to calculate them. I've managed to write one that saves happily without errors, but when I reload and try to predict, I keep getting an error of the form:~/ml_exp/test_env/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1368 pass
1369 message = error_interpolation.interpolate(message, self._graph)
-> 1370 raise type(e)(node_def, op, message)
1371
1372 def _extend_graph(self):
InvalidArgumentError: Could not parse example input, value: '[{"input_ids": [101, 23435, 1006, 1000, 7953, 1035, 5164, 1035, 3793, 1024, 1014, 1000, 1010, 26718, 18863, 1027, 5164, 1007, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "input_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "segment_ids": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "label_ids": 0}, {"input_ids": [101, 23435, 1006, 1000, 7953, 1035, 5164, 1035, 3793, 1024, 1014, 1000, 1010, 26718, 18863, 1027, 5164, 1007, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "input_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "segment_ids": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "label_ids": 0}]'
[[node ParseExample/ParseExample (defined at :7) ]]My code is below. I've been hitting my head against this for days and not getting anywhere. The example looks to be of the right format to me. Am I doing something stupid, or is this an issue?
import json import base64 def plain_text_serving_input_fn(): input_string = tf.placeholder(dtype=tf.string, shape=None, name='input_string_text') # What format to expect input in. receiver_tensors = {'input_text': input_string} input_examples = [run_classifier.InputExample(guid="", text_a = str(input_string), text_b = None, label = 0)] # here, "" is just a dummy label input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer) variables = {} for i in input_features: variables["input_ids"] = i.input_ids variables["input_mask"] = i.input_mask variables["segment_ids"] = i.segment_ids variables["label_ids"] = i.label_id feature_spec = { "input_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64), "input_mask" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64), "segment_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64), "label_ids" : tf.FixedLenFeature([], tf.int64) } string_variables = json.dumps(variables) encode_input = base64.b64encode(string_variables.encode('utf-8')) encode_string = base64.decodestring(encode_input) features_to_input = tf.parse_example([encode_string], feature_spec) return tf.estimator.export.ServingInputReceiver(features_to_input, receiver_tensors) estimator._export_to_tpu = False estimator.export_saved_model( export_dir_base, plain_text_serving_input_fn ) predict_fn = predictor.from_saved_model(export_dir) predictions = predict_fn({'input_text': "This is some example text" })
I'm pretty new to TF and I could not figure out what is missing in this. Did anyone get this to work?
Mark
One thing that I don't understand in code snippets in this issue/thread: if this is for serving/predicting, not for training, then what's the need for label_ids
in build_raw_serving_input_receiver_fn
and similar functions..?
that's what the model input function specifies but depending on the mode it might not be used
@AndrewMcDowell were you able to resolve the issue of InvalidArgumentError?
@pcnfernando No, unfortunately not. I spent a while trying different things, but nothing worked for me.
@AndrewMcDowell it looks like your features_to_input, receiver_tensors don't match. check out https://stackoverflow.com/questions/53410469/tensorflow-estimator-servinginputreceiver-features-vs-receiver-tensors-when-and
Your concrete next steps are checking that parse_example returns a SparseTensor
I also encountered this problem, which made me exhausted.
But when I checked my environment, I found that they were not the same. Then I reinstall the environment and solved the problem.
have problem:
serving env:
TensorFlow ModelServer: 1.10.0-dev
TensorFlow Library: 1.9.0
export env :
TensorFlow: 1.12.0
no problem:
serving env:
TensorFlow ModelServer: 1.10.0-dev
TensorFlow Library: 1.9.0
export env :
TensorFlow: 1.10.0
@lapolonio the intention is to input a text as input and the model to handle feature extraction within the model itself and thereafter return the result.
Following is the process of saving the model. Can you give some insight on what needs to be changed here?
import json
import base64


def plain_text_serving_input_fn():
    """Build a ServingInputReceiver that exposes a single 'input_text' receiver.

    NOTE(review): the tokenization below is plain Python and is not wired to
    the placeholder's runtime value; presumably it runs only at export time,
    so the exported graph would not extract features from the request text.
    TODO confirm.
    """
    input_string = tf.placeholder(dtype=tf.string, shape=None, name='input_string_text')
    # What format to expect input in.
    receiver_tensors = {'input_text': input_string}
    # NOTE(review): str(input_string) stringifies the placeholder object, not
    # the text sent by a client -- confirm.
    input_examples = [run_classifier.InputExample(guid="", text_a = str(input_string), text_b = None, label = 0)] # here, "" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    variables = {}
    # The same keys are reassigned on every iteration; the last feature wins.
    for i in input_features:
        variables["input_ids"] = i.input_ids
        variables["input_mask"] = i.input_mask
        variables["segment_ids"] = i.segment_ids
        variables["label_ids"] = i.label_id
    feature_spec = {
        "input_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "input_mask" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "segment_ids" : tf.FixedLenFeature([MAX_SEQ_LENGTH], tf.int64),
        "label_ids" : tf.FixedLenFeature([], tf.int64)
    }
    string_variables = json.dumps(variables)
    # The base64 round trip returns the original JSON bytes unchanged.
    encode_input = base64.b64encode(string_variables.encode('utf-8'))
    encode_string = base64.decodestring(encode_input)
    # NOTE(review): tf.parse_example is given JSON bytes here, while it
    # expects serialized tf.train.Example protos -- confirm.
    features_to_input = tf.parse_example([encode_string], feature_spec)
    return tf.estimator.export.ServingInputReceiver(features_to_input, receiver_tensors)


# Turn off the estimator's TPU export before exporting.
estimator._export_to_tpu = False
estimator.export_saved_model('/anaconda3/envs/tf/exported_with_txt',plain_text_serving_input_fn)
@pcnfernando you don't need to use parse_example refer to build_raw_serving_input_receiver_fn above
I exported using build_raw_serving_input_receiver_fn and below is my signature def.
(base) Chirans-MacBook-Pro:bin chiranfernando$ ./saved_model_cli show --all --dir /anaconda3/envs/tf/text_input_model/1569807768 --tag_set serve
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['input_ids'] tensor_info:
dtype: DT_INT32
shape: (-1, 128)
name: input_ids_1:0
inputs['input_mask'] tensor_info:
dtype: DT_INT32
shape: (-1, 128)
name: input_mask_1:0
inputs['label_ids'] tensor_info:
dtype: DT_INT32
shape: (-1)
name: label_ids_1:0
inputs['segment_ids'] tensor_info:
dtype: DT_INT32
shape: (-1, 128)
name: segment_ids_1:0
The given SavedModel SignatureDef contains the following output(s):
outputs['labels'] tensor_info:
dtype: DT_INT32
shape: unknown_rank
name: loss/Squeeze:0
outputs['probabilities'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 2)
name: loss/LogSoftmax:0
Method name is: tensorflow/serving/predict
My confusion is how I can pass the sentence to it. Can you please reference me a sample input.
I'm trying to use the model with tensorflow java. I'm not using tensorflow serving.
@pcnfernando
you have to tokenize the string first. which turns the words into numbers. then those numbers are fed into the model.
As a summary and simplification of all comments above , I found the following method useful in generating SavedModel. The method bypasses serialization and deserialization related with tf.InputExample
and the generated SavedModel accepts numpy arrays as inputs and so for benchmarking purpose it suffices to give synthetic array inputs (e.g., using np.zeros
). You may edit the estimator.export_saved_model
function call to put Savedmodel to another path.
- Apply the following patch to this repo
diff --git a/run_classifier.py b/run_classifier.py
index 817b147..c9426bc 100644
--- a/run_classifier.py
+++ b/run_classifier.py
@@ -955,6 +955,18 @@ def main(_):
drop_remainder=predict_drop_remainder)
result = estimator.predict(input_fn=predict_input_fn)
+ features = {
+ "input_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_ids'),
+ "input_mask": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_mask'),
+ "segment_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='segment_ids'),
+ "label_ids": tf.placeholder(shape=[None], dtype=tf.int32, name='label_ids'),
+ "is_real_example": tf.placeholder(shape=[None], dtype=tf.int32, name='is_real_example'),
+ }
+ serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(features)
+ estimator._export_to_tpu = False ## !!important to add this
+ estimator.export_saved_model(
+ export_dir_base='./bert_classifier_saved_model',
+ serving_input_receiver_fn=serving_input_fn)
output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
with tf.gfile.GFile(output_predict_file, "w") as writer:
diff --git a/run_squad.py b/run_squad.py
index edd4c3e..7b49d9b 100644
--- a/run_squad.py
+++ b/run_squad.py
@@ -1250,6 +1250,18 @@ def main(_):
is_training=False,
drop_remainder=False)
+ features = {
+ "input_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_ids'),
+ "input_mask": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_mask'),
+ "segment_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='segment_ids'),
+ "unique_ids": tf.placeholder(shape=[None], dtype=tf.int32, name='unique_ids'),
+ }
+ serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(features)
+ estimator._export_to_tpu = False ## !!important to add this
+ estimator.export_saved_model(
+ export_dir_base='./bert_squad_saved_model',
+ serving_input_receiver_fn=serving_input_fn)
+
# If running eval on the TPU, you will need to specify the number of
# steps.
all_results = []
- Execute run_classifier.py or run_squad.py with
--do_train=true --do_predict=true
, then find the SavedModel in ./bert_classifier_saved_model or ./bert_squad_saved_model.
As a summary and simplification of all comments above , I found the following method useful in generating SavedModel. The method bypasses serialization and deserialization related with
tf.InputExample
and the generated SavedModel accepts numpy arrays as inputs and so for benchmarking purpose it suffices to give synthetic array inputs (e.g., usingnp.zeros
). You may edit theestimator.export_saved_model
function call to put Savedmodel to another path.
- Apply the following patch to this repo
diff --git a/run_classifier.py b/run_classifier.py index 817b147..c9426bc 100644 --- a/run_classifier.py +++ b/run_classifier.py @@ -955,6 +955,18 @@ def main(_): drop_remainder=predict_drop_remainder) result = estimator.predict(input_fn=predict_input_fn) + features = { + "input_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_ids'), + "input_mask": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_mask'), + "segment_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='segment_ids'), + "label_ids": tf.placeholder(shape=[None], dtype=tf.int32, name='label_ids'), + "is_real_example": tf.placeholder(shape=[None], dtype=tf.int32, name='is_real_example'), + } + serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(features) + estimator._export_to_tpu = False ## !!important to add this + estimator.export_saved_model( + export_dir_base='./bert_classifier_saved_model', + serving_input_receiver_fn=serving_input_fn) output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv") with tf.gfile.GFile(output_predict_file, "w") as writer: diff --git a/run_squad.py b/run_squad.py index edd4c3e..7b49d9b 100644 --- a/run_squad.py +++ b/run_squad.py @@ -1250,6 +1250,18 @@ def main(_): is_training=False, drop_remainder=False) + features = { + "input_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_ids'), + "input_mask": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='input_mask'), + "segment_ids": tf.placeholder(shape=[None, FLAGS.max_seq_length], dtype=tf.int32, name='segment_ids'), + "unique_ids": tf.placeholder(shape=[None], dtype=tf.int32, name='unique_ids'), + } + serving_input_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(features) + estimator._export_to_tpu = False ## !!important to add this + estimator.export_saved_model( + export_dir_base='./bert_squad_saved_model', + 
serving_input_receiver_fn=serving_input_fn) + # If running eval on the TPU, you will need to specify the number of # steps. all_results = []
- Execute run_classifier.py or run_squad.py with
--do_train=true --do_predict=true
, then find the SavedModel in ./bert_classifier_saved_model or ./bert_squad_saved_model.
The serialization was unnecessarily complicating the process for me. Following your advice I got rid of it and things went much more smoothly. Thank you.
Maybe check this out if you are looking for serving BERT fine-tuned model.
BERT Serving and Inferencing from fine-tuned
When I export my model as follows, how can I send batch requests to the server?
Thanks in advance for all the help.
signature_def['serving_default']:
The given SavedModel SignatureDef contains the following input(s):
inputs['examples'] tensor_info:
dtype: DT_STRING
shape: unknown_rank
name: input_example_tensor:0
The given SavedModel SignatureDef contains the following output(s):
outputs['end_logits'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 384)
name: unstack:1
outputs['start_logits'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 384)
name: unstack:0
outputs['unique_ids'] tensor_info:
dtype: DT_INT64
shape: (-1)
name: ParseExample/ParseExample:4
Method name is: tensorflow/serving/predict
Hi did you figure out how to do batch prediction?
I am training a Bert model using a google cloud storage bucket with the given data set. For some reason, at the final step, I get this error,
Tensor conversion requested dtype string for Tensor with dtype float32.
At first, I thought it was a problem with my dataset, but after some alterations with it, it did not turn out to be that. Does anyone have any ideas on why it is giving this exception?
2020-06-13 02:50:45,710 : Calling model_fn.
2020-06-13 02:50:45,723 : Error recorded from training_loop: in converted code:
relative to /usr/local/lib/python3.6/dist-packages/tensorflow_core/python:
data/ops/readers.py:336 __init__
filenames, compression_type, buffer_size, num_parallel_reads)
data/ops/readers.py:296 __init__
filenames = _create_or_validate_filenames_dataset(filenames)
data/ops/readers.py:56 _create_or_validate_filenames_dataset
filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
framework/ops.py:1184 convert_to_tensor
return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
framework/ops.py:1242 convert_to_tensor_v2
as_ref=False)
framework/ops.py:1273 internal_convert_to_tensor
(dtype.name, value.dtype.name, value))
ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: <tf.Tensor 'args_0:0' shape=() dtype=float32>
2020-06-13 02:50:45,724 : training_loop marked as finished
2020-06-13 02:50:45,725 : Reraising captured error
ValueError Traceback (most recent call last)
in ()
----> 1 estimator.train(input_fn=train_input_fn, max_steps=TRAIN_STEPS)
27 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
ValueError: in converted code:
relative to /usr/local/lib/python3.6/dist-packages/tensorflow_core/python:
data/ops/readers.py:336 __init__
filenames, compression_type, buffer_size, num_parallel_reads)
data/ops/readers.py:296 __init__
filenames = _create_or_validate_filenames_dataset(filenames)
data/ops/readers.py:56 _create_or_validate_filenames_dataset
filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
framework/ops.py:1184 convert_to_tensor
return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
framework/ops.py:1242 convert_to_tensor_v2
as_ref=False)
framework/ops.py:1273 internal_convert_to_tensor
(dtype.name, value.dtype.name, value))
ValueError: Tensor conversion requested dtype string for Tensor with dtype float32: <tf.Tensor 'args_0:0' shape=() dtype=float32>
Hi,
I'm trying to serve BERT-squad model on docker with REST
- I export BERT-squad model with below code AND got the
export_model (saved_model.pb, variables, ... etc)
files
def serving_input_receiver_fn():
    """Serving input receiver that parses serialized tf.Example protos.

    Returns:
        A tf.estimator.export.ServingInputReceiver whose receiver tensor
        'examples' is a 1-D string tensor of serialized examples and whose
        features are the parsed unique_ids, input_ids, input_mask and
        segment_ids tensors described by feature_spec.
    """
    feature_spec = {
        "unique_ids": tf.FixedLenFeature([], tf.int64),
        "input_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
        "input_mask": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
        "segment_ids": tf.FixedLenFeature([FLAGS.max_seq_length], tf.int64),
    }
    # The leading dimension is None (the original fixed it to 1) so that one
    # request may carry any number of serialized examples; a single example
    # still works as before.
    serialized_tf_example = tf.placeholder(dtype=tf.string,
                                           shape=[None],
                                           name='input_example_tensor')
    receiver_tensors = {'examples': serialized_tf_example}
    features = tf.parse_example(serialized_tf_example, feature_spec)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)


# Turn off the estimator's TPU export before exporting.
estimator._export_to_tpu = False
estimator.export_savedmodel(os.path.join(FLAGS.output_dir, "export_model"), serving_input_receiver_fn)
- I run docker for serving
export_model
with REST (gRPC:8500, port:8501)
# Start TensorFlow Serving in the background, publishing port 8501 and
# mounting the exported model directory at /models/squad.
# The trailing backslashes join the lines into one command; without them
# each line would be executed as a separate shell command.
docker run -t --restart always -p 8501:8501 \
  -v "${QA_PATH}:/models/squad" \
  -e MODEL_NAME=squad \
  tensorflow/serving &
After that, I can't figure out how to build the input for a request to the BERT-squad model.
I tried read_squad_examples()
, FeatureWriter()
, convert_examples_to_features()
functions to make a input form. But I failed all (Reference: https://medium.com/@joyceye04/deploy-a-servable-bert-qa-model-using-tensorflow-serving-d848f9797d9)
In the BERT-classify case, you just map the text to numbers using the vocab. But for BERT-squad, I don't think it's that easy.
How can I build the input for a REST request?
has anyone been able to do this?
does this help?
https://www.tensorflow.org/guide/estimator#savedmodels_from_estimators
Okay finally figured out how to do this on Vertex AI with a SQuAD2 fine-tuned model using tensorflow version 1.15.
First of all per this comment deepjavalibrary/djl#152 to this issue deepjavalibrary/djl#152
we need to change the following around line 672 in run_squad.py:
...
predictions = {
"unique_ids": unique_ids,
"start_logits": start_logits,
"end_logits": end_logits,
}
...
to
...
predictions = {
"unique_ids": tf.identity(unique_ids),
"start_logits": start_logits,
"end_logits": end_logits,
}
...
to avoid "unique_ids is both fed and fetched" errors.
Then we can export the saved_model.pb by adding the following to run_squad.py (i add just after if FLAGS.do_predict:):
def serving_input_receiver_fn():
    """Return a raw serving input receiver built from int32 placeholders.

    The placeholders are named unique_ids, input_ids, input_mask and
    segment_ids; the three sequence inputs have a second dimension of
    FLAGS.max_seq_length.
    """
    seq_shape = [None, FLAGS.max_seq_length]
    raw_features = {
        'unique_ids': tf.placeholder(tf.int32, [None], name='unique_ids'),
        'input_ids': tf.placeholder(tf.int32, seq_shape, name='input_ids'),
        'input_mask': tf.placeholder(tf.int32, seq_shape, name='input_mask'),
        'segment_ids': tf.placeholder(tf.int32, seq_shape, name='segment_ids'),
    }
    # build_raw_serving_input_receiver_fn returns a function; calling it
    # yields the receiver object that the export expects from this function.
    make_receiver = tf.estimator.export.build_raw_serving_input_receiver_fn(raw_features)
    return make_receiver()


# Turn off the estimator's TPU export before exporting.
estimator._export_to_tpu = False
estimator.export_savedmodel(FLAGS.output_dir, serving_input_receiver_fn)
it is important to use raw_serving since "[...] the HTTP server for pre-built Vertex AI deployment containers does not support the HTTP request body for TensorFlow 1.x estimators. Using raw predict, one can send raw content through the HTTP server that is presented to the model input as-is -- no canonical processing." as per this guide: https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/ml_ops/stage6/get_started_with_raw_predict.ipynb
The usual predict script can then be run which exports a saved_model.pb file.
This model file can be manually imported and deployed in vertex ai using the vertex ai gui.
To make a prediction
Read input data like this
# Read the SQuAD dev file and turn its first example into model features.
input_data = "./dev-v2.0.json"
tokenizer = tokenization.FullTokenizer(vocab_file="./vocab.txt", do_lower_case=True)
eval_examples = read_squad_examples(input_data, is_training=False)
# Keep only the first example, wrapped in a list.
eval_example = [eval_examples[0]]
# NOTE(review): two return values are unpacked here and a list is passed as
# output_fn; presumably convert_examples_to_features was modified locally to
# return them -- confirm against your copy of run_squad.py.
feature,input_feature = convert_examples_to_features(
examples=eval_example,
tokenizer=tokenizer,
max_seq_length=384,
doc_stride=128,
max_query_length=64,
is_training=False,
output_fn=[])
eval_features = [input_feature]
import json
from google.api import httpbody_pb2
from google.cloud import aiplatform_v1

# Fill these in with your own deployment values. REGION is defined up here
# because the endpoint path below interpolates it; in the original snippet
# it was assigned only after the f-string that uses it.
ENDPOINT_ID="ENDPOINT_ID"
PROJECT_ID="PROJECT_ID"
REGION="us-central1"

# Request payload: one instance holding the four raw model inputs.
# NOTE(review): assumes `feature` supports item access with these keys --
# confirm against how `feature` is produced.
DATA = {
    "signature_name": "serving_default",
    "instances": [
        {
            "unique_ids": feature["unique_id"],
            "input_ids": feature["input_ids"],
            "input_mask": feature["input_mask"],
            "segment_ids": feature["segment_ids"]
        }
    ]
}

# The JSON body is sent as-is through raw predict.
http_body = httpbody_pb2.HttpBody(
    data=json.dumps(DATA).encode("utf-8"),
    content_type="application/json",
)
req = aiplatform_v1.RawPredictRequest(
    http_body=http_body, endpoint=f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{ENDPOINT_ID}"
)

API_ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)
client_options = {"api_endpoint": API_ENDPOINT}
# Use the client from the already-imported aiplatform_v1 module; the
# original referenced an `aip` alias that this snippet never imports.
pred_client = aiplatform_v1.PredictionServiceClient(client_options=client_options)
response = pred_client.raw_predict(req)
print(response)
then the write_predictions function in run_squad.py can be used to write these
# Decode the raw-predict response body and repackage its first prediction in
# the RawResult form that write_predictions consumes.
data = json.loads(response.data)
RawResult = collections.namedtuple(
    "RawResult", ["unique_id", "start_logits", "end_logits"])

first_prediction = data["predictions"][0]
all_results = [
    RawResult(
        unique_id=first_prediction["unique_ids"],
        start_logits=first_prediction["start_logits"],
        end_logits=first_prediction["end_logits"],
    )
]

write_predictions(
    eval_example,
    eval_features,
    all_results,
    n_best_size=3,
    max_answer_length=64,
    do_lower_case=True,
    output_prediction_file="./results/preds.json",
    output_nbest_file="./results/nbest.json",
    output_null_log_odds_file="./results/null_log_odds.json",
    version_2_with_negative=True,
)