BigQueryExampleGen failing in InteractiveContext on 1.14.0
colinjc opened this issue · 7 comments
System information
- Have I specified the code to reproduce the issue (Yes, No): yes
- Environment in which the code is executed (e.g., Local(Linux/MacOS/Windows),
Interactive Notebook, Google Cloud, etc): Linux, Notebook
- TensorFlow version:
- TFX Version: 1.14.0
- Python version: 3.9
- Python dependencies (from `pip freeze` output):
Describe the current behavior
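When BigQueryExampleGen is run in a notebook through InteractiveContext without a custom_config argument, it fails with a TypeError raised by json.loads while decoding custom_config (see the traceback below). This may also be true for pipelines executed on KFP, but I haven't tried it.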
Describe the expected behavior
The query executes successfully
Standalone code to reproduce the issue
import tfx
example_gen = tfx.v1.extensions.google_cloud_big_query.BigQueryExampleGen(
query="SELECT * FROM ...",
)
context.run(example_gen, enable_cache=False)
Name of your Organization (Optional)
Other info / logs
Adding custom_config="{}" to the BigQueryExampleGen call works around this issue.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_1951224/1954825689.py in <cell line: 6>()
4 query="SELECT * FROM sdp-stg-cti-data.intermediate.nits_training_set",
5 )
----> 6 context.run(example_gen, enable_cache=False)
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/tfx/orchestration/experimental/interactive/notebook_utils.py in run_if_ipython(*args, **kwargs)
29 # __IPYTHON__ variable is set by IPython, see
30 # https://ipython.org/ipython-doc/rel-0.10.2/html/interactive/reference.html#embedding-ipython.
---> 31 return fn(*args, **kwargs)
32 else:
33 logging.warning(
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/tfx/orchestration/experimental/interactive/interactive_context.py in run(self, component, enable_cache, beam_pipeline_args)
162 telemetry_utils.LABEL_TFX_RUNNER: runner_label,
163 }):
--> 164 execution_id = launcher.launch().execution_id
165
166 return execution_result.ExecutionResult(
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/tfx/orchestration/launcher/base_component_launcher.py in launch(self)
204 # be immutable in this context.
205 # output_dict can still be changed, specifically properties.
--> 206 self._run_executor(execution_decision.execution_id,
207 copy.deepcopy(execution_decision.input_dict),
208 execution_decision.output_dict,
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/tfx/orchestration/launcher/in_process_component_launcher.py in _run_executor(self, execution_id, input_dict, output_dict, exec_properties)
71 # be immutable in this context.
72 # output_dict can still be changed, specifically properties.
---> 73 executor.Do(
74 copy.deepcopy(input_dict), output_dict, copy.deepcopy(exec_properties))
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/tfx/components/example_gen/base_example_gen_executor.py in Do(self, input_dict, output_dict, exec_properties)
277 logging.info('Generating examples.')
278 with self._make_beam_pipeline() as pipeline:
--> 279 example_splits = self.GenerateExamplesByBeam(pipeline, exec_properties)
280
281 # pylint: disable=expression-not-assigned, no-value-for-parameter
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/tfx/components/example_gen/base_example_gen_executor.py in GenerateExamplesByBeam(self, pipeline, exec_properties)
199 buckets.append(total_buckets)
200 example_splits = (
--> 201 pipeline
202 | 'InputToRecord' >>
203 # pylint: disable=no-value-for-parameter
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, pvalueish, _unused)
1090
1091 def __ror__(self, pvalueish, _unused=None):
-> 1092 return self.transform.__ror__(pvalueish, self.label)
1093
1094 def expand(self, pvalue):
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/apache_beam/transforms/ptransform.py in __ror__(self, left, label)
612 pvalueish = _SetInputPValues().visit(pvalueish, replacements)
613 self.pipeline = p
--> 614 result = p.apply(self, pvalueish, label)
615 if deferred:
616 return result
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
664 old_label, transform.label = transform.label, label
665 try:
--> 666 return self.apply(transform, pvalueish)
667 finally:
668 transform.label = old_label
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/apache_beam/pipeline.py in apply(self, transform, pvalueish, label)
710 transform.type_check_inputs(pvalueish)
711
--> 712 pvalueish_result = self.runner.apply(transform, pvalueish, self._options)
713
714 if type_options is not None and type_options.pipeline_type_check:
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/apache_beam/runners/runner.py in apply(self, transform, input, options)
199 ):
200 # TODO(robertwb): Remove indirection once internal references are fixed.
--> 201 return self.apply_PTransform(transform, input, options)
202
203 def apply_PTransform(self, transform, input, options):
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/apache_beam/runners/runner.py in apply_PTransform(self, transform, input, options)
203 def apply_PTransform(self, transform, input, options):
204 # TODO(robertwb): Remove indirection once internal references are fixed.
--> 205 return transform.expand(input)
206
207 def is_fnapi_compatible(self):
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/apache_beam/transforms/ptransform.py in expand(self, pcoll)
994 # Might not be a function.
995 pass
--> 996 return self._fn(pcoll, *args, **kwargs)
997
998 def default_label(self):
~/.pyenv/virtualenvs/cti-ml-xgboost/3.9.15/lib/python3.9/site-packages/tfx/extensions/google_cloud_big_query/example_gen/executor.py in _BigQueryToExample(pipeline, exec_properties, split_pattern)
67 converter = _BigQueryConverter(split_pattern, project)
68 big_query_custom_config = (
---> 69 json.loads(exec_properties['custom_config'])
70 if 'custom_config' in exec_properties
71 else None
~/.pyenv/versions/3.9.15/lib/python3.9/json/__init__.py in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
337 else:
338 if not isinstance(s, (bytes, bytearray)):
--> 339 raise TypeError(f'the JSON object must be str, bytes or bytearray, '
340 f'not {s.__class__.__name__}')
341 s = s.decode(detect_encoding(s), 'surrogatepass')
TypeError: the JSON object must be str, bytes or bytearray, not NoneType
The BigQueryExampleGen executor code is written to accept custom_config in the following format:
custom_config = json.dumps({'query_key': 'query_value'})
However, when custom_config is not passed to BigQueryExampleGen, it fails with the error: TypeError: the JSON object must be str, bytes or bytearray, not NoneType.
@colinjc, Thank you for bringing this to our attention.
@roseayeon, loading the BigQuery custom config fails when we don't pass the custom_config param to BigQueryExampleGen, or when we set custom_config=None or custom_config={}. Is this expected? Thanks
Oh, this is not an intended behavior. Let me fix it shortly.
I am facing the same error while using BigQueryExampleGen. Any update on the fix?
Facing the same error. Any update on the fix?
Hey, is this still not fixed? @lego0901
Adding custom_config=json.dumps({}) works as a temporary fix.
This is fixed. Thanks for all your comments!