Fixed bug in acryl-datahub still happening in datahub-actions
sebaap opened this issue · 7 comments
I'm still facing the issue that was fixed last week in datahub-project/datahub#7855; datahub-actions seems to still be using acryl-datahub 0.10.0.
Collecting acryl-datahub[bigquery,datahub-kafka,datahub-rest]==0.10.0 (from -r /tmp/datahub/ingest/requirements-5e6bb314-cc04-4dcb-a4e8-05f29d37a7ce.txt (line 1))
I couldn't find where this is being pinned. Happy to submit a PR if someone can point me to the right direction.
Also, would it be good to relax the constraint to `~=` so that patch versions get updated automatically?
I'm using acryldata/datahub-actions:v0.0.12
[2023-04-24 12:00:06,517] ERROR {datahub.entrypoints:196} - File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 114, in _add_init_error_context
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/entrypoints.py", line 176, in main
sys.exit(datahub(standalone_mode=False, **kwargs))
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), *args, **kwargs)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/telemetry/telemetry.py", line 379, in wrapper
raise e
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/telemetry/telemetry.py", line 334, in wrapper
res = func(*args, **kwargs)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/utilities/memory_leak_detector.py", line 95, in wrapper
return func(ctx, *args, **kwargs)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/cli/ingest_cli.py", line 179, in run
pipeline = Pipeline.create(
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 303, in create
return cls(
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 186, in __init__
with _add_init_error_context(
File "/usr/local/lib/python3.10/contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "/tmp/datahub/ingest/venv-bigquery-0.10.0/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 116, in _add_init_error_context
raise PipelineInitError(f"Failed to {step}: {e}") from e
PipelineInitError: Failed to find a registered source for type bigquery: 'str' object is not callable
If you deployed datahub-actions using the helm chart, you need to change the value of datahubDefaultCli to 0.10.2(.1)
Yep you’ll need cli version 0.10.2.1 for the fix.
I get a similar error when I use redshift as the source:
Traceback (most recent call last):
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/entrypoints.py", line 182, in main
sys.exit(datahub(standalone_mode=False, **kwargs))
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), *args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/telemetry/telemetry.py", line 379, in wrapper
raise e
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/telemetry/telemetry.py", line 334, in wrapper
res = func(*args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/utilities/memory_leak_detector.py", line 95, in wrapper
return func(ctx, *args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/cli/ingest_cli.py", line 187, in run
pipeline = Pipeline.create(
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 328, in create
return cls(
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 211, in __init__
with _add_init_error_context(
File "/usr/local/lib/python3.10/contextlib.py", line 153, in __exit__
self.gen.throw(typ, value, traceback)
File "/tmp/datahub/ingest/venv-redshift-0.10.2/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 121, in _add_init_error_context
raise PipelineInitError(f"Failed to {step}: {e}") from e
datahub.ingestion.run.pipeline.PipelineInitError: Failed to find a registered source for type redshift: 'str' object is not callable
I use this command "datahub docker quickstart" to deploy datahub
DataHub version: v0.10.2
DataHub CLI version: 0.10.2.1
Python version: 3.8.15
@aNull404 looks like your datahubDefaultCli
is still set to 0.10.2 instead of 0.10.2.1. You can run with the newer version as per the docs here https://datahubproject.io/docs/ui-ingestion/#advanced-running-with-a-specific-cli-version
@aNull404 looks like your
datahubDefaultCli
is still set to 0.10.2 instead of 0.10.2.1. You can run with the newer version as per the docs here https://datahubproject.io/docs/ui-ingestion/#advanced-running-with-a-specific-cli-version
Another error occurred when I set the CLI version to 0.10.2.1:
Traceback (most recent call last):
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/entrypoints.py", line 182, in main
sys.exit(datahub(standalone_mode=False, **kwargs))
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/click/decorators.py", line 26, in new_func
return f(get_current_context(), *args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/telemetry/telemetry.py", line 379, in wrapper
raise e
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/telemetry/telemetry.py", line 334, in wrapper
res = func(*args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/utilities/memory_leak_detector.py", line 95, in wrapper
return func(ctx, *args, **kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/cli/ingest_cli.py", line 198, in run
loop.run_until_complete(run_func_check_upgrade(pipeline))
File "/usr/local/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
return future.result()
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/cli/ingest_cli.py", line 158, in run_func_check_upgrade
ret = await the_one_future
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/cli/ingest_cli.py", line 149, in run_pipeline_async
return await loop.run_in_executor(
File "/usr/local/lib/python3.10/concurrent/futures/thread.py", line 58, in run
result = self.fn(*self.args, **self.kwargs)
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/cli/ingest_cli.py", line 140, in run_pipeline_to_completion
raise e
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/cli/ingest_cli.py", line 132, in run_pipeline_to_completion
pipeline.run()
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 359, in run
for wu in itertools.islice(
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/utilities/source_helpers.py", line 104, in auto_stale_entity_removal
yield from stale_entity_removal_handler.gen_removed_entity_workunits()
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/ingestion/source/state/stale_entity_removal_handler.py", line 267, in gen_removed_entity_workunits
last_checkpoint: Optional[Checkpoint] = self.source.get_last_checkpoint(
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/ingestion/source/state/stateful_ingestion_base.py", line 320, in get_last_checkpoint
self.last_checkpoints[job_id] = self._get_last_checkpoint(
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/ingestion/source/state/stateful_ingestion_base.py", line 295, in _get_last_checkpoint
self.ingestion_checkpointing_state_provider.get_latest_checkpoint(
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py", line 76, in get_latest_checkpoint
] = self.graph.get_latest_timeseries_value(
File "/tmp/datahub/ingest/venv-redshift-0.10.2.1/lib/python3.10/site-packages/datahub/ingestion/graph/client.py", line 299, in get_latest_timeseries_value
assert len(values) == 1
AssertionError
great, thanks everyone. Confirming that changing the default CLI value to 0.10.2.1 fixed the issue. For anyone having the same problem the value in the helm chart is global.datahub.managed_ingestion.defaultCliVersion