How to remove keys related to Celery task IDs from the event?
Closed this issue · 3 comments
Hello,
I updated structlog and django-structlog to the latest versions and noticed a change in task execution. I followed the upgrade recommendations but still can't figure out how to get the code working as before. I must say that I'm new to the structlog configuration and not comfortable with it yet.
Basically, I'm in development and I use structlog to read the output in the console. For this reason I need to remove the unnecessary keys from Celery, but also the Django keys user_id and ip.
Previously I had the code snippet below in celery.py to remove 'request_id', 'ip' and 'user_id'. The keys task_id and parent_task_id weren't printed. But since I upgraded, the keys are back.
@receiver(bind_extra_request_metadata)
def bind_unbind_metadata(request, logger, **kwargs):
    logger.unbind('request_id', 'ip', 'user_id')
Could you please give some hints on how to remove these keys from the console? I tried the following but it doesn't seem to work.
@receiver(bind_extra_request_metadata)
def bind_unbind_metadata(request, logger, **kwargs):
    structlog.contextvars.unbind_contextvars('task_id', 'parent_task_id', 'ip', 'user_id', 'request_id')
settings.py
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "json_formatter": {
            "()": structlog.stdlib.ProcessorFormatter,
            "processor": structlog.processors.JSONRenderer(sort_keys=False),
            "foreign_pre_chain": [
                structlog.contextvars.merge_contextvars,
                structlog.processors.TimeStamper(fmt="iso"),
                structlog.stdlib.add_logger_name,
                structlog.stdlib.add_log_level,
                structlog.stdlib.PositionalArgumentsFormatter(),
            ],
        },
        "plain_console": {
            "()": structlog.stdlib.ProcessorFormatter,
            "processor": structlog.dev.ConsoleRenderer(pad_event=43, colors=True, force_colors=True),
        },
        "key_value": {
            "()": structlog.stdlib.ProcessorFormatter,
            "processor": structlog.processors.KeyValueRenderer(
                key_order=['level', 'logger', 'event', 'timestamp'],
                sort_keys=False,
            ),
        },
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "plain_console",
        },
        "json_file": {
            "class": "logging.handlers.WatchedFileHandler",
            "filename": "logs/json.log",
            "formatter": "json_formatter",
        },
        "flat_line_file": {
            "class": "logging.handlers.WatchedFileHandler",
            "filename": "logs/flat_line.log",
            "formatter": "key_value",
        },
    },
    "loggers": {
        "authentication": {
            "handlers": ["console", "flat_line_file", "json_file"],
            "level": "INFO",
            "propagate": False,
        },
        "pnl": {
            "handlers": ["console", "flat_line_file", "json_file"],
            "level": "INFO",
            "propagate": False,
        },
        "market": {
            "handlers": ["console", "flat_line_file", "json_file"],
            "level": "INFO",
            "propagate": False,
        },
        "account": {
            "handlers": ["console", "flat_line_file", "json_file"],
            "level": "INFO",
            "propagate": False,
        },
        "statistics": {
            "handlers": ["console", "flat_line_file", "json_file"],
            "level": "INFO",
            "propagate": False,
        },
        "widget": {
            "handlers": ["console", "flat_line_file", "json_file"],
            "level": "INFO",
            "propagate": False,
        },
    },
}

structlog.configure(
    processors=[
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.filter_by_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
    ],
    logger_factory=structlog.stdlib.LoggerFactory(),
    cache_logger_on_first_use=True,
)
celery.py
import logging
import logging.config

import structlog
from celery import Celery
from celery.signals import setup_logging
from django.conf import settings
from django.dispatch import receiver
from django_structlog.celery.steps import DjangoStructLogInitStep
from django_structlog.signals import bind_extra_request_metadata

app = Celery("myapp", broker='redis://localhost:6375/0')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
app.steps['worker'].add(DjangoStructLogInitStep)
@setup_logging.connect
def receiver_setup_logging(loglevel, logfile, format, colorize, **kwargs):  # pragma: no cover
    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "json_formatter": {
                    "()": structlog.stdlib.ProcessorFormatter,
                    "processor": structlog.processors.JSONRenderer(sort_keys=False),
                    "foreign_pre_chain": [
                        structlog.contextvars.merge_contextvars,
                        structlog.processors.TimeStamper(fmt="iso"),
                        structlog.stdlib.add_logger_name,
                        structlog.stdlib.add_log_level,
                        structlog.stdlib.PositionalArgumentsFormatter(),
                    ],
                },
                "plain_console": {
                    "()": structlog.stdlib.ProcessorFormatter,
                    "processor": structlog.dev.ConsoleRenderer(pad_event=43, colors=True, force_colors=True),
                },
                "key_value": {
                    "()": structlog.stdlib.ProcessorFormatter,
                    "processor": structlog.processors.KeyValueRenderer(
                        sort_keys=False,
                        key_order=['level', 'logger', 'timestamp', 'event'],
                    ),
                },
            },
            "handlers": {
                "console": {
                    "class": "logging.StreamHandler",
                    "formatter": "plain_console",
                },
            },
            "loggers": {
                "authentication": {
                    "handlers": ["console", "flat_line_file", "json_file"],
                    "level": "INFO",
                    "propagate": False,
                },
                "pnl": {
                    "handlers": ["console", "flat_line_file", "json_file"],
                    "level": "INFO",
                    "propagate": False,
                },
                "market": {
                    "handlers": ["console", "flat_line_file", "json_file"],
                    "level": "INFO",
                    "propagate": False,
                },
                "account": {
                    "handlers": ["console", "flat_line_file", "json_file"],
                    "level": "INFO",
                    "propagate": False,
                },
                "statistics": {
                    "handlers": ["console", "flat_line_file", "json_file"],
                    "level": "INFO",
                    "propagate": False,
                },
                "widget": {
                    "handlers": ["console", "flat_line_file", "json_file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    structlog.configure(
        processors=[
            structlog.contextvars.merge_contextvars,
            structlog.stdlib.filter_by_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.stdlib.add_logger_name,
            structlog.stdlib.add_log_level,
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.StackInfoRenderer(),
            structlog.processors.format_exc_info,
            structlog.processors.UnicodeDecoder(),
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )
@receiver(bind_extra_request_metadata)
def bind_unbind_metadata(request, logger, **kwargs):
    structlog.contextvars.unbind_contextvars('task_id', 'parent_task_id', 'ip', 'user_id', 'request_id')
    # logger.try_unbind('task_id', 'parent_task_id', 'ip', 'user_id', 'request_id')
Thank you
Sorry for the extra long delay.
I took the time to test it on my own.
I believe your problem is that you register the signal in the celery worker config, but this signal has to be registered in the django worker.
I could make it work using the demo project app.
I created this file, django_structlog_demo_project.users.signals.py:
import structlog
from django.dispatch import receiver
from django_structlog.signals import bind_extra_request_metadata
@receiver(bind_extra_request_metadata)
def bind_unbind_metadata(request, logger, **kwargs):
    structlog.contextvars.unbind_contextvars('task_id', 'parent_task_id', 'ip', 'user_id', 'request_id')
I modified this file, django_structlog_demo_project.users.apps.py:
from django.apps import AppConfig
class UsersAppConfig(AppConfig):
    name = "django_structlog_demo_project.users"
    verbose_name = "Users"
    default_auto_field = "django.db.models.AutoField"

    def ready(self):
        # noinspection PyUnresolvedReferences
        from . import signals
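Note that the ready() hook only runs if Django actually loads this app config, so the app has to be listed in INSTALLED_APPS. A minimal sketch of the kind of entry this assumes (the exact dotted path depends on your project layout):
# settings.py (sketch) -- loading UsersAppConfig makes Django call ready(),
# which imports the signals module and registers the receiver
INSTALLED_APPS = [
    # ...
    "django_structlog_demo_project.users.apps.UsersAppConfig",
]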
Without the signal:
django-structlog-django-1 | 2022-10-22T13:35:51.019594Z [info ] request_started [django_structlog.middlewares.request] ip=0.0.0.0 request=POST /success_task request_id=f65575cc-2fdf-47cd-b2e0-d08d36a72ae2 user_agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:106.0) Gecko/20100101 Firefox/106.0 user_id=1
django-structlog-django-1 | 2022-10-22T13:35:51.023696Z [info ] Enqueuing successful task [django_structlog_demo_project.home.views] ip=0.0.0.0 request_id=f65575cc-2fdf-47cd-b2e0-d08d36a72ae2 user_id=1
django-structlog-django-1 | 2022-10-22T13:35:51.216770Z [info ] task_enqueued [django_structlog.celery.receivers] child_task_id=551cbf0f-1829-45df-a6dc-bc656f5a07d5 child_task_name=django_structlog_demo_project.taskapp.celery.successful_task ip=0.0.0.0 request_id=f65575cc-2fdf-47cd-b2e0-d08d36a72ae2 user_id=1
django-structlog-django-1 | 2022-10-22T13:35:51.222120Z [info ] request_finished [django_structlog.middlewares.request] code=201 ip=0.0.0.0 request=POST /success_task request_id=f65575cc-2fdf-47cd-b2e0-d08d36a72ae2 user_id=1
django-structlog-celeryworker-1 | 2022-10-22T13:35:51.220095Z [info ] This is a successful task [django_structlog_demo_project.taskapp.celery] ip=0.0.0.0 request_id=f65575cc-2fdf-47cd-b2e0-d08d36a72ae2 task_id=551cbf0f-1829-45df-a6dc-bc656f5a07d5 user_id=1
django-structlog-celeryworker-1 | 2022-10-22T13:35:51.232399Z [info ] task_succeeded [django_structlog.celery.receivers] ip=0.0.0.0 request_id=f65575cc-2fdf-47cd-b2e0-d08d36a72ae2 task_id=551cbf0f-1829-45df-a6dc-bc656f5a07d5 user_id=1
With the properly registered signal:
django-structlog-django-1 | 2022-10-22T13:33:42.021843Z [info ] request_started [django_structlog.middlewares.request] request=POST /success_task user_agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:106.0) Gecko/20100101 Firefox/106.0
django-structlog-django-1 | 2022-10-22T13:33:42.024516Z [info ] Enqueuing successful task [django_structlog_demo_project.home.views]
django-structlog-django-1 | 2022-10-22T13:33:42.219582Z [info ] task_enqueued [django_structlog.celery.receivers] child_task_id=813e68ec-0519-465a-ab60-ed9ddb9b7b37 child_task_name=django_structlog_demo_project.taskapp.celery.successful_task
django-structlog-celeryworker-1 | 2022-10-22T13:33:42.226876Z [info ] This is a successful task [django_structlog_demo_project.taskapp.celery] task_id=813e68ec-0519-465a-ab60-ed9ddb9b7b37
django-structlog-django-1 | 2022-10-22T13:33:42.228530Z [info ] request_finished [django_structlog.middlewares.request] code=201 request=POST /success_task user_id=1
django-structlog-celeryworker-1 | 2022-10-22T13:33:42.236263Z [info ] task_succeeded [django_structlog.celery.receivers] task_id=813e68ec-0519-465a-ab60-ed9ddb9b7b37
Please reopen if it does not resolve your problem.
Have a great day.
I just realised there is also data you want to unbind in the celery worker, such as the task_id and the parent_task_id.
I'll try to do that.
I think I found the source of your problem.
You were using bind_extra_request_metadata instead of bind_extra_task_metadata in your worker.
I am not sure exactly what you want to achieve, but I see two scenarios.
Scenario A: Purge celery task log metadata only
In this scenario you keep all the data in the django worker logs and only remove things from the celery worker logs.
Example:
django-structlog-django-1 | 2022-10-22T14:35:11.898589Z [info ] request_started [django_structlog.middlewares.request] ip=0.0.0.0 request=POST /nesting_task request_id=5c4c4946-dc8e-4d12-83f7-39641997487e user_agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:106.0) Gecko/20100101 Firefox/106.0 user_id=1
django-structlog-django-1 | 2022-10-22T14:35:11.902727Z [info ] Enqueuing nesting task [django_structlog_demo_project.home.views] ip=0.0.0.0 request_id=5c4c4946-dc8e-4d12-83f7-39641997487e user_id=1
django-structlog-django-1 | 2022-10-22T14:35:12.119492Z [info ] task_enqueued [django_structlog.celery.receivers] child_task_id=68c01e43-5fbf-4cfe-8083-d7fd6d5cb2df child_task_name=django_structlog_demo_project.taskapp.celery.nesting_task ip=0.0.0.0 request_id=5c4c4946-dc8e-4d12-83f7-39641997487e user_id=1
django-structlog-django-1 | 2022-10-22T14:35:12.123007Z [info ] request_finished [django_structlog.middlewares.request] code=201 ip=0.0.0.0 request=POST /nesting_task request_id=5c4c4946-dc8e-4d12-83f7-39641997487e user_id=1
django-structlog-celeryworker-1 | 2022-10-22T14:35:12.136428Z [info ] This is a nesting task [django_structlog_demo_project.taskapp.celery] foo=Bar
django-structlog-celeryworker-1 | 2022-10-22T14:35:12.146243Z [info ] task_enqueued [django_structlog.celery.receivers] child_task_id=62c5870a-7dce-491e-97a6-e921776a7ccd child_task_name=django_structlog_demo_project.taskapp.celery.nested_task foo=Bar
django-structlog-celeryworker-1 | 2022-10-22T14:35:12.150432Z [info ] task_succeeded [django_structlog.celery.receivers] foo=Bar
django-structlog-celeryworker-1 | 2022-10-22T14:35:12.151278Z [info ] This is a nested task [django_structlog_demo_project.taskapp.celery] foo=Bar
django-structlog-celeryworker-1 | 2022-10-22T14:35:12.158961Z [info ] task_succeeded [django_structlog.celery.receivers] foo=Bar
In the django worker (in django_structlog_demo_project.users.signals.py), this will remove the data you do not want to pass from your django worker to the celery task:
import structlog
from django.dispatch import receiver
from django_structlog.celery.signals import modify_context_before_task_publish
@receiver(modify_context_before_task_publish)
def receiver_modify_context_before_task_publish(sender, signal, context, **kwargs):
    for k in ['ip', 'user_id', 'request_id']:
        if k in context:
            del context[k]
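A slightly more compact variant of the same receiver (just a sketch of the same idea, not part of the original snippet) pops the keys instead of checking membership first:
from django.dispatch import receiver
from django_structlog.celery.signals import modify_context_before_task_publish


@receiver(modify_context_before_task_publish)
def receiver_modify_context_before_task_publish(sender, signal, context, **kwargs):
    # dict.pop with a default silently ignores keys that are not present
    for k in ('ip', 'user_id', 'request_id'):
        context.pop(k, None)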
In the celery worker (in celery.py), this will remove the auto-added task_id and parent_task_id (this part is identical in both scenarios):
from django_structlog.celery.signals import bind_extra_task_metadata
@receiver(bind_extra_task_metadata)
def bind_unbind_metadata(sender, signal, task=None, logger=None, **kwargs):
    structlog.contextvars.unbind_contextvars('task_id', 'parent_task_id')
Scenario B: Purge log metadata in both the celery worker and the django worker
Example:
django-structlog-django-1 | 2022-10-22T14:45:39.101944Z [info ] request_started [django_structlog.middlewares.request] request=POST /nesting_task user_agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:106.0) Gecko/20100101 Firefox/106.0
django-structlog-django-1 | 2022-10-22T14:45:39.105199Z [info ] Enqueuing nesting task [django_structlog_demo_project.home.views]
django-structlog-django-1 | 2022-10-22T14:45:39.229036Z [info ] task_enqueued [django_structlog.celery.receivers] child_task_id=3d8660c1-69ab-4dbb-8606-7fd9d684747c child_task_name=django_structlog_demo_project.taskapp.celery.nesting_task
django-structlog-celeryworker-1 | 2022-10-22T14:45:39.231120Z [info ] This is a nesting task [django_structlog_demo_project.taskapp.celery] foo=Bar
django-structlog-django-1 | 2022-10-22T14:45:39.239418Z [info ] request_finished [django_structlog.middlewares.request] code=201 request=POST /nesting_task user_id=1
django-structlog-celeryworker-1 | 2022-10-22T14:45:39.243068Z [info ] task_enqueued [django_structlog.celery.receivers] child_task_id=e791d29b-5f79-4a53-a5ed-0cb4848839c6 child_task_name=django_structlog_demo_project.taskapp.celery.nested_task foo=Bar
django-structlog-celeryworker-1 | 2022-10-22T14:45:39.246289Z [info ] This is a nested task [django_structlog_demo_project.taskapp.celery] foo=Bar
django-structlog-celeryworker-1 | 2022-10-22T14:45:39.249410Z [info ] task_succeeded [django_structlog.celery.receivers] foo=Bar
django-structlog-celeryworker-1 | 2022-10-22T14:45:39.252788Z [info ] task_succeeded [django_structlog.celery.receivers] foo=Bar
In the django worker (in django_structlog_demo_project.users.signals.py), this will remove the request data from every log, and it will also not be passed to the task:
import structlog
from django.dispatch import receiver
from django_structlog.signals import bind_extra_request_metadata
@receiver(bind_extra_request_metadata)
def bind_unbind_metadata(request, logger, **kwargs):
    structlog.contextvars.unbind_contextvars('ip', 'user_id', 'request_id')
In the celery worker (in celery.py), this will remove the auto-added task_id and parent_task_id (this part is identical in both scenarios):
from django_structlog.celery.signals import bind_extra_task_metadata
@receiver(bind_extra_task_metadata)
def bind_unbind_metadata(sender, signal, task=None, logger=None, **kwargs):
    structlog.contextvars.unbind_contextvars('task_id', 'parent_task_id')
Please reopen if it does not resolve your problem.
Have a great day.