DataDog/datadog-serverless-functions

Bug in parsing.py:get_service_from_tags_and_remove_duplicates()

Opened this issue · 0 comments

Describe what happened:

This is how the function works now:

aws/logs_monitoring/parsing.py

DD_CUSTOM_TAGS = "ddtags"
DD_SOURCE = "ddsource"

def get_service_from_tags_and_remove_duplicates(metadata):
    service = ""
    tagsplit = metadata[DD_CUSTOM_TAGS].split(",")
    for i, tag in enumerate(tagsplit):
        if tag.startswith("service:"):
            if service:
                print(f"DEL: #{i} == {tagsplit[i]}")
                # remove duplicate entry from the tags
                del tagsplit[i]
            else:
                service = tag[8:]
                print(f"SERVICE: {service}")

    metadata[DD_CUSTOM_TAGS] = ",".join(tagsplit)

    print(f"{metadata}")
    print(f"{tagsplit}")

    # Default service to source value
    return service if service else metadata[DD_SOURCE]

print ("{}".format(
    get_service_from_tags_and_remove_duplicates({
    DD_CUSTOM_TAGS:"a:b,service:bubernetes,service:shumbernetes,service:cucumbernetes,service:dumbernetes,c:d,e,f,,,",
    DD_SOURCE:"test_source"
})))

The output:

SERVICE: bubernetes
DEL: #2 == service:shumbernetes
DEL: #3 == service:dumbernetes
{'ddtags': 'a:b,service:bubernetes,service:cucumbernetes,c:d,e,f,,,', 'ddsource': 'test_source'}
['a:b', 'service:bubernetes', 'service:cucumbernetes', 'c:d', 'e', 'f', '', '', '']
bubernetes

As you may see, the resulting data still has duplicates due to the incorrect usage of the del operator within an active loop.

The suggestion is to use another approach, which will also filter out empty elements (and you can add other exclusions later):

DD_CUSTOM_TAGS = "ddtags"
DD_SOURCE = "ddsource"

def get_service_from_tags_and_remove_duplicates(metadata):
    service = ""
    tagsplit = metadata[DD_CUSTOM_TAGS].split(",")
    services = [tag for tag in tagsplit if tag.startswith("service:")]
    if services:
        # assumes all services are duplicates
        # service name is after `service:"
        service = services[0].split(":")[1]
        print(f"SERVICES: {services}")
        print(f"THE SERVICE: {service}")
        tagsplit = [services[0]] + [item for item in tagsplit if item not in services and item != ""]

    # the old part:
    # for i, tag in enumerate(tagsplit):
    #     if tag.startswith("service:"):
    #         if service:
    #             print(f"DEL: #{i} == {tagsplit[i]}")
    #             # remove duplicate entry from the tags
    #             del tagsplit[i]
    #         else:
    #             service = tag[8:]
    #             print(f"SERVICE: {service}")

    metadata[DD_CUSTOM_TAGS] = ",".join(tagsplit)

    print(f"{metadata}")
    print(f"{tagsplit}")

    # Default service to source value
    return service if service else metadata[DD_SOURCE]

print ("{}".format(
    get_service_from_tags_and_remove_duplicates({
    DD_CUSTOM_TAGS:"a:b,service:bubernetes,service:shumbernetes,service:cucumbernetes,service:dumbernetes,c:d,e,f,,,",
    DD_SOURCE:"test_source"
})))
SERVICES: ['service:bubernetes', 'service:shumbernetes', 'service:cucumbernetes', 'service:dumbernetes']
THE SERVICE: bubernetes
{'ddtags': 'service:bubernetes,a:b,c:d,e,f', 'ddsource': 'test_source'}
['service:bubernetes', 'a:b', 'c:d', 'e', 'f']
bubernetes

Describe what you expected:

A working duplicate filtration.

Steps to reproduce the issue:
See above.