How to turn off multiprocessing to run on AWS Lambda

Question

How to turn off multiprocessing to run on AWS Lambda

Closed this issue 5 months ago · 2 comments

Based on the discussion from this issue
#34

I am trying to call multimetric as a subprocess on an AWS Lambda function.

However, Lambda does not support multiprocessing, and looking at the code I don't see any obvious way to make sure it doesn't call multiprocessing.Pool.

Is there a way to achieve this?

Answer 1 · 2024-05-16T11:17:19.000Z

Basically you would need to roll your own wrapper script like

import json
import logging

from multimetric.__main__ import (
    file_process,
    get_modules_calculated,
    get_modules_metrics,
    get_modules_stats,
    importer_pick,
    parse_args,
)

def main_func():
    """Run multimetric over all input files sequentially, in one process.

    Wrapper around multimetric's building blocks that calls
    ``file_process`` in a plain loop, so no ``multiprocessing.Pool`` is
    needed (useful on platforms such as AWS Lambda).

    Arguments are obtained from ``parse_args()``. Unless ``dump`` is set,
    the combined result (``"files"`` and ``"overall"`` sections, floats
    rounded to 3 digits) is emitted as pretty-printed JSON through the
    ``'stdout'`` logger.
    """
    _args = parse_args()

    _result = {"files": {}, "overall": {}}

    # Get importer
    # NOTE(review): "import_security" is fed from warn_standard and
    # "import_standard" from warn_security. The pairing looks swapped but
    # is kept unchanged here - confirm against multimetric before changing.
    _sources = {
        "import_compiler": _args.warn_compiler,
        "import_coverage": _args.coverage,
        "import_duplication": _args.warn_duplication,
        "import_functional": _args.warn_functional,
        "import_security": _args.warn_standard,
        "import_standard": _args.warn_security,
    }
    _importer = {}
    for key, source in _sources.items():
        picked = importer_pick(_args, source)
        # sanity check: keep only importers that were actually picked
        if picked:
            _importer[key] = picked

    # instance metric modules
    _overallMetrics = get_modules_metrics(_args, **_importer)
    _overallCalc = get_modules_calculated(_args, **_importer)

    # One entry per file. Each entry is indexed below as x[0] (per-file
    # result), x[1] (key used in "files") and x[4] (input for the global
    # metrics), so the return value must be appended as a whole; ``+=``
    # would splice its individual elements into the list.
    results = [file_process(path, _args, _importer) for path in _args.files]

    for x in results:
        _result["files"][x[1]] = x[0]

    for y in _overallMetrics:
        _result["overall"].update(
            y.get_results_global([x[4] for x in results]))
    for y in _overallCalc:
        _result["overall"].update(y.get_results(_result["overall"]))
    for m in get_modules_stats(_args, **_importer):
        _result = m.get_results(_result, "files", "overall")

    def round_float(item):
        """Recursively round every float in nested dicts/lists to 3 digits.

        Containers are updated in place; the (possibly rounded) item is
        returned so scalar floats can be replaced by the caller.
        """
        if isinstance(item, dict):
            for k, v in item.items():
                item[k] = round_float(v)
        elif isinstance(item, list):
            for index, value in enumerate(item):
                item[index] = round_float(value)
        elif isinstance(item, float):
            item = round(item, 3)
        return item

    _result = round_float(_result)

    if not _args.dump:
        # Output
        # NOTE(review): this is an INFO record on the 'stdout' logger; it is
        # only visible if logging is configured to let it through - confirm
        # multimetric sets that up, otherwise use print() instead.
        logging.getLogger('stdout').info(json.dumps(_result, indent=2, sort_keys=True))
    
# Script entry point: only run the analysis when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main_func()

That would run everything sequentially.

Please close the issue in case that solves your problem

Answer 2 · 2024-05-17T10:02:57.000Z

That's amazing, thanks a lot for such a detailed and timely response!

The code above was almost 100% there. I made a few very minor changes to get it to run. I'll paste it here in case anyone else finds it useful later.

from multimetric.__main__ import parse_args, file_process, importer_pick, get_modules_metrics, get_modules_calculated, get_modules_stats
import glob


def main_func(file_path=None):
    """Run multimetric sequentially, in one process, and print the result.

    Calls ``file_process`` in a plain loop instead of going through
    ``multiprocessing.Pool``, so it can run on platforms such as AWS
    Lambda.

    Args:
        file_path: Path of the file to analyse. It is handed to
            ``parse_args`` as the only command-line argument. When
            ``None`` (the default), ``parse_args()`` is called without
            arguments - presumably falling back to the process command
            line; verify against multimetric.

    Unless ``dump`` is set, the combined result (``"files"`` and
    ``"overall"`` sections, floats rounded to 3 digits) is printed as
    pretty-printed JSON.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import json`` being present.
    import json

    if file_path is None:
        _args = parse_args()
    else:
        _args = parse_args([file_path])

    _result = {"files": {}, "overall": {}}

    # Get importer
    # NOTE(review): "import_security" is fed from warn_standard and
    # "import_standard" from warn_security. The pairing looks swapped but
    # is kept unchanged here - confirm against multimetric before changing.
    _sources = {
        "import_compiler": _args.warn_compiler,
        "import_coverage": _args.coverage,
        "import_duplication": _args.warn_duplication,
        "import_functional": _args.warn_functional,
        "import_security": _args.warn_standard,
        "import_standard": _args.warn_security,
    }
    _importer = {}
    for key, source in _sources.items():
        picked = importer_pick(_args, source)
        # sanity check: keep only importers that were actually picked
        if picked:
            _importer[key] = picked

    # instance metric modules
    _overallMetrics = get_modules_metrics(_args, **_importer)
    _overallCalc = get_modules_calculated(_args, **_importer)

    # One entry per file; each entry is indexed below as x[0] (per-file
    # result), x[1] (key used in "files") and x[4] (input for the global
    # metrics).
    results = [file_process(path, _args, _importer) for path in _args.files]

    for x in results:
        _result["files"][x[1]] = x[0]

    for y in _overallMetrics:
        _result["overall"].update(y.get_results_global([x[4] for x in results]))
    for y in _overallCalc:
        _result["overall"].update(y.get_results(_result["overall"]))
    for m in get_modules_stats(_args, **_importer):
        _result = m.get_results(_result, "files", "overall")

    def round_float(item):
        """Recursively round every float in nested dicts/lists to 3 digits.

        Containers are updated in place; the (possibly rounded) item is
        returned so scalar floats can be replaced by the caller.
        """
        if isinstance(item, dict):
            for k, v in item.items():
                item[k] = round_float(v)
        elif isinstance(item, list):
            for index, value in enumerate(item):
                item[index] = round_float(value)
        elif isinstance(item, float):
            item = round(item, 3)
        return item

    _result = round_float(_result)

    if not _args.dump:
        # Output
        print(json.dumps(_result, indent=2, sort_keys=True))

# Script entry point: only run the analysis when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main_func()

Really appreciate the work you have done and the fact you have opened it to the whole community. A huge thanks!