zemirco/json2csv

"Transforms" options cause out-of-memory-error

Closed this issue · 1 comments

I am facing a weird issue when parsing json to csv in aws lambda.

Works perfectly with smaller (up to 20mb) files, but fails on +50mb, even though 800mb+ ram is dedicated

The code consists of few things: receiving an http stream -> piping it to json parser -> resolving the promise. Looks very simply, but crashes with out of memory error. Here are the details:

Important detail: parsing succeeds once I remove { transforms: flatten()... } options. It seems that they cause the error

  • Package version: 7.0.1
  • Node version: 18
  • Whole code:
import { APIGatewayProxyResult, APIGatewayProxyEvent } from 'aws-lambda';
import http from 'http';
import https from 'https';
import { pipeline } from 'stream';

import { isHttps } from './utils';
import { Transform as CsvTransform } from '@json2csv/node';
import flatten from './parsers/flatten';

// Function utilizes streams to read data from a url, transform it, and upload it to s3

export const lambdaHandler = async (event: APIGatewayProxyEvent): Promise<APIGatewayProxyResult> => {
    try {
        const data = typeof event === 'string' ? JSON.parse(event) : event;
        const url = data.message.extsrc;
        // const body = await streamResponse(url);
        const protocol = isHttps(url) ? https : http;
        const body = await new Promise((resolve, reject) => {
            protocol.get(url, (networkStream: http.IncomingMessage) => {
                pipeline(
                    networkStream,
                    new CsvTransform(
                        {
                            transforms: [flatten({ objects: true, arrays: true, separator: '__' })],
                        },
                        { objectMode: true },
                    ),
                    (error: any) => {
                        if (error) {
                            reject(error);
                        }
                        resolve('success');
                    },
                );
            });
        });
        const response = {
            statusCode: 200,
            body: JSON.stringify(body),
        };
        return response;
    } catch (err) {
        console.log(err);
        return {
            statusCode: 500,
            body: JSON.stringify({
                message: err,
            }),
        };
    }
};
  1. JSON data: https://data.wa.gov/api/views/f6w7-q2d2/rows.json?accessType=DOWNLOAD OR https://raw.githubusercontent.com/seductiveapps/largeJSON/master/100mb.json (both fail).
  2. Output/error:
<--- Last few GCs --->

[14:0x55dad147c790]    13746 ms: Mark-sweep (reduce) 713.6 (727.9) -> 713.6 (727.9) MB, 485.7 / 0.0 ms  (average mu = 0.153, current mu = 0.001) last resort; GC in old space requested
[14:0x55dad147c790]    14237 ms: Mark-sweep (reduce) 713.6 (727.9) -> 713.6 (727.9) MB, 490.8 / 0.0 ms  (average mu = 0.080, current mu = 0.000) last resort; GC in old space requested


<--- JS stacktrace --->

FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
 1: 0x55dacc879103 node::Abort() [/var/lang/bin/node]
 2: 0x55dacc74df95  [/var/lang/bin/node]
 3: 0x55daccab34bd v8::Utils::ReportOOMFailure(v8::internal::Isolate*, char const*, bool) [/var/lang/bin/node]
 4: 0x55daccab37c2 v8::internal::V8::FatalProcessOutOfMemory(v8::internal::Isolate*, char const*, bool) [/var/lang/bin/node]
 5: 0x55dacccd6542 v8::internal::HeapAllocator::AllocateRawWithRetryOrFailSlowPath(int, v8::internal::AllocationType, v8::internal::AllocationOrigin, v8::internal::AllocationAlignment) [/var/lang/bin/node]   
 6: 0x55dacccb024e v8::internal::Factory::AllocateRaw(int, v8::internal::AllocationType, v8::internal::AllocationAlignment) [/var/lang/bin/node]
 7: 0x55daccca6b7b v8::internal::FactoryBase<v8::internal::Factory>::AllocateRawArray(int, v8::internal::AllocationType) [/var/lang/bin/node]
 8: 0x55daccca6d43 v8::internal::FactoryBase<v8::internal::Factory>::NewFixedArrayWithFiller(v8::internal::Handle<v8::internal::Map>, int, v8::internal::Handle<v8::internal::Oddball>, v8::internal::AllocationType) [/var/lang/bin/node]
 9: 0x55daccfd5760 v8::internal::Handle<v8::internal::NameDictionary> v8::internal::HashTable<v8::internal::NameDictionary, v8::internal::NameDictionaryShape>::NewInternal<v8::internal::Isolate>(v8::internal::Isolate*, int, v8::internal::AllocationType) [/var/lang/bin/node]
10: 0x55daccfd5c24 v8::internal::Handle<v8::internal::NameDictionary> v8::internal::HashTable<v8::internal::NameDictionary, v8::internal::NameDictionaryShape>::EnsureCapacity<v8::internal::Isolate>(v8::internal::Isolate*, v8::internal::Handle<v8::internal::NameDictionary>, int, v8::internal::AllocationType) [/var/lang/bin/node]
11: 0x55daccfd5c92 v8::internal::Handle<v8::internal::NameDictionary> v8::internal::Dictionary<v8::internal::NameDictionary, v8::internal::NameDictionaryShape>::Add<v8::internal::Isolate>(v8::internal::Isolate*, v8::internal::Handle<v8::internal::NameDictionary>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyDetails, v8::internal::InternalIndex*) [/var/lang/bin/node]
12: 0x55daccfd61e9 v8::internal::BaseNameDictionary<v8::internal::NameDictionary, v8::internal::NameDictionaryShape>::Add(v8::internal::Isolate*, v8::internal::Handle<v8::internal::NameDictionary>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyDetails, v8::internal::InternalIndex*) [/var/lang/bin/node]
13: 0x55daccf8e33c v8::internal::LookupIterator::ApplyTransitionToDataProperty(v8::internal::Handle<v8::internal::JSReceiver>) [/var/lang/bin/node]
14: 0x55daccfa9c8c v8::internal::Object::TransitionAndWriteDataProperty(v8::internal::LookupIterator*, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyAttributes, v8::Maybe<v8::internal::ShouldThrow>, v8::internal::StoreOrigin) [/var/lang/bin/node]
15: 0x55dacd12c365 v8::internal::Runtime::SetObjectProperty(v8::internal::Isolate*, v8::internal::Handle<v8::internal::Object>, v8::internal::Handle<v8::internal::Object>, v8::internal::Handle<v8::internal::Object>, v8::internal::StoreOrigin, v8::Maybe<v8::internal::ShouldThrow>) [/var/lang/bin/node]
16: 0x55dacd12dc7a v8::internal::Runtime_SetKeyedProperty(int, unsigned long*, v8::internal::Isolate*) [/var/lang/bin/node]
17: 0x55dacd5ff639  [/var/lang/bin/node]
END RequestId: 4a31198e-c28f-4f01-aaf0-2be6e059d8af
REPORT RequestId: 4a31198e-c28f-4f01-aaf0-2be6e059d8af  Init Duration: 0.13 ms  Duration: 14365.91 ms   Billed Duration: 14366 ms       Memory Size: 812 MB     Max Memory Used: 812 MB