Marshall/Unmarshall with BSON (JS-BSON)
vsdigitall opened this issue · 2 comments
As written in introduction:
During store.set, values marshalled into JSON and stored in process heap, so all threads can access it, and unmarshalled while users retrieve them via store.get.
So every set/get operation calls JSON.stringify/JSON.parse which is quite slow. Why not use BSON internally to marshall/unmarshall? Example is here:
https://github.com/kyriosli/node-shared-cache/blob/master/src/bson.cc
BSON requires less memory and is faster when used inside a C++ module. I believe you can expect over 10x performance improvement compared to JSON.stringify/JSON.parse.
Also, node-shared-cache has a locking mechanism which allows safely reading/writing object values. It could be a nice cross-thread resource-sharing feature if we could just send object references to threads with the ability to read/write and immediately see changes in other threads.
I would like to give you some feedback on cross-process object sharing approach in 'node-shared-cache' module. I hope you will find it useful.
I ran a test with 20 reader workers and 1 writer worker using 'node-shared-cache' on a 20-core Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz, all accessing the same shared object. One process sets data on the shared object; the others read data from the same object at a total rate of 2000 rq/sec.
After 24 hrs of execution I received no memory leaks and no exceptions:
Napa.js sounds promising, and if we can get there a fast cross-thread (and cross-process) object sharing with transparent read/write ops then people can write more complex programs faster and with better reliability and performance.
Test source code:
// Test harness for 'node-shared-cache': one writer process plus
// maxSocketWorkers reader processes sharing a single shmem cache.
const cp = require('child_process');
const maxSocketWorkers = 20;                  // number of reader workers
const CACHE_NAME = "NODE_CACHE";              // shared-memory cache instance name
const binding = require('node-shared-cache'); // native shmem cache module
global.workers = {};                          // live child workers, keyed by env.id
// Awaitable delay: returns a Promise that resolves after ms milliseconds.
function sleep (ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Generate a random lowercase-hex string of length l (i.e. l/2 random bytes).
 * Not cryptographically secure (uses Math.random()); fine for test payloads.
 * @param {number} l - desired hex-string length (expected to be even).
 * @returns {string} hex string of length l.
 */
function randomBytes (l) {
  let r = '';
  const count = l / 2; // one byte -> two hex characters
  for (let i = 0; i < count; i++) {
    // 0..255 inclusive; the original used `* 255`, which could never
    // produce the byte 0xff and slightly biased the distribution.
    r += Math.floor(Math.random() * 256).toString(16).padStart(2, '0');
  }
  return r;
}
if (process.argv[2] !== 'worker') { // host process: spawn 1 writer and, 10s later, 20 reader workers
function restart (env) { // spawn one child worker with the given env and register it in global.workers
let worker = cp.spawn(process.execPath, [__filename, 'worker'], {stdio: 'inherit', env});
worker.env = env;
global.workers[env.id] = worker;
worker.on('exit', function (code, signal) { // drop dead workers from the registry and report the count left
delete global.workers[env.id];
console.log('worker ' + worker.env.id + ' died with code: ' + code + ', signal: ' + signal + '\n left workers: ' + Object.keys(global.workers).length);
});
}
restart({ // the writer process that sets data on the shared object
id: 0,
type: 'server'
});
setTimeout(function () { // after 10s (so the writer has seeded data), start the cross-process readers
for (let i = 0; i < maxSocketWorkers; i++)
restart({
id: i + 1,
type: 'client'
});
}, 10000);
console.log("Started: " + Date.now());
}
else { // worker process: role chosen by env.type ('server' = writer, 'client' = reader)
let obj; // shared-memory cache handle (node-shared-cache Cache instance)
if (process.env.type == 'server') {
try { // best-effort release of a previous shmem instance left over from an earlier run
binding.release(CACHE_NAME);
} catch (e) {} // NOTE(review): error deliberately swallowed (instance may not exist)
try { // create a new 2 GiB shmem instance with 2K block size
obj = new binding.Cache(CACHE_NAME, 2 * 1024 * 1024 * 1024, binding.SIZE_2K);
} catch (e) {} // NOTE(review): swallowed; if this fails, obj stays undefined and the next line throws
binding.clear(obj); // clear instance data
for (let i = 0; i < maxSocketWorkers; i++) { // seed 20 properties: 64-char random keys, 100 KiB random values
obj[randomBytes(64)] = randomBytes(102400);
}
let d = Object.keys(binding.dump(obj)), // snapshot of the seeded keys
l = d.length;
console.log("keys prepared: ", d);
async function run () { // write loop: every 10 ms, read one random key then overwrite 10 random keys
for (;;) {
await sleep(10);
let p = obj[d[Math.floor(Math.random() * l)]]; // get a random property
for (let i = 0; i < 10; i++) // set a random property value 10 times
obj[d[Math.floor(Math.random() * l)]] = { // a complex JavaScript object as the stored value
string: randomBytes(102400),
number: Date.now(),
array: [Math.random(),Math.random(),Math.random()],
object: { o: Math.random() }
}
p = undefined; // drop the reference to allow faster GC
}
}
run(); // NOTE(review): floating promise — a rejection here would go unnoticed
}
else if (process.env.type == 'client') {
console.log("Worker # " + process.env.id + " started");
try { // attach to the shmem instance created by the server worker
obj = new binding.Cache(CACHE_NAME, 2 * 1024 * 1024 * 1024, binding.SIZE_2K);
} catch (e) {} // NOTE(review): swallowed; obj stays undefined on failure
let d = Object.keys(binding.dump(obj)), // snapshot of the keys seeded by the server
l = d.length;
async function run () { // read loop: one random property every 10 ms (~100/s per worker, ~2000/s total)
for (;;) {
await sleep(10); // get 100 random property values per second per worker (total 2000 random property values per sec)
let p = obj[d[Math.floor(Math.random() * l)]]; // get a random property value
p = undefined; // drop the reference to allow faster GC
}
}
run(); // NOTE(review): floating promise — a rejection here would go unnoticed
}
}
Hi @vsdigitall thanks for the feedback. Currently Napa.js is using JSON.stringify/JSON.parse in object transportation because of the following reasons:
- It's simple and straightforward.
- It has well-defined behavior (because it's part of the standard).
- Readability
- There are some custom behaviors for transportable objects' marshalling/unmarshalling. That code is written in TypeScript and can be easily called via JSON.stringify/JSON.parse.
However, those are reasons why Napa.js uses JSON, not reasons why JSON is good. I believe it would be a benefit to eventually use a natively implemented binary marshalling format (like BSON). This may be discussed and planned sometime in the future.