mcollina/msgpack5

Can't detect if a buffer was completely consumed

jfinkhaeuser opened this issue · 7 comments

It's a question of sanity checking, really.

The decode function will return the best possible interpretation of the input data. If that was malformed in just the right way, the function still returns data.

By comparing the input length with the remaining input length after decoding (or the consumed length), we can detect whether decode consumed the entire buffer and that's a fairly good indication that the buffer did not contain garbage.

You can implement this behavior by passing in a bl instance to decode, and then check if the message is consumed, as length will be updated accordingly.

Would you mind sending a PR to the Readme documenting that behavior? Thanks so much!

Here's some test code instead :)

// Dependencies for the standalone repro script below.
var msgpack = require('msgpack5')();
var assert = require('assert');
var bl = require('bl');

// Encode `data` with msgpack5 and return the bytes as a binary string.
var serialize = function (data) {
  return msgpack.encode(data).toString('binary');
};

// Decode a binary string with msgpack5.
// Returns the decoded value, or undefined if decoding throws.
// FIX: `new buffer.Buffer(...)` is deprecated and unsafe; Buffer.from is
// the supported replacement. The `buffer` module require is unnecessary —
// Buffer is a Node global.
var deserialize1 = function (data) {
  var msgpack = require('msgpack5')();
  try {
    var buf = Buffer.from(data, 'binary');
    return msgpack.decode(buf);
  } catch (err) {
    // Malformed input: report failure via undefined rather than throwing.
    return undefined;
  }
};


// Decode via a `bl` buffer list so the caller can observe how many bytes
// decode consumed (the list's length is logged before and after).
// Returns the decoded value, or undefined if decoding throws.
// FIX: `new buffer.Buffer(...)` is deprecated and unsafe; Buffer.from is
// the supported replacement (Buffer is a Node global, no require needed).
var deserialize2 = function (data) {
  var msgpack = require('msgpack5')();
  try {
    var list = new bl(Buffer.from(data, 'binary'));
    console.log("length before: ", list.length);
    var ret = msgpack.decode(list);
    console.log("length after: ", list.length);
    return ret;
  } catch (err) {
    console.log('got err', err);
    return undefined;
  }
};

// Sample payload exercised by the round-trip checks below.
var test = {
 foo: {
  bar: 1,
  quux: 'test',
 },
 lala: [ 1 ],
 blah: 'blah',
};

// Good case
// Round trip: encode then decode should reproduce the input structurally.
var serialized = serialize(test);
var deserialized = deserialize1(serialized);
assert.deepEqual(test, deserialized);

// Bad case
// Feeding JSON text (not msgpack bytes) still "decodes" to a value instead
// of raising — this is the sanity-checking gap this issue is about. The
// concrete value (123) is whatever the first JSON byte happens to decode as.
var bad = deserialize1(JSON.stringify(test));
// It's true that bad != test, but the following passes
// (at least on my machine).
assert.strictEqual(123, bad);

// buffer lists
// Valid input through the bl-based decoder; the author reports the decoded
// value here (0) and that list.length was not adjusted as documented —
// NOTE(review): machine-dependent observation, not a general guarantee.
bad = deserialize2(serialized);
assert.strictEqual(0, bad);

Not only do I fail to see how buffer lists actually improve anything, but their length also isn't adjusted as documented.

On my machine the lengths before and after the 'garbled' input are different:

var msgpack = require('./')();
var assert = require('assert');
var bl = require('bl');

// Maintainer's counter-example: decode garbage through a bl list and log
// the remaining length, showing list.length does shrink as bytes are read.
// Returns the decoded value, or undefined if decoding throws.
// FIX: `new Buffer(...)` is deprecated and unsafe; use Buffer.from.
var deserialize2 = function (data) {
  try {
    var list = new bl(Buffer.from(data, 'binary'));
    console.log("length before: ", list.length);
    var ret = msgpack.decode(list);
    console.log("length after: ", list.length);
    return ret;
  } catch (err) {
    console.log('got err', err);
    return undefined;
  }
};

// buffer lists
var bad = deserialize2('this is garbage');
// NOTE(review): `test` is not defined in this snippet — presumably the
// payload object from the earlier example; confirm before running as-is.
assert.deepEqual(test, bad);

Not for me, I'm afraid. Which node version are you using? Incidentally, the node documentation clearly states that "length" is the available buffer memory, not necessarily the content sizes. That makes it a little hard to rely on.
It would be helpful for msgpack to report how many bytes it (de) serialized; IMHO that's part of any sane serialization API.
On 25 Apr 2015 11:10 am, Matteo Collina notifications@github.com wrote:On my machine the lengths before and after the 'garbled' input are different:

var msgpack = require('./')();
var assert = require('assert');
var bl = require('bl');

var deserialize2 = function(data) {
try {
var buf = new Buffer(data, 'binary');
var list = new bl(buf);
console.log("length before: ", list.length);
var ret = msgpack.decode(list);
console.log("length after: ", list.length);
return ret;
} catch (err) {
console.log('got err', err);
return undefined;
}
};

// buffer lists
var bad = deserialize2('this is garbage');
assert.deepEqual(test, bad);

—Reply to this email directly or view it on GitHub.

!DSPAM:1,553b5a7322461250617536!

var msgpack = require('./')();
var assert = require('assert');
var bl = require('bl');

// Final repro: decode garbage through a bl list and print the decoded
// value along with the before/after lengths of the list.
// Returns the decoded value, or undefined if decoding throws.
// FIX: `new Buffer(...)` is deprecated and unsafe; use Buffer.from.
// Also removed the unused local `buffer` require (Buffer is a global) and
// the redundant inner msgpack require shadowing the module-level one.
var deserialize2 = function (data) {
  try {
    var list = new bl(Buffer.from(data, 'binary'));
    console.log("length before: ", list.length);
    var ret = msgpack.decode(list);
    console.log("length after: ", list.length);
    return ret;
  } catch (err) {
    console.log('got err', err);
    return undefined;
  }
};

// buffer lists
var bad = deserialize2('this is garbage');
console.log(bad)

With the output:

$ node error.js
length before:  15
length after:  14
116
  1. the example above runs fine in v0.10.36, v0.12.0 and iojs v1.8.1. I am on Mac OS X.
  2. using the buffer list approach (as shown above) reports the number of bytes consumed.

Fair enough, a different library it is.
On 25 Apr 2015 11:46 am, Matteo Collina notifications@github.com wrote:var msgpack = require('./')();
var assert = require('assert');
var bl = require('bl');

var deserialize2 = function(data) {
var msgpack = require('./')();
var buffer = require('buffer');
try {
var buf = new Buffer(data, 'binary');
var list = new bl(buf);
console.log("length before: ", list.length);
var ret = msgpack.decode(list);
console.log("length after: ", list.length);
return ret;
} catch (err) {
console.log('got err', err);
return undefined;
}
};

// buffer lists
var bad = deserialize2('this is garbage');
console.log(bad)

With the output:

$ node error.js
length before: 15
length after: 14
116

the example above runs fine in v0.10.36, v0.12.0 and iojs v1.8.1. I am on Mac OS X.using the buffer list approach (as shown above) reports the number of bytes consumed.

—Reply to this email directly or view it on GitHub.

!DSPAM:1,553b62b822462714086026!