Optimization of websocket masking
tsctx opened this issue · 9 comments
tsctx commented
as titled
This is a cause of poor performance.
undici/lib/web/websocket/frame.js
Lines 85 to 88 in 0980f9d
Uzlopak commented
Well, we could reduce the for-loop overhead by doing four masking operations per iteration instead of one.
ronag commented
As I indicated earlier, the masking is, as far as I know, totally useless for the backend and could just be skipped...
Uzlopak commented
Maybe instead of i + 4 < length, you precalculate that too
const lengthFor4 = length - (length & 3)
tsctx commented
import { randomBytes } from "crypto";
import { group, bench, run } from "mitata";
/**
 * Baseline masking routine: XORs every byte of `buffer` with the mask byte
 * selected by the byte's position modulo 4, one byte per loop iteration.
 *
 * @param {Uint8Array} mask - Masking key; indices 0..3 are read.
 * @param {Uint8Array} buffer - Payload to mask; it is not modified.
 * @returns {Buffer} Newly allocated Buffer holding the masked bytes.
 */
function maskForOne(mask, buffer) {
  const size = buffer.length;
  // allocUnsafe is fine here: every byte is overwritten by the loop below.
  const masked = Buffer.allocUnsafe(size);
  let pos = 0;
  while (pos < size) {
    masked[pos] = buffer[pos] ^ mask[pos & 3];
    pos += 1;
  }
  return masked;
}
/**
 * Masking routine with the main loop unrolled four bytes at a time.
 *
 * Produces the same output as XOR-ing each byte with `mask[i & 3]`, but
 * handles whole 4-byte groups in one iteration and finishes the remaining
 * 0-3 bytes in a short tail loop.
 *
 * @param {Uint8Array} mask - Masking key; indices 0..3 are read.
 * @param {Uint8Array} buffer - Payload to mask; it is not modified.
 * @returns {Buffer} Newly allocated Buffer holding the masked bytes.
 */
function maskForFour(mask, buffer) {
  const length = buffer.length;
  // allocUnsafe is fine here: every byte is overwritten by the loops below.
  const alloc = Buffer.allocUnsafe(length);
  // Largest multiple of 4 that is <= length. It is 0 when length < 4, so the
  // unrolled loop simply does not run and needs no separate length guard.
  const lengthFor4 = length - (length & 3);
  for (let i = 0; i < lengthFor4; i += 4) {
    alloc[i] = buffer[i] ^ mask[0];
    alloc[i + 1] = buffer[i + 1] ^ mask[1];
    alloc[i + 2] = buffer[i + 2] ^ mask[2];
    alloc[i + 3] = buffer[i + 3] ^ mask[3];
  }
  // Tail: i starts at a multiple of 4, so `i & 3` still picks the right
  // mask byte for each of the remaining 0-3 bytes.
  for (let i = lengthFor4; i < length; ++i) {
    alloc[i] = buffer[i] ^ mask[i & 3];
  }
  return alloc;
}
// Benchmark both masking strategies against the same 4 KiB random payload
// and the same random 4-byte mask.
group("mask", () => {
  // Build the plain Uint8Array views with an explicit byteOffset and length:
  // a Buffer's backing ArrayBuffer is not guaranteed to span exactly the
  // Buffer's own bytes, so wrapping `.buffer` alone could yield a view of
  // the wrong size.
  const payloadBytes = randomBytes(1024 * 4);
  const buffer = new Uint8Array(
    payloadBytes.buffer,
    payloadBytes.byteOffset,
    payloadBytes.byteLength
  );
  const maskBytes = randomBytes(4);
  const mask = new Uint8Array(
    maskBytes.buffer,
    maskBytes.byteOffset,
    maskBytes.byteLength
  );
  bench("for 1", () => maskForOne(mask, buffer));
  bench("for 4", () => maskForFour(mask, buffer));
});
await run();
• mask
------------------------------------------------- -----------------------------
for 1 13'381 ns/iter (6'200 ns … 536 µs) 7'400 ns 123 µs 188 µs
for 4 9'444 ns/iter (5'800 ns … 4'131 µs) 7'000 ns 122 µs 197 µs
summary for mask
for 4
1.42x faster than for 1
Uzlopak commented
Maybe a Duff's device for the last (up to three) operations?
tsctx commented
yes?
Uzlopak commented
Something like this. Sorry, I am still suffering from heat stroke and can't provide better code.
switch (length & 3) {
case 3:
buffer[i + 3] ^ mask[3]
case 2:
buffer[i + 2] ^ mask[2]
case 2:
buffer[i + 1] ^ mask[1]
...
tsctx commented
Take care and get well soon.