/tinyentities

Encoding and decoding HTML entities shouldn't be half of your bundle size.

Primary language: TypeScript · License: MIT

tinyentities

Encoding and decoding HTML entities shouldn't be half of your bundle size. Unfortunately, it's that way with some other libraries. Not with tinyentities.

Usage

import {
  decodeHTML,
  decodeXML,
  escapeHTML, // Use like entities' escapeText
  escapeHTMLAttribute, // Use like entities' escapeAttribute
  encodeHTML,
  escapeXML,
  escapeXMLAttribute, // Use like entities' escapeUTF8
  encodeXML,
  tryReadHTML, // Use when you would use entities' EntityDecoder
  tryReadXML, // Use when you would use entities' EntityDecoder
} from "tinyentities";

// Decoding: entity references in the input come back as the characters they stand for.
console.log(decodeHTML("&lt;hi&gt;")); // <hi>
console.log(decodeXML("&lt;hi&gt;")); // <hi>

// HTML output: for this input, all three helpers produce the same escaped text.
console.log(escapeHTML("<hi>")); // &lt;hi&gt;
console.log(escapeHTMLAttribute("<hi>")); // &lt;hi&gt;
console.log(encodeHTML("<hi>")); // &lt;hi&gt;

// XML output: likewise, all three helpers agree on this input.
console.log(escapeXML("<hi>")); // &lt;hi&gt;
console.log(escapeXMLAttribute("<hi>")); // &lt;hi&gt;
console.log(encodeXML("<hi>")); // &lt;hi&gt;

// An example of how you might wrap tryReadHTML / tryReadXML in a TransformStream:
// (will log <hi>)
/**
 * Builds a TransformStream that decodes entities in incoming text chunks.
 *
 * Plain text is forwarded as soon as it is seen. Whenever the reader cannot
 * yet decide whether the text after an "&" is an entity, everything from that
 * "&" onward is held back and prepended to the next chunk.
 *
 * @param {boolean} useXML - Truthy to read entities with tryReadXML,
 *   otherwise tryReadHTML is used.
 * @returns {TransformStream} Stream that accepts text chunks and emits
 *   decoded text pieces.
 */
const createStreamingEntityDecoder = (useXML) => {
  const read = useXML ? tryReadXML : tryReadHTML;

  // Undecided tail of the previous chunk (always starts at an "&", or is empty).
  let pending = "";

  return new TransformStream({
    transform(chunk, controller) {
      const buffer = pending + chunk;
      pending = "";

      // Everything before `cursor` has already been emitted or consumed.
      let cursor = 0;

      for (;;) {
        const amp = buffer.indexOf("&", cursor);
        if (amp === -1) break;

        // Plain text in front of the "&" can go out right away.
        if (amp > cursor) {
          controller.enqueue(buffer.slice(cursor, amp));
        }

        // The reader is handed only what follows the "&".
        const outcome = read(buffer.slice(amp + 1));

        switch (outcome.type) {
          case "keep-going":
            // Possibly an entity, but more input is needed: keep "&" and the
            // rest for the next chunk and stop processing this one.
            pending = buffer.slice(amp);
            return;

          case "read":
            // A complete entity: emit its decoded form, then skip "&" plus
            // the characters the reader consumed.
            controller.enqueue(outcome.content);
            cursor = amp + 1 + outcome.consumed;
            break;

          default:
            // Not an entity: the "&" is literal text; resume right after it.
            controller.enqueue("&");
            cursor = amp + 1;
        }
      }

      // Whatever trails the last "&" handled above is plain text.
      if (cursor < buffer.length) {
        controller.enqueue(buffer.slice(cursor));
      }
    },

    flush(controller) {
      // The stream ended while an entity was still undecided: pass it through verbatim.
      if (pending) {
        controller.enqueue(pending);
      }
    },
  });
};
// Demo: run a response body through byte-to-text decoding and then through the
// streaming entity decoder (HTML mode), writing each decoded piece to stdout.
const decodedPieces = new Response(`&lt;hi&gt;`).body
  .pipeThrough(new TextDecoderStream())
  .pipeThrough(createStreamingEntityDecoder(false));

for await (const piece of decodedPieces) {
  process.stdout.write(piece);
}

Benchmarks

escapeHTML

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 157b / 128 gz | 0.42µs | 2.9ns/b |
| entities | 339b / 263 gz | 0.38µs | 3.1ns/b |
| html-entities | 28538b / 13146 gz | 1,300µs | 5.1ns/b |

escapeHTMLAttribute

Note

tinyentities also escapes < and > here for safety, which makes it slower.

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 192b / 145 gz | 0.28µs | 4ns/b |
| entities | 328b / 259 gz | 0.37µs | 1.8ns/b |
| html-entities | 28538b / 13146 gz | 1,300µs | 5.1ns/b |

escapeXML

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 129b / 117 gz | 0.25µs | 2.4ns/b |
| entities | 636b / 423 gz | 0.4µs | 5.2ns/b |
| html-entities | 28550b / 13150 gz | 1,300µs | 5.2ns/b |

escapeXMLAttribute

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 189b / 143 gz | 0.27µs | 4.9ns/b |
| entities | 636b / 423 gz | 0.37µs | 5.2ns/b |
| html-entities | 28550b / 13150 gz | 1,300µs | 5.2ns/b |

encodeHTML

Note

Other libraries have separate entity maps for encoding and decoding. If you're doing both, tinyentities will be smaller and not duplicate mappings. But if you only encode, like in this example, tinyentities will be slightly larger.

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 18102b / 7839 gz | 520µs | 13ns/b |
| entities | 14456b / 6247 gz | 120µs | 6.8ns/b |
| html-entities | 28535b / 13148 gz | 1,300µs | 12ns/b |

encodeXML

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 267b / 225 gz | 0.28µs | 9.3ns/b |
| entities | 636b / 423 gz | 0.38µs | 5.2ns/b |
| html-entities | 28547b / 13153 gz | 1,300µs | 13ns/b |

decodeHTML

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 18130b / 7780 gz | 550µs | 9.2ns/b |
| entities | 38623b / 22198 gz | 46µs | 7.2ns/b |
| html-entities | 28343b / 13252 gz | 1,300µs | 11ns/b |

decodeXML

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 381b / 245 gz | 0.29µs | 7.8ns/b |
| entities | 6483b / 2223 gz | 5.4µs | 6ns/b |
| html-entities | 28357b / 13259 gz | 1,300µs | 10ns/b |

tryReadHTML

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 18606b / 8024 gz | 520µs | 15ns/b |
| entities | 38277b / 22008 gz | 41µs | 13ns/b |

tryReadXML

| Implementation | Size | Initialize (sampled) | Speed (sampled) |
| --- | --- | --- | --- |
| tinyentities | 725b / 438 gz | 0.27µs | 11ns/b |
| entities | 6141b / 2073 gz | 5.4µs | 11ns/b |

Credit to

entities for showing the power of deltas in compression

html-entities for some awesome regex