zsqk/deno-fn

feat: use magic number on Microsoft Windows, UTF-8 text files

Closed this issue · 0 comments

iugo commented
import { assertEquals } from 'https://deno.land/std@0.192.0/testing/asserts.ts';

// Round-trip check for textWithBOM: write BOM-prefixed bytes to disk, read them
// back, and verify both the raw BOM bytes and the decoded text.
Deno.test('test', async () => {
  const otext = `col,value
123,中文`;
  const path = './test.csv';

  const u8a = textWithBOM(otext);
  await Deno.writeFile(path, u8a);

  try {
    const res = await Deno.readFile(path);

    // The file must physically start with the UTF-8 BOM (EF BB BF). Comparing
    // decoded text alone cannot prove this, because the decoder drops the BOM.
    assertEquals(Array.from(res.subarray(0, 3)), [0xef, 0xbb, 0xbf]);

    // TextDecoder strips a leading BOM by default (ignoreBOM: false), so the
    // decoded text is expected to equal the original text without the BOM.
    const restext = new TextDecoder().decode(res);
    assertEquals(restext, otext);
  } finally {
    // Do not leave the fixture file behind in the working directory.
    await Deno.remove(path);
  }
});

// https://en.wikipedia.org/wiki/Magic_number_(programming)
// https://en.wikipedia.org/wiki/Byte_order_mark
/**
 * Prepend the UTF-8 byte order mark (BOM, bytes EF BB BF) to text so that
 * software which relies on the BOM to detect the encoding reads it as UTF-8.
 *
 * The operation is idempotent: input that already starts with the UTF-8 BOM is
 * not prefixed a second time, because a doubled BOM shows up as a stray U+FEFF
 * character at the start of the decoded text.
 *
 * @param v Text to prefix, either as a string (encoded as UTF-8) or as
 *   already-encoded bytes. The input is never mutated.
 * @returns A newly allocated Uint8Array that starts with exactly one UTF-8 BOM
 *   followed by the content.
 */
export function textWithBOM(v: Uint8Array | string): Uint8Array {
  const data = typeof v === 'string' ? new TextEncoder().encode(v) : v;

  // UTF-8 encoding of U+FEFF.
  const bom = [0xef, 0xbb, 0xbf];

  const alreadyMarked =
    data.length >= bom.length && bom.every((byte, i) => data[i] === byte);
  if (alreadyMarked) {
    // Copy so callers always receive a fresh array, as in the prefixed case.
    return new Uint8Array(data);
  }

  const u8a = new Uint8Array(bom.length + data.length);
  u8a.set(bom);
  u8a.set(data, bom.length);

  return u8a;
}

https://en.wikipedia.org/wiki/Byte_order_mark

字节序标记, 常被简称为 BOM.

在 UTF-8 编码中, BOM 实际上并不是必需的. 但 Windows 上的一些老软件依赖 BOM 来识别编码, 缺少 BOM 时会因无法识别编码而出现乱码.