feat: prepend a UTF-8 BOM (magic number) to text files so Microsoft Windows software detects the encoding
Closed this issue · 0 comments
iugo commented
import { assertEquals } from 'https://deno.land/std@0.192.0/testing/asserts.ts';
// Round-trip check for textWithBOM: write BOM-prefixed CSV text to disk, read
// it back, and verify both the raw BOM bytes and the decoded text.
Deno.test('test', async () => {
  const path = './test.csv';
  const otext = 'col,value\n123,中文';
  const u8a = textWithBOM(otext);
  await Deno.writeFile(path, u8a);
  try {
    const res = await Deno.readFile(path);
    // TextDecoder strips a leading BOM by default (ignoreBOM: false), so the
    // decoded-text comparison below cannot prove that a BOM was written.
    // Check the first three raw bytes explicitly.
    assertEquals(Array.from(res.subarray(0, 3)), [239, 187, 191]);
    const restext = new TextDecoder().decode(res);
    assertEquals(restext, otext);
  } finally {
    // Do not leave the temporary file behind, even when an assertion fails.
    await Deno.remove(path);
  }
});
// https://en.wikipedia.org/wiki/Magic_number_(programming)
// https://en.wikipedia.org/wiki/Byte_order_mark
/**
 * Prepends the UTF-8 byte order mark (EF BB BF) to text so that software
 * which relies on the BOM to detect UTF-8 (for example some Windows
 * applications) reads it correctly.
 *
 * The function is idempotent: input that already starts with the UTF-8 BOM
 * is returned without a second BOM, because a doubled BOM shows up as a
 * stray U+FEFF character in the content.
 *
 * @param v Text to prefix; a string is encoded as UTF-8, a Uint8Array is
 *   used as-is and is never modified.
 * @returns A newly allocated Uint8Array holding the BOM followed by the data.
 */
export function textWithBOM(v: Uint8Array | string): Uint8Array {
  const bom = [0xef, 0xbb, 0xbf];
  const data = typeof v === 'string' ? new TextEncoder().encode(v) : v;

  const alreadyMarked =
    data.length >= bom.length && bom.every((byte, i) => data[i] === byte);
  if (alreadyMarked) {
    // Copy so the result never aliases the caller's buffer, matching the
    // fresh-array behavior of the BOM-adding path below.
    return new Uint8Array(data);
  }

  const out = new Uint8Array(bom.length + data.length);
  out.set(bom);
  out.set(data, bom.length);
  return out;
}
https://en.wikipedia.org/wiki/Byte_order_mark
字节序标记, 常被简称为 BOM.
在 UTF-8 编码中, 实际是不需要的. 但是诸如 Windows 的一些老软件, 则将 BOM 视为必须, 否则会无法识别编码而出现乱码.