One of my main clients uses the Outlook email reader. Their Web-based UI needs to read and view those messages. This is where such things are taken care of.
To make this easy on the (old) server we’ll inline the images into the HTML from the “attachments” portion of the .msg
file.
First we need to parse the HTML. That’s node-html-parser.
We also need to know the type of the image: image-type
to the rescue.
npm install node-html-parser image-type
import { parse } from 'node-html-parser';
import { default as iconvLite } from 'iconv-lite';
import { deEncapsulateSync } from 'rtf-stream-parser';
import { decompressRTF } from '@kenjiuno/decompressrtf';
import { escapeHTML } from '@wordpress/escape-html'
import fs from 'fs'
import imageType from 'image-type';
/**
 * Produce the HTML body of a parsed Outlook message.
 *
 * If the message has a compressed RTF body and de-encapsulating it yields
 * HTML (`mode === 'html'`), that HTML is returned as-is. In every other case
 * the plain-text `body` is returned wrapped in a `<pre>` element.
 *
 * @param {object} msg - Reader object exposing `getFileData()`; its
 *   `parserConfig` is initialised to `{}` when unset.
 * @returns {string} HTML markup for the message body.
 */
export const msgInternalHtml = (msg) => {
  msg.parserConfig = msg.parserConfig || {};
  const msgInfo = msg.getFileData();

  if (msgInfo.compressedRtf) {
    const rtfBytes = Buffer.from(decompressRTF(msgInfo.compressedRtf));
    const rtf = deEncapsulateSync(rtfBytes, { decode: iconvLite.decode });
    if (rtf && rtf.mode === 'html') {
      return rtf.text;
    }
  }

  // The fallback body is plain text, not markup: escape it so characters such
  // as "<" and "&" are shown literally instead of being parsed as HTML, and
  // never render a missing body as the string "undefined".
  const plainBody = msgInfo.body == null ? '' : String(msgInfo.body);
  return '<pre>' + escapeHTML(plainBody) + '</pre>';
};
/**
 * Parse the message's HTML body into a document tree.
 *
 * @param {object} msg - Message reader handed to `msgInternalHtml`.
 * @returns {*} Whatever `parse` returns for the body markup.
 */
export const msgInternalDoc = (msg) => parse(msgInternalHtml(msg));
/**
 * Collect every `img` element of a parsed message document.
 *
 * @param {object} msg - Unused here; kept so the signature matches the
 *   other helpers.
 * @param {object} doc - Parsed document exposing `querySelectorAll`.
 * @returns {*} The result of `doc.querySelectorAll('img')`.
 */
export const msgDocInlineImgs = (msg, doc) => doc.querySelectorAll('img');
/**
 * Rewrite an `<img src="cid:...">` so that it embeds the referenced
 * attachment as a base64 `data:` URI.
 *
 * The image is returned untouched when it has no `src`, when the `src` is
 * not a `cid:` reference, when no attachment has a matching `pidContentId`,
 * when the attachment has no content, or when `imageType` cannot identify
 * the content. None of these cases throw.
 *
 * @param {object} msg - Reader exposing `getFileData()` and `getAttachment()`.
 * @param {object} img - Element exposing `getAttribute` / `setAttribute`.
 * @returns {Promise<object>} The same `img` element (mutated in place on success).
 */
export const inlineCidImg = async (msg, img) => {
  const src = img.getAttribute('src');
  // An <img> without a src attribute must not blow up the whole message.
  if (typeof src !== 'string' || !src.startsWith('cid:')) {
    return img;
  }

  const cid = src.substring(4);
  const attachments = msg.getFileData().attachments || [];
  const atto = attachments.find((i) => i.pidContentId === cid);
  if (!atto) {
    return img;
  }

  const attachment = msg.getAttachment(atto);
  const content = attachment && attachment.content;
  if (!content) {
    return img;
  }

  // imageType resolves to undefined for content it does not recognise.
  const type = await imageType(content);
  if (!type) {
    return img;
  }

  const b64 = Buffer.from(content).toString('base64');
  img.setAttribute('src', `data:${type.mime};base64,${b64}`);
  return img;
};
/**
 * Render a message body as HTML with its `cid:` images inlined.
 *
 * The body is parsed once via `msgInternalDoc`, every `img` found by
 * `msgDocInlineImgs` is passed through `inlineCidImg` (all in parallel), and
 * the document is serialised again.
 *
 * @param {object} msg - Message reader passed through to the helpers.
 * @returns {Promise<string>} The document's `outerHTML` after inlining.
 */
export const msgBodyHtml = async (msg) => {
  // msgInternalDoc already builds the HTML itself; calling msgInternalHtml
  // here as well would only decode the body a second time.
  const doc = msgInternalDoc(msg);
  const imgs = msgDocInlineImgs(msg, doc);
  await Promise.all(imgs.map((img) => inlineCidImg(msg, img)));
  return doc.outerHTML;
};
This is the heart of it for now. We want to export the message to JSON and grab any attachment.
import { program } from 'commander';
import fs from 'fs';
import path from 'path';
import pkg from '@kenjiuno/msgreader';
const MsgReader = pkg.default ;
import { default as iconvLite } from 'iconv-lite';
import { deEncapsulateSync } from 'rtf-stream-parser';
import { decompressRTF } from '@kenjiuno/decompressrtf';
import { escapeHTML } from '@wordpress/escape-html'
import { msgInternalHtml,
msgInternalDoc,
msgDocInlineImgs,
inlineCidImg,
msgBodyHtml
} from '../lib/html.js'
// `body-html`: render the message body as HTML with cid images inlined.
// The HTML is written to [saveToHtmlFilePath] when one is given, otherwise
// printed to stdout (same convention as the `rtf` command).
program
  .command('body-html <msgFilePath> [saveToHtmlFilePath]')
  .description('Parse msg file and return rtf body as HTML')
  .action(async (msgFilePath, saveToHtmlFilePath) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const msg = new MsgReader(msgFileBuffer);
    const html = await msgBodyHtml(msg);
    if (typeof saveToHtmlFilePath === 'string' && saveToHtmlFilePath.length >= 1) {
      fs.writeFileSync(saveToHtmlFilePath, html);
    } else {
      console.log(html);
    }
  });
// `json`: print the parsed message as JSON. With --full-json the complete
// parsed structure is pretty-printed; otherwise a compact summary that also
// carries the de-encapsulated RTF body.
program
  .command('json <msgFilePath>')
  .description('Parse msg file and print json structure')
  .option('-f, --full-json', 'print full JSON')
  .action((msgFilePath, options) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const testMsg = new MsgReader(msgFileBuffer);
    testMsg.parserConfig = testMsg.parserConfig || {};
    const testMsgInfo = testMsg.getFileData();

    if (options.fullJson) {
      // The full dump never used the decoded RTF, so do not decode it here.
      console.log(JSON.stringify(testMsgInfo, null, 2));
      return;
    }

    // Not every message carries an RTF body; report `rtf: null` instead of
    // crashing inside decompressRTF on an undefined input.
    const rtf = testMsgInfo.compressedRtf
      ? deEncapsulateSync(
          Buffer.from(decompressRTF(testMsgInfo.compressedRtf)),
          { decode: iconvLite.decode }
        )
      : null;

    const {
      attachments,
      recipients,
      subject,
      senderName,
      inetAcctName,
      body,
    } = testMsgInfo;

    console.log(
      JSON.stringify({
        sender: {
          name: senderName,
          email: inetAcctName,
        },
        recipients,
        subject,
        body,
        rtf,
        attachments,
      })
    );
  });
// `rtf`: decompress the message's RTF body and either save it to
// [saveToRtfFilePath] or print it to stdout.
program
  .command('rtf <msgFilePath> [saveToRtfFilePath]')
  .description('Parse msg file and print decompressed rtf')
  .action((msgFilePath, saveToRtfFilePath) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const testMsg = new MsgReader(msgFileBuffer);
    const testMsgInfo = testMsg.getFileData();

    // Messages without an RTF body have no compressedRtf; say so instead of
    // failing inside decompressRTF.
    if (!testMsgInfo.compressedRtf) {
      console.warn('no rtf is contained.');
      return;
    }

    const body = Buffer.from(decompressRTF(testMsgInfo.compressedRtf));
    if (typeof saveToRtfFilePath === "string" && saveToRtfFilePath.length >= 1) {
      fs.writeFileSync(saveToRtfFilePath, body);
    } else {
      console.log(body.toString('utf-8'));
    }
  });
// The `body-html` action is async, so parse with parseAsync and surface any
// rejection instead of leaving the returned promise unhandled.
program
  .parseAsync(process.argv)
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
Outlook messages often contain an RTF file, and that RTF is often encapsulated HTML (FFS!). Since we want to output HTML anyway, as luck would have it, we are not the only ones who need to extract it.
npm install rtf-stream-parser iconv-lite '@wordpress/escape-html'
$ npm create vite@latest
Need to install the following packages:
create-vite@4.3.1
Ok to proceed? (y) y
✔ Project name: … ecm-msg
✔ Select a framework: › Vanilla
✔ Select a variant: › JavaScript
Scaffolding project in /home/drewc/me/ecm/src/ecm-msg/ecm-msg...
Done. Now run:
cd ecm-msg
npm install
npm run dev
cd ecm-msg ; mv * ../ ; mv .gitignore ../;
cd .. ; rmdir ecm-msg
npm install '@kenjiuno/msgreader'
npm install 'commander'
Forked from the repo: https://github.com/HiraokaHyperTools/msgreader/blob/master/cli.js
const program = require('commander');
const MsgReader = require('@kenjiuno/msgreader').default;
const { props, typeNames } = require('@kenjiuno/msgreader/lib/Defs');
const { Reader } = require('@kenjiuno/msgreader/lib/Reader');
const fs = require('fs');
const path = require('path');
const { decompressRTF } = require('@kenjiuno/decompressrtf');
// `parse`: read a .msg file and print its parsed structure, either as the
// raw object or, with --full-json, as indented JSON.
program
  .command('parse <msgFilePath>')
  .description('Parse msg file and print parsed structure')
  .option('-f, --full-json', 'print full JSON')
  .option('-i, --include-raw-props', 'include raw (and also unknown) props')
  .action((msgFilePath, options) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));
    reader.parserConfig = reader.parserConfig || {};
    if (options.includeRawProps) {
      reader.parserConfig.includeRawProps = true;
    }

    const fileData = reader.getFileData();
    if (options.fullJson) {
      console.log(JSON.stringify(fileData, null, 2));
    } else {
      console.log(fileData);
    }
  });
// `rtf`: decompress the message's RTF body and either save it to
// [saveToRtfFilePath] or print it to stdout.
program
  .command('rtf <msgFilePath> [saveToRtfFilePath]')
  .description('Parse msg file and print decompressed rtf')
  .action((msgFilePath, saveToRtfFilePath) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const testMsg = new MsgReader(msgFileBuffer);
    const testMsgInfo = testMsg.getFileData();

    // A message may have no RTF body at all; warn (as the `html` command
    // does for missing HTML) rather than crash inside decompressRTF.
    if (!testMsgInfo.compressedRtf) {
      console.warn("no rtf is contained.");
      return;
    }

    const body = Buffer.from(decompressRTF(testMsgInfo.compressedRtf));
    if (typeof saveToRtfFilePath === "string" && saveToRtfFilePath.length >= 1) {
      fs.writeFileSync(saveToRtfFilePath, body);
    } else {
      console.log(body.toString("utf8"));
    }
  });
/**
 * Flatten the attachments of a message, descending into embedded messages.
 *
 * An embedded message (`innerMsgContent` truthy) is listed as
 * `<prefix><name>.msg`, followed by its own attachments, whose names are
 * prefixed with the names of ALL enclosing embedded messages joined by
 * `delimiter`. Regular attachments are listed as `<prefix><fileName>`.
 *
 * @param {object} fieldsData - Parsed message data with an `attachments` array.
 * @param {string} delimiter - Separator placed after each enclosing message name.
 * @returns {Array<{fileName: string, attachmentRef: object}>} Entries in
 *   depth-first document order.
 */
function listAttachmentsRecursively(fieldsData, delimiter) {
  const attachments = [];

  const walk = (fields, prefix) => {
    for (const att of fields.attachments) {
      if (att.innerMsgContent) {
        attachments.push({
          fileName: prefix + att.name + ".msg",
          attachmentRef: att,
        });
        // Carry the accumulated prefix down; passing only att.name would drop
        // the outer message names for anything nested two or more levels deep.
        walk(att.innerMsgContentFields, prefix + att.name + delimiter);
      } else {
        attachments.push({
          fileName: prefix + att.fileName,
          attachmentRef: att,
        });
      }
    }
  };

  walk(fieldsData, "");
  return attachments;
}
// `list-att`: print one entry per attachment, including those of embedded
// messages, as produced by listAttachmentsRecursively.
program
  .command('list-att <msgFilePath>')
  .description('Parse msg file and list attachment file names')
  .action((msgFilePath) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));
    const entries = listAttachmentsRecursively(reader.getFileData(), "_");
    entries.forEach((entry) => {
      console.log(entry);
    });
  });
// `save-att`: write every attachment (including those of embedded messages)
// into <saveToDir>, creating the directory when needed.
program
  .command('save-att <msgFilePath> <saveToDir>')
  .description('Parse msg file and write all attachment files')
  .action((msgFilePath, saveToDir) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const testMsg = new MsgReader(msgFileBuffer);
    const testMsgInfo = testMsg.getFileData();

    const targetDir = path.resolve(saveToDir);
    fs.mkdirSync(targetDir, { recursive: true });

    const attachments = listAttachmentsRecursively(testMsgInfo, "_");
    for (const attachment of attachments) {
      // Attachment names come from the (untrusted) message. Keep only the
      // final path component so a name such as "../../x" or an absolute path
      // cannot be written outside of saveToDir.
      const safeName = path.basename(String(attachment.fileName));
      if (safeName === "" || safeName === "." || safeName === "..") {
        console.warn("skipping attachment with unusable file name:", attachment.fileName);
        continue;
      }
      const attFilePath = path.join(targetDir, safeName);
      fs.writeFileSync(attFilePath, testMsg.getAttachment(attachment.attachmentRef).content);
    }
  });
// `dump`: parse the file with a property observer installed and log one line
// per observed property (message index, tag, known name/type, size and,
// with --print-raw-data, the raw value).
program
  .command('dump <msgFilePath>')
  .description('Dump msg file and print data')
  .option('-p, --print-raw-data', 'print raw data')
  .action((msgFilePath, options) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));
    let nextMsgIndex = 0;

    const propertyObserver = (fields, tag, raw) => {
      // Number each distinct `fields` object the first time it is seen.
      if (fields.msgIndex === undefined) {
        fields.msgIndex = nextMsgIndex++;
      }

      // Tag rendered as 8 upper-case hex digits; the digits from position 4
      // onward select the entry in typeNames.
      const key = tag.toString(16).padStart(8, "0").toUpperCase();
      const prop = props.find((it) => it.key === key);
      const type = typeNames[parseInt(key.slice(4), 16)];

      console.info(
        "msgIdx:", fields.msgIndex,
        "dataType:", `'${fields.dataType}'`,
        "tag:", `0x${key}`,
        "name:", (prop && prop.name) || null,
        "type:", type || null,
        "size:", raw && raw.byteLength,
        "data:", options.printRawData ? raw : undefined,
      );
    };

    reader.parserConfig = { propertyObserver };
    // Parsing is what drives the observer; the parsed result is not needed.
    reader.getFileData();
  });
// `expose`: copy every file and folder of the compound file into
// <exportToDir>, mirroring the folder structure and logging each path.
program
  .command('expose <msgFilePath> <exportToDir>')
  .description('Expose files/folders in Compound File Binary Format (CFBF)')
  .action((msgFilePath, exportToDir, options) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const store = new Reader(msgFileBuffer);
    store.parse();

    function expose(folder, saveTo) {
      try {
        fs.mkdirSync(saveTo, { recursive: true });
      } catch (err) {
        // Still skip this folder, but report why instead of dropping the
        // error silently.
        console.error("cannot create directory", saveTo, "-", err.message);
        return;
      }

      for (const fileName of folder.fileNames()) {
        const array = folder.readFile(fileName);
        // Named filePath so the `path` module is not shadowed.
        const filePath = saveTo + "/" + fileName;
        console.info(filePath);
        // writeFileSync expects a string or binary view; use an empty byte
        // array, not a plain [], when the entry has no data.
        fs.writeFileSync(filePath, array === null ? new Uint8Array(0) : array);
      }

      for (const subFolder of folder.subFolders()) {
        expose(subFolder, saveTo + "/" + subFolder.name);
      }
    }

    expose(store.rootFolder(), exportToDir);
  });
// `html`: print the message's HTML, preferring the binary `html` field
// (decoded with --encoding) over the string `bodyHtml` field.
program
  .command('html <msgFilePath>')
  .description('Parse msg file and display 1013001f:bodyHtml or 10130102:html')
  .option('-e, --encoding <encoding>', 'The encoding type to decode binary html.', 'utf8')
  .action((msgFilePath, options) => {
    const reader = new MsgReader(fs.readFileSync(msgFilePath));
    const info = reader.getFileData();

    if (info.html !== undefined) {
      const decoded = Buffer.from(info.html).toString(options.encoding);
      console.log(decoded);
      return;
    }

    if (info.bodyHtml !== undefined) {
      console.log(info.bodyHtml);
      return;
    }

    console.warn("no html is contained.");
  });
// `walk`: traverse the whole compound file, read every file and verify that
// the number of bytes read matches the length recorded for it.
program
  .command('walk <msgFilePath>')
  .description('Walk entire msg file as a raw CFBF')
  .action((msgFilePath, options) => {
    const msgFileBuffer = fs.readFileSync(msgFilePath);
    const reader = new Reader(msgFileBuffer);
    reader.parse();

    function walk(folder, prefix) {
      console.info("Walking folder:", prefix);
      for (const fileSet of folder.fileNameSets()) {
        const contents = fileSet.provider();
        console.info("Verify file:", fileSet.name, "(", fileSet.length, ")", "read", contents.length, "bytes");
        if (fileSet.length != contents.length) {
          // Say which file failed and by how much; a bare Error gave the
          // user nothing to act on.
          throw new Error(
            `Length mismatch for ${prefix}${fileSet.name}: ` +
            `expected ${fileSet.length} bytes, read ${contents.length}`
          );
        }
      }
      for (const subFolder of folder.subFolders()) {
        walk(subFolder, `${prefix}${subFolder.name}/`);
      }
    }

    walk(reader.rootFolder(), "/");
  });
// `dummy1`: developer helper that parses test/msgInMsg.msg and prints the
// result of getAttachment(0).
program
  .command('dummy1')
  .action(() => {
    const reader = new MsgReader(fs.readFileSync('test/msgInMsg.msg'));
    // Parse first; the parsed data itself is not used here.
    reader.getFileData();
    console.log(reader.getAttachment(0));
  });
// `dummy2`: developer helper that parses test/voteItems.msg and prints the
// parsed structure.
program
  .command('dummy2')
  .action(() => {
    const reader = new MsgReader(fs.readFileSync('test/voteItems.msg'));
    console.log(reader.getFileData());
  });
// Hand the process arguments to commander to dispatch the chosen subcommand.
program.parse(process.argv);