mathjax/MathJax-demos-node

tex2mml-page with inlineMath

Closed this issue · 2 comments

I may be doing something wrong or have a misconception, but when I use component/tex2mml-page configured with inlineMath: [['\\(', '\\)'], ['$*$', '$*$'], ['$', '$']], is the output shown below correct and expected? I was expecting escaped dollars to remain untouched.

Given the following HTML:

<html>
<head></head>
<body>

<p>
  $e=mc^2$
</p>

<p> Cats \$4.00 and dogs \$5.00 </p>

</body>
</html>
Actual output
<html>
<head></head>
<body>

<p>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mi>e</mi>
<mo>=</mo>
<mi>m</mi>
<msup>
  <mi>c</mi>
  <mn>2</mn>
</msup>
</math>
</p>

<p> Cats <math xmlns="http://www.w3.org/1998/Math/MathML">
<mrow data-mjx-texclass="ORD">
  <mo>$</mo>
</mrow>
</math>4.00 and dogs <math xmlns="http://www.w3.org/1998/Math/MathML">
<mrow data-mjx-texclass="ORD">
  <mo>$</mo>
</mrow>
</math>5.00 </p>

</body>
</html>
Expected output
<html>
<head></head>
<body>

<p>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mi>e</mi>
<mo>=</mo>
<mi>m</mi>
<msup>
  <mi>c</mi>
  <mn>2</mn>
</msup>
</math>
</p>

<p> Cats <span>$</span>4.00 and dogs <span>$</span>5.00 </p>

</body>
</html>
modified `component/tex2mml-page`
#! /usr/bin/env -S node -r esm

/*************************************************************************
 *
 *  component/tex2mml-page
 *
 *  Uses MathJax v3 to convert all TeX in an HTML document to MathML.
 *
 * ----------------------------------------------------------------------
 *
 *  Copyright (c) 2020 The MathJax Consortium
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

//
//  The default TeX packages to use
//
const PACKAGES = 'base, autoload, ams, newcommand, require';

//
//  Get the command-line arguments.
//  At least one positional argument (the HTML file to convert) is required,
//  so a missing file is reported by the argument parser together with the
//  usage text, rather than as an error when the file is read below.
//
const argv = require('yargs')
    .demand(1).strict()
    .usage('$0 [options] file.html > converted.html')
    .options({
        em: {
            default: 16,
            describe: 'em-size in pixels'
        },
        packages: {
            default: PACKAGES,
            describe: 'the packages to use, e.g. "base, ams"; use "*" to represent the default packages, e.g, "*, bbox"'
        },
        dist: {
            boolean: true,
            default: false,
            describe: 'true to use webpacked version, false to use MathJax source files'
        }
    })
    .argv;

//
//  Read the HTML file named by the first positional argument
//
const htmlfile = require('fs').readFileSync(argv._[0], 'utf8');

//
//  Render action used instead of normal typesetting:
//  the math item's internal tree (math.root) is serialized to a MathML
//  string, that string is parsed as HTML by the document's adaptor, and
//  the first child of the parsed body becomes the item's typeset root.
//  Every math item is handled the same way; there is no special case
//  for items whose `display` is null.
//
function actionMML(math, doc) {
  const {adaptor} = doc;
  const serialized = MathJax.startup.toMML(math.root);
  const parsed = adaptor.parse(serialized, 'text/html');
  const container = adaptor.body(parsed);
  math.typesetRoot = adaptor.firstChild(container);
}

//
//  Configure MathJax
//
//  This global object is assigned before the startup module is loaded
//  by the require() call that follows this block.
//
MathJax = {
    loader: {
        // Location used for the 'mathjax' path prefix
        paths: {mathjax: 'mathjax-full/es5'},
        // With --dist an empty source map is used; otherwise the map from the MathJax source tree
        source: (argv.dist ? {} : require('mathjax-full/components/src/source.js').source),
        require: require,
        // Components to load: the full TeX input and the liteDOM adaptor
        load: ['input/tex-full', 'adaptors/liteDOM']
    },
    options: {
        renderActions: {
          // Entry for the 'typeset' action: the first function calls actionMML on every
          // item in doc.math; the second is actionMML itself
          // (presumably used when a single item is processed -- confirm against the MathJax docs).
          typeset: [150, (doc) => {for (const math of doc.math) actionMML(math, doc)}, actionMML]
        }
    },
    tex: {
        // The first "*" in --packages is replaced by the default PACKAGES list,
        // then the string is split on commas (surrounding whitespace ignored)
        packages: argv.packages.replace('\*', PACKAGES).split(/\s*,\s*/),
        // Inline math delimiter pairs: \( ... \), $*$ ... $*$, and $ ... $
        inlineMath: [['\\(', '\\)'], ['$*$', '$*$'], ['$', '$']],
    },
    'adaptors/liteDOM': {
        // Value of the --em option
        fontSize: argv.em
    },
    startup: {
        // Contents of the HTML file given on the command line
        document: htmlfile
    }
}

//
//  Load the MathJax startup module
//
require('mathjax-full/' + (argv.dist ? 'es5' : 'components/src/startup') + '/startup.js');

//
//  Wait for MathJax to start up, and then render the math.
//  Then output the resulting HTML file on stdout.
//
//  Failures are written to stderr and the exit status is set to 1, so an
//  error message never ends up inside the converted HTML when stdout is
//  redirected, and calling scripts can detect that the conversion failed.
//
MathJax.startup.promise.then(() => {
    const adaptor = MathJax.startup.adaptor;
    const html = MathJax.startup.document;
    html.render();
    console.log(adaptor.doctype(html.document));
    console.log(adaptor.outerHTML(adaptor.root(html.document)));
}).catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
dpvc commented

Thanks for the report. The problem is with the actionMML() function, which doesn't take the escaped characters properly into account. If you replace it with

// Render action that produces MathML instead of typeset output.
// Items whose `display` is null (the escaped-character case this fix
// addresses) are emitted as a <span> wrapping their original text;
// all other items are serialized to MathML and parsed back into nodes.
function actionMML(math, doc) {
  const {adaptor} = doc;
  if (math.display === null) {
    // Keep the text as-is inside a plain span
    const literal = adaptor.text(math.math);
    math.typesetRoot = adaptor.node('span', {}, [literal]);
    return;
  }
  const serialized = MathJax.startup.toMML(math.root);
  const container = adaptor.body(adaptor.parse(serialized, 'text/html'));
  math.typesetRoot = adaptor.firstChild(container);
}

That should make it work as you expect.

Yes, that does indeed work as expected. Thank you for the help!