pugjs/pug

Pug doesn't accept non-English tag and class names

MoamenAbdelsattar opened this issue · 3 comments

Pug Version: The version in https://pugjs.org/js/pug.js

Node Version: No node

I need pug to compile sources with non-English tag names. It's legal in HTML, but pugjs doesn't accept it. The reason I need non-English tag names is that I'm editing Arabic documents. Arabic is written from right to left. If I write a p tag at the beginning of the line, gedit (my text editor) will treat the whole line as left-to-right. I need to type an Arabic tag name to make my text editor treat the rest of the line as right-to-left.

I need to write ف بعض المحتوى and then compile it to:

<ف>
بعض المحتوى
</ف>

Then with javascript I can convert <ف> tags again to <p> tags easily.

pypugjs does compile Arabic tag names correctly. Can you see how they accomplished it? Thank you.

Also can you guide me to a quick workaround if there is any?

Input JavaScript Values

// Render the template file input.pug, passing an options object whose
// whatIsIt property is set to 'language'.
pug.renderFile('input.pug', {
  whatIsIt: 'language',
});

Input Pug

ف بعض المحتوى

Expected HTML

<ف>بعض المحتوى</ف>

Error

pug.js:1 Uncaught Error: Pug:1:1
  > 1| ف بعض المحتوى
-------^

unexpected text "ف بعض"
    at t.exports (pug.js:1:654371)
    at a.error (pug.js:1:658197)
    at a.fail (pug.js:1:677518)
    at a.advance (pug.js:1:679063)
    at a.callLexerFunction (pug.js:1:677811)
    at a.getTokens (pug.js:1:679115)
    at t.exports (pug.js:1:658111)
    at Object.lex (pug.js:1:902905)
    at a.string (pug.js:1:682526)
    at g (pug.js:1:902652)

Additional Comments

Pug also doesn't accept Arabic class names, giving the following error:

"ق" is not a valid class name.  Class names can only contain "_", "-", a-z and 0-9, and must contain at least one of "_", or a-z

For anyone facing the same issue as me, use the following workaround. It is still not heavily tested, though.

/**
 * Encodes a string as uppercase hexadecimal, four digits per UTF-16 code unit.
 *
 * Each code unit is converted with charCodeAt, left-padded with zeros to four
 * hex digits, and the groups are concatenated without separators
 * (e.g. "a" becomes "0061").
 *
 * @param {string} str - Text to encode.
 * @returns {string} Concatenated 4-digit uppercase hex groups.
 */
function toUnicode(str) {
	const hexGroups = [];
	for (const codeUnit of str.split('')) {
		const hex = codeUnit.charCodeAt(0).toString(16);
		hexGroups.push(hex.padStart(4, '0').toUpperCase());
	}
	return hexGroups.join('');
}

/**
 * Decodes text by replacing every run of four characters drawn from
 * 0-9 / A-F (uppercase only) with the character whose UTF-16 code unit
 * has that hexadecimal value. Anything that does not form such a
 * four-character group is left untouched.
 *
 * @param {string} text - Text containing 4-digit uppercase hex groups.
 * @returns {string} The text with each group replaced by its character.
 */
function toChar(text) {
   const decodeGroup = (hexGroup) => String.fromCharCode(parseInt(hexGroup, 16));
   return text.replace(/[\dA-F]{4}/g, decodeGroup);
}
var pug = require('pug');
/**
 * Renders Pug source whose tag, class and id names may contain non-ASCII
 * letters.
 *
 * Before rendering, every tag / class / id name found by the patterns below
 * is replaced by `_Y<hex>_Z`, where `<hex>` is the name passed through
 * `toUnicode`. The rewritten source is handed to `pug.render`, and every
 * `_Y<hex>_Z` marker in the rendered output is then turned back into text
 * with `toChar`.
 *
 * Relies on `toUnicode`, `toChar` and `pug` being defined in the enclosing
 * scope.
 *
 * @param {string} source - Pug template source.
 * @returns {string} The rendered output with the original names restored.
 */
function SafePugRender(source){
    // Every pattern captures (prefix to keep)(name to encode). They are
    // applied in order, each one to the output of the previous one.
    // Declared locally: the undeclared assignment used previously throws in
    // strict mode / ES modules and leaks a global otherwise.
    const patterns = [
        /^(\s*)((?:\p{L}|\d|_|-)+)/gmu, // Tag
        /(#\[\s*)((?:\p{L}|\d|_|-)+)/gmu, // Inline Tag
        /^(\s*(?:\p{L}|\d|_|-|#)*?\.)((?:\p{L}|\d|_|-)+)/gmu, // Class
        /(#\[\s*(?:\p{L}|\d|_|-|#)*?\.)((?:\p{L}|\d|_|-)+)/gmu, // Inline Class
        /^(\s*(?:\p{L}|\d|_|-|\.)*?#)((?:\p{L}|\d|_|-)+)/gmu, // Id
        /(#\[\s*(?:\p{L}|\d|_|-|\.)*?#)((?:\p{L}|\d|_|-)+)/gmu // Inline id
    ];

    // Work on a copy instead of reassigning the parameter.
    let encoded = source;
    for (const pattern of patterns) {
        encoded = encoded.replaceAll(
            pattern,
            (match, prefix, name) => `${prefix}_Y${toUnicode(name)}_Z`
        );
    }

    // Restore the encoded names in the rendered output.
    return pug.render(encoded).replaceAll(
        /_Y([\dA-F]*)_Z/gmu,
        (match, hex) => toChar(hex)
    );
}

Please read the code carefully and understand it before using it.

Update: It's not legal to have non-English tag names in HTML; you need to map them to English tag names before returning the result. However, it is legal to have Arabic class names and id names. This is the updated code:

/**
 * Converts a string into a separator-free sequence of 4-digit uppercase
 * hexadecimal numbers, one number per UTF-16 code unit of the input
 * (zero-padded on the left, e.g. "a" -> "0061").
 *
 * @param {string} str - Text to encode.
 * @returns {string} The hex-encoded text.
 */
function toUnicode(str) {
	const encodeUnit = (codeUnit) => {
		const hex = codeUnit.charCodeAt(0).toString(16).toUpperCase();
		return hex.padStart(4, '0');
	};
	return str.split('').reduce((encoded, codeUnit) => encoded + encodeUnit(codeUnit), '');
}

/**
 * Replaces each group of four characters from the set 0-9 / A-F
 * (uppercase only) with the character produced by String.fromCharCode for
 * that hexadecimal value. Text outside such groups is returned unchanged.
 *
 * @param {string} text - Text containing 4-digit uppercase hex groups.
 * @returns {string} The decoded text.
 */
function toChar(text) {
   return text.replace(/[\dA-F]{4}/g, (hexGroup) => {
      const codeUnit = parseInt(hexGroup, 16);
      return String.fromCharCode(codeUnit);
   });
}
// Maps non-English tag names (keys) to the HTML tag names that are written
// in their place (values). Declared explicitly: an undeclared assignment
// throws in strict mode / ES modules and creates an accidental global
// otherwise.
const tagsDict = {
    "ف" : "p",
    "ن" : "span",
    // etc..
};
var pug = require('pug');
/**
 * Renders Pug source whose tag, class and id names may contain non-ASCII
 * letters.
 *
 * Tag names are rewritten first:
 *   - names made only of a-z, 0-9, "_" and "-" (case-insensitive) are kept;
 *   - names found in `tagsDict` are replaced by their mapped value;
 *   - any other name becomes "u--" followed by the lower-cased `toUnicode`
 *     encoding of the name.
 * Class and id names are then replaced by `_Y<hex>_Z`, where `<hex>` is the
 * name passed through `toUnicode`. After rendering, each `_Y<hex>_Z` marker
 * in the output is turned back into text with `toChar`.
 *
 * Relies on `toUnicode`, `toChar` and `tagsDict` being defined in the
 * enclosing scope.
 *
 * @param {string} source - Pug template source.
 * @param {{render: function(string): string}} pug - Object whose `render`
 *   method compiles the rewritten source (e.g. the `pug` module).
 * @returns {string} The rendered output with class and id names restored.
 */
function SafePugRender(source, pug){
    // All patterns capture (prefix to keep)(name to rewrite). They are
    // declared locally; the undeclared assignments used previously throw in
    // strict mode / ES modules and leak globals otherwise.
    const patterns = [
        /^(\s*(?:\p{L}|\d|_|-|#)*?\.)((?:\p{L}|\d|_|-)+)/gmu, // Class
        /(#\[\s*(?:\p{L}|\d|_|-|#)*?\.)((?:\p{L}|\d|_|-)+)/gmu, // Inline Class
        /^(\s*(?:\p{L}|\d|_|-|\.)*?#)((?:\p{L}|\d|_|-)+)/gmu, // Id
        /(#\[\s*(?:\p{L}|\d|_|-|\.)*?#)((?:\p{L}|\d|_|-)+)/gmu // Inline id
    ];
    const tagPatterns = [
        /^(\s*)((?:\p{L}|\d|_|-)+)/gmu, // Tag
        /(#\[\s*)((?:\p{L}|\d|_|-)+)/gmu // Inline Tag
    ];

    // Work on a copy instead of reassigning the parameter.
    let prepared = source;

    // Tags must be rewritten before classes/ids: the class and id patterns
    // run on the output of this step.
    for (const tagPattern of tagPatterns) {
        prepared = prepared.replaceAll(tagPattern, (match, prefix, name) => {
            if (/^[a-z0-9_-]+$/i.test(name)) {
                return prefix + name;
            }
            if (name in tagsDict) {
                return prefix + tagsDict[name];
            }
            return `${prefix}u--${toUnicode(name).toLowerCase()}`;
        });
    }

    for (const pattern of patterns) {
        prepared = prepared.replaceAll(
            pattern,
            (match, prefix, name) => `${prefix}_Y${toUnicode(name)}_Z`
        );
    }

    const rendered = pug.render(prepared);
    // Restore the encoded class and id names in the rendered output.
    return rendered.replaceAll(/_Y([\dA-F]*)_Z/gmu, (match, hex) => toChar(hex));
}