"<" symbol is not parsed as text, although common browsers treat it as text
davidfoliveira opened this issue · 2 comments
davidfoliveira commented
If text in the HTML contains a "<", the text after it is not parsed.
Example: <title>We <3cupcakes</title>
The "<3cupcakes" is interpreted as a tag, whereas common browsers parse it as text.
davidfoliveira commented
Suggested patch:
diff -rc node_modules/htmlparser/lib/htmlparser.js node_modules/new_htmlparser/lib/htmlparser.js
*** node_modules/htmlparser/lib/htmlparser.js Thu Apr 12 19:04:06 2012
--- node_modules/new_htmlparser/lib/htmlparser.js Tue Mar 24 01:11:47 2015
***************
*** 219,225 ****
this._next = Parser._reTags.lastIndex - 1;
var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse
!
//A new element to eventually be appended to the element list
var element = {
raw: rawData
--- 219,238 ----
this._next = Parser._reTags.lastIndex - 1;
var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse
! // A tag element that doesn't finish with a '>' ? Naah.. this is text
! if ( this._parseState == ElementType.Tag && tagSep != ">" && rawData.substr(0,3) != "!--" ) {
! var prevElement = (this._elements.length > 0) ? this._elements[this._elements.length-1] : null;
! if ( prevElement && prevElement.type == ElementType.Text ) {
! prevElement.raw += '<'+rawData;
! prevElement.data += '<'+rawData;
! this._current = this._next+1;
! continue;
! }
! else {
! this._parseState = ElementType.Text;
! rawData = '<'+rawData;
! }
! }
//A new element to eventually be appended to the element list
var element = {
raw: rawData
eranimo commented
Can you submit a PR?