FasterXML/aalto-xml

How to parse XML strings with unbound namespace prefixes?

Opened this issue · 0 comments

I am trying to parse an XML string such as <theprefix:entry>whatever</theprefix:entry> using an AsyncXMLStreamReader.

But I get the following error: Exception in thread "main" com.fasterxml.aalto.WFCException: Unbound namespace prefix 'theprefix' (for element name 'theprefix:entry').

If, however, I try with the unprefixed version: <entry>whatever</entry>, it all works fine.

I do not have control over the input XML that I need to parse (XMPP protocol messages, in my case).

Is there anything I could do or configure at the factory/parser level so that this input is accepted and parsed as valid XML?

Please refer to this jbang script I am using to reproduce. If you'd rather have a complete maven or gradle project, let me know and I will prepare one:

///usr/bin/env jbang "$0" "$@" ; exit $?

//DEPS com.fasterxml:aalto-xml:1.3.3

import static javax.xml.XMLConstants.DEFAULT_NS_PREFIX;
import static javax.xml.XMLConstants.NULL_NS_URI;

import java.nio.charset.StandardCharsets;
import java.util.Objects;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;

import com.fasterxml.aalto.AsyncByteArrayFeeder;
import com.fasterxml.aalto.AsyncXMLInputFactory;
import com.fasterxml.aalto.AsyncXMLStreamReader;
import com.fasterxml.aalto.in.FixedNsContext;
import com.fasterxml.aalto.stax.InputFactoryImpl;

/**
 * Minimal reproduction script: feeds a single prefixed element, whose prefix is
 * never declared, to a non-blocking Aalto reader and checks the sequence of
 * events the reader reports.
 *
 * <p>Each check throws {@link IllegalStateException} on mismatch, so a clean
 * exit means every expectation held.
 */
public class XmlParserTest {

    private static final AsyncXMLInputFactory INPUT_FACTORY = new InputFactoryImpl();

    // The prefix "theprefix" has no xmlns:theprefix declaration anywhere in the input.
    private static final String XML_TXT = "<theprefix:entry>whatever</theprefix:entry>";
    // private static final String XML_TXT = "<entry>whatever</entry>"; <== This works

    /**
     * Verifies that the event code returned by the parser is the expected one.
     *
     * @param expectedTokenType the event code the caller expects
     * @param parserState the event code actually returned by the parser
     * @throws IllegalStateException if the two codes differ
     */
    private static void assertTokenType(int expectedTokenType, int parserState) {
        if (parserState != expectedTokenType) {
            throw new IllegalStateException("Expected state %d, got %d"
                .formatted(expectedTokenType, parserState));
        }
    }

    /**
     * Verifies that a value read from the parser equals the expected value,
     * using {@link Objects#equals(Object, Object)} (null-safe).
     *
     * @param expectedValue the value the caller expects
     * @param parserValue the value actually obtained from the parser
     * @throws IllegalStateException if the two values are not equal
     */
    private static void assertEquals(Object expectedValue, Object parserValue) {
        if (!Objects.equals(expectedValue, parserValue)) {
            // Generic wording: this helper is used for element text as well as QNames.
            throw new IllegalStateException("Expected value %s, got %s"
                .formatted(expectedValue, parserValue));
        }
    }

    /**
     * Feeds {@code XML_TXT} to an async reader in one chunk and asserts the
     * expected event sequence.
     *
     * @param args unused
     * @throws XMLStreamException if the parser rejects the input
     */
    public static void main(String... args) throws XMLStreamException {
        AsyncXMLStreamReader<AsyncByteArrayFeeder> parser = INPUT_FACTORY.createAsyncForByteArray();
        try {
            byte[] bytes = XML_TXT.getBytes(StandardCharsets.UTF_8);
            parser.getInputFeeder().feedInput(bytes, 0, bytes.length);
            assertTokenType(XMLStreamConstants.START_DOCUMENT, parser.next());
            assertTokenType(XMLStreamConstants.START_ELEMENT, parser.next());
            // assertEquals(new QName(NULL_NS_URI, "entry", DEFAULT_NS_PREFIX), parser.getName());
            assertEquals(new QName(NULL_NS_URI, "entry", "theprefix"), parser.getName());
            assertTokenType(XMLStreamConstants.CHARACTERS, parser.next());
            assertEquals("whatever", parser.getText());
            assertTokenType(XMLStreamConstants.END_ELEMENT, parser.next());
            // End of input has not been signalled yet, so the script expects the
            // reader to report that it needs more data rather than END_DOCUMENT.
            assertTokenType(AsyncXMLStreamReader.EVENT_INCOMPLETE, parser.next());
            parser.getInputFeeder().endOfInput();
            assertTokenType(XMLStreamConstants.END_DOCUMENT, parser.next());
        } finally {
            // Close the reader even when an assertion or the parser itself throws.
            parser.close();
        }
    }
}

The error output reads:

jbang "/Users/miguelgl/development/learning/xml-bugreport/XmlParserTest.java"
[jbang] Building jar for XmlParserTest.java...
Exception in thread "main" com.fasterxml.aalto.WFCException: Unbound namespace prefix 'theprefix' (for element name 'theprefix:entry')
 at [row,col {unknown-source}]: [1,18]
        at com.fasterxml.aalto.in.XmlScanner.reportInputProblem(XmlScanner.java:1333)
        at com.fasterxml.aalto.in.XmlScanner.reportUnboundPrefix(XmlScanner.java:1473)
        at com.fasterxml.aalto.async.AsyncByteArrayScanner.finishStartElement(AsyncByteArrayScanner.java:1865)
        at com.fasterxml.aalto.async.AsyncByteArrayScanner.handleStartElement(AsyncByteArrayScanner.java:1661)
        at com.fasterxml.aalto.async.AsyncByteArrayScanner.handleStartElementStart(AsyncByteArrayScanner.java:1606)
        at com.fasterxml.aalto.async.AsyncByteScanner.nextFromProlog(AsyncByteScanner.java:887)
        at com.fasterxml.aalto.stax.StreamReaderImpl.next(StreamReaderImpl.java:790)
        at XmlParserTest.main(XmlParserTest.java:46)