h0tk3y/better-parse

Skip/Ignore/Optional parts

rabbitfr opened this issue · 1 comments

Hi,

I have some questions about regexTokens and not parsing some parts of an input.

See file at bottom for reference.

Ignore

In main() there is a line to parse :

 rule: StartBonus = GRAPH 1:entrance(x=2, y=3)

But StartBonus may have a constructor like in entrance, which may
be empty, like in

rule: StartBonus() = GRAPH 1:entrance(x=2, y=3)

Or it can have things inside, but I do not want to parse them. I want to ignore them.

rule: StartBonus(foo) = GRAPH 1:entrance(x=2, y=3)

But when I try to add a regex such as ([^)].*), it interferes with other tokens, for example the lpar matching.

regexTokens

I think it's related.

At start of input file there is :

version: 0.5f

A regexp for the 0.5f value (not present in the file below) matches a lot of things.

So the next line

alphabetFile: missionAlphabet.xpr

is also matched by the 0.5f regexp.

Sample code (runnable):

import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken

/** Common supertype of the parsed top-level lines: [Version], [AlphabetFile], [Start] and [Rule]. */
interface Line

/** The `version:` line; [value] holds the version text as it appears in the input. */
data class Version(val value: String) : Line
/** The `alphabetFile:` line; [file] holds the file name as it appears in the input. */
data class AlphabetFile(val file: String) : Line

/** Closed set of expressions a `start:` or `rule:` line can carry: [TileMap] or [Graph]. */
sealed class Expression
/** A tile map expression described by two integer dimensions. */
data class TileMap(val width: Int, val height: Int) : Expression()
/** A graph expression holding the list of symbols it is made of. */
data class Graph(val symbols: List<Symbol>) : Expression()

/** Closed set of elements a [Graph] can contain; [Node] is the only subtype currently declared. */
sealed class Symbol
/** A graph node: numeric [id], [name], and the [constructor] argument list that follows the name. */
data class Node(val id: Int, val name: String, val constructor: Constructor) : Symbol()
// Edge symbol, currently commented out (note it would also need `Symbol()` to compile).
//data class Edge(val id: String, val name: String, val source: String, val target: String) : Symbol

// Assignment or BooleanExpression
/** A single constructor argument. Only [Assignment] is declared here; no BooleanExpression type exists yet. */
sealed class Argument
/** A `variable=value` argument; [value] is typed `Any` so it can hold whatever the parser produces. */
data class Assignment(val variable: String, val value: Any) : Argument()

/** The argument list attached to a node. */
data class Constructor(val arguments: List<Argument>)

/** The `start:` line, wrapping the expression that follows it. */
data class Start(val expression: Expression) : Line
/** A `rule:` line: the rule [name] and the expression assigned to it ([type]). */
data class Rule(val name: String, val type: Expression) : Line

/**
 * The whole parsed document: one [Version] line, one [AlphabetFile] line,
 * one [Start] line and a list of [Rule] lines.
 */
data class GGrammar(
        val version: Version,
        val alphabetFile: AlphabetFile,
        val start: Start,
        val rule: List<Rule>) {

    /** Renders the header lines followed by every rule, each separated by a newline. */
    override fun toString(): String =
            "$version\n$alphabetFile\n$start\n${rule.joinToString("\n")}"
}

/**
 * better-parse grammar for the sample document format:
 *
 * ```
 * version: <version>
 * alphabetFile: <file>
 * start: <expression>
 * rule: <name> = <expression>
 * ```
 *
 * Tokens are declared literals first and regexes last, because the tokenizer
 * tries them in declaration order. Parsers that need a token declared further
 * down reference it lazily through `parser(this::token)`.
 */
object GrammarParser2 : Grammar<GGrammar>() {

    // literal first
    private val versionLiteral by literalToken("version: ")
    private val version by literalToken("0.5f") // bad (hard-coded sample value)
    private val alphabetFileLiteral by literalToken("alphabetFile: ")
    private val alphabet by literalToken("missionAlphabet.xpr") // bad (hard-coded sample value)
    private val startLiteral by literalToken("start: ")
    private val graphLiteral by literalToken("GRAPH")
    private val tileMapLiteral by literalToken("TILEMAP")
    private val rule by literalToken("rule: ")

    private val colon by literalToken(":")
    private val lpar by literalToken("(")
    private val rpar by literalToken(")")
    private val set by literalToken("=")
    private val coma by literalToken(", ")

    // main grammar parsers

    /** `version: <version>` -> [Version] built from the version token text. */
    val versionParser by (versionLiteral and parser(this::version)) use { Version(t2.text) }

    /** `alphabetFile: <file>` -> [AlphabetFile] built from the file token text. */
    val alphabetParser by (alphabetFileLiteral and parser(this::alphabet)) use { AlphabetFile(t2.text) }

    /** `TILEMAP <int> <int>` -> [TileMap]; the whitespace tokens are matched explicitly and dropped. */
    val tileMapParser by (tileMapLiteral and -parser(this::ws) and parser(this::integer) and -parser(this::ws) and parser(this::integer)) use { TileMap(t2.text.toInt(), t3.text.toInt()) }

    /** `<word>=<word|integer>` -> [Assignment]; the value is kept as the raw token text. */
    val assignmentParser by (parser(this::word) and -set and (parser(this::word) or parser(this::integer))) use { Assignment(t1.text, t2.text) }

    /** `(<assignment>, <assignment>, ...)` -> [Constructor]. */
    val constructorParser by (lpar and separatedTerms(assignmentParser, coma) and rpar) use { Constructor(t2) }

    /** `<integer>:<word><constructor>` -> [Node]. */
    val nodeParser by (parser(this::integer) and -colon and parser(this::word) and constructorParser) use { Node(t1.text.toInt(), t2.text, t3 ) }

    /** `GRAPH <node>` -> [Graph] containing that single node. */
    val graphParser by (graphLiteral and -parser(this::ws) and nodeParser) use { Graph(listOf(t2)) }

    /** Either a tile map or a graph expression. */
    val expressionParser = (tileMapParser or graphParser)

    /** `start: <expression>` -> [Start]. */
    val startParser by (startLiteral and expressionParser ) map { Start(it.t2) }

    /** One or more `rule: <word> = <expression>` entries separated by newlines -> list of [Rule]. */
    val ruleParser by separatedTerms((-rule and parser(this::word) and set and expressionParser) use { Rule(t1.text, t3) }, parser(this::NEWLINE))

    // regex last
    private val NEWLINE by regexToken("\n")

    private val integer by regexToken("\\d+")
    private val word by regexToken("\\w+")

    // Declared as ignored, yet still referenced explicitly by tileMapParser and graphParser.
    private val ws by regexToken("\\s+", ignore = true)

    /** Version, alphabet file and start lines, then the rules, each separated by a newline. */
    override val rootParser by (
                    versionParser   * -NEWLINE *
                    alphabetParser  * -NEWLINE *
                    startParser     * -NEWLINE *
                    ruleParser
            ).map {
                GGrammar(it.t1, it.t2, it.t3, it.t4)
            }
}


/** Parses a small sample document with [GrammarParser2] and prints the resulting [GGrammar]. */
fun main() {
    val sample = """
            version: 0.5f
            alphabetFile: missionAlphabet.xpr
            start: GRAPH 0:Start(var=12, y=20)
            rule: StartBonus = GRAPH 1:entrance(x=2, y=3)
        """.trimIndent()

    val parsed = GrammarParser2.parseToEnd(sample)
    println(parsed)
}

I figured out the constructor part myself, using optional and acceptZero = true:

/** A parsed call-like expression: its [name] and its (possibly empty) list of [arguments]. */
data class Function(val name: String, val arguments: List<Argument>)

/** The parenthesised argument list of a [Function]. */
data class Constructor(val arguments: List<Argument>)

/**
 * Grammar for `name`, `name()` and `name(a=1, b=foo, c="bar")`.
 *
 * The argument list as a whole is optional, and it may be empty thanks to
 * `acceptZero = true`; both cases yield a [Function] with no arguments.
 */
object test : Grammar<Function>() {

    private val lpar by literalToken("(")

    private val rpar by literalToken(")")

    private val set by literalToken("=")

    private val coma by literalToken(", ")

    // `integer` has to be declared before `variable`: the tokenizer tries tokens in
    // declaration order and `\w+` also matches digits, so with the opposite order a
    // number would always be lexed as `variable` and never reach the `integer` branch.
    private val integer by regexToken("\\d+")

    private val variable by regexToken("\\w+")

    private val string by regexToken("\"\\w+\"")

    private val ws by regexToken("\\s+", ignore = true)

    /**
     * `<variable>=<value>` -> [Assignment]. Integer values are converted to `Int`;
     * variable and string values keep their raw token text (quotes included for strings).
     */
    val assignmentParser by (variable and set and ( variable or integer or string )) use {
        val value: Any = when (t3.type) {
            variable -> t3.text
            integer -> t3.text.toInt()
            string -> t3.text
            else -> throw IllegalStateException("unkown type ${t3.type}")
        }
        Assignment(t1.text, value)
    }

    /** `(` zero or more comma-separated assignments `)` -> [Constructor]. */
    val constructorParser by (lpar and separatedTerms(assignmentParser, coma, acceptZero = true) and rpar) use { Constructor(t2) }

    /** `<variable>` optionally followed by a constructor -> [Function]; a missing constructor means no arguments. */
    private val functionParser by (variable and optional(constructorParser)) use { Function(t1.text, t2?.arguments ?: emptyList())}

    override val rootParser by functionParser
}

/** Runs the [test] grammar over a few sample inputs and prints each parsed [Function]. */
fun main() {
    val samples = listOf(
            "hello",
            "hello()",
            "hello(x=1)",
            "hello(x=pouet)",
            "hello(x=\"pouet\", y=2)"
    )

    // Parse and print one input at a time, in order.
    for (sample in samples) {
        println(test.parseToEnd(sample))
    }
}
Function(name=hello, arguments=[])
Function(name=hello, arguments=[])
Function(name=hello, arguments=[Assignment(variable=x, value=1)])
Function(name=hello, arguments=[Assignment(variable=x, value=pouet)])
Function(name=hello, arguments=[Assignment(variable=x, value="pouet"), Assignment(variable=y, value=2)])