h0tk3y/better-parse

Indentation as in YAML

OnufryKlaczynski opened this issue · 3 comments

Hello,
This probably isn't the place but I struggle with identation base grammar as in YAML.

If someone could provide simple solution how would you do this as in YAML. I could get my head around with example

@OnufryKlaczynski
Please take a look at this snippet where I tried to implement indentation checking with two custom combinators, one that maintains a context (the indentation level) and the other only accepting the result if a predicate is satisfied (in this case, it checks the indentation level):

package com.example

import com.example.Tree.Line
import com.example.Tree.Node
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.grammar.tryParseToEnd
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.parser.*
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue

class ConditionCheckingCombinator<T>(val parser: Parser<T>, val condition: (T) -> Boolean) : Parser<T> {
    override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
        return when (val innerResult = parser.tryParse(tokens, fromPosition)) {
            is Parsed -> if (condition(innerResult.value)) innerResult else ConditionNotSatisfied
            else -> innerResult
        }
    }
}

object ConditionNotSatisfied : ErrorResult()

fun <T> Parser<T>.onlyIf(predicate: (T) -> Boolean): Parser<T> = ConditionCheckingCombinator(this, predicate)

class ContextMaintainingCombinator<T>(
    val parser: Parser<T>,
    val onEnter: () -> Unit,
    val onExit: () -> Unit
) : Parser<T> {
    override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
        onEnter()
        return try {
            parser.tryParse(tokens, fromPosition)
        } finally {
            onExit()
        }
    }
}

sealed interface Tree {
    data class Line(val text: String) : Tree
    data class Node(val items: List<Tree>) : Tree
}

class IndentationExampleGrammar : Grammar<Tree>() {
    private val lbrace by literalToken("{")
    private val rbrace by literalToken("}")
    private val indent by literalToken("    ")
    private val text by regexToken("\\S.*")

    @Suppress("unused")
    private val newline by literalToken("\n", ignore = true)

    private val context = object {
        var currentIndent = 0

        inline fun <reified T> Parser<T>.withCurrentIndentation(): Parser<T> =
            -zeroOrMore(indent).onlyIf { it.size == currentIndent } * this

        inline fun <reified T> increaseIndentation(parser: Parser<T>): Parser<T> =
            ContextMaintainingCombinator(parser, onEnter = { currentIndent++ }, onExit = { currentIndent-- })
    }

    private val currentIndentedLine by with(context) {
        text.withCurrentIndentation().map { Line(it.text) }
    }

    private val bracedAndNextIndentedNode: Parser<Tree> by with(context) {
        -lbrace.withCurrentIndentation() *
                increaseIndentation(parser(::rootParser)) *
                -rbrace.withCurrentIndentation()
    }

    override val rootParser: Parser<Tree> by zeroOrMore(currentIndentedLine or bracedAndNextIndentedNode).map(Tree::Node)
}

class IndentationTest {
    val grammar = IndentationExampleGrammar()

    @Test
    fun empty() {
        assertEquals(Node(listOf()), grammar.parseToEnd(""))
    }

    @Test
    fun emptyIndentRejected() {
        assertTrue { grammar.tryParseToEnd("    ") is ErrorResult }
    }

    @Test
    fun rejectLineIndent() {
        assertTrue { grammar.tryParseToEnd("    test") is ErrorResult }
    }

    @Test
    fun simpleLines() {
        assertEquals(Node(listOf(Line("abc"), Line("def"))), grammar.parseToEnd("abc\ndef"))
    }

    @Test
    fun rejectIndentation() {
        val text = """
        abc
            def
        """.trimIndent()

        assertTrue { grammar.tryParseToEnd(text) is ErrorResult }
    }

    @Test
    fun acceptIndentation() {
        val text = """
        {
            {
            }
        }
        """.trimIndent()
        assertEquals(Node(listOf(Node(listOf(Node(listOf()))))), grammar.parseToEnd(text))
    }

    @Test
    fun mismatchedBraceIndentation() {
        val text = """
        {
        {
        }
        }
        """.trimIndent()

        assertTrue { grammar.tryParseToEnd(text) is ErrorResult }
    }

    @Test
    fun simpleLinesAndNode() {
        val text = """
        abc
        def
        {
            xyz
            zyx
        }
        """.trimIndent()

        assertEquals(
            Node(
                listOf(
                    Line("abc"),
                    Line("def"),
                    Node(listOf(Line("xyz"), Line("zyx")))
                )
            ),
            grammar.parseToEnd(text)
        )
    }

    @Test
    fun nestedIndentation() {
        val text = """
        {
            {
                {
                    this
                }
            }
        }
        """.trimIndent()

        assertEquals(
            Node(listOf(Node(listOf(Node(listOf(Node(listOf(Line("this"))))))))),
            grammar.parseToEnd(text)
        )
    }
}

I hope that YAML should be similar in this regard. Please let me know how it goes!

If the two combinators I suggested above turn out to be a good fit for writing grammars with indentations, I will consider adding them to the built-in combinators, and also providing an example in the docs.

Hello @h0tk3y, what we've ended up implementing is kind of similar to what you've proposed. Only that we've used a ThreadLocal for the mutable indentation level as our application is multithreaded. I don't like the fact that we need to use it and worry about thread safety in this solution. Would you be open to a PR that would extend the Parser signature to include an additional context parameter in the tryParse method? This would allow for context sensitive grammar without stateful logic.

Current implementation:

class IndentationParser(
  val singleIndentation: Parser<*>,
) : Parser<Unit> {

  override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<Unit> =
    (indentationLevel.get() times singleIndentation).asJust(Unit).tryParse(tokens,  fromPosition)
}

fun <T> indent(indentedParser: Parser<T>): IndentCombinator<T> =
  IndentCombinator(indentedParser)

class IndentCombinator<T>(
  val indentedParser: Parser<T>,
) : Parser<T> {

  override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
    val currentIndentationLevel = indentationLevel.get()
    try {
      indentationLevel.set(currentIndentationLevel + 1)
      return indentedParser.tryParse(tokens, fromPosition)
    } finally {
      indentationLevel.set(currentIndentationLevel)
    }
  }
}

internal val indentationLevel: ThreadLocal<Int> = ThreadLocal.withInitial { 0 }