Indentation as in YAML
OnufryKlaczynski opened this issue · 3 comments
Hello,
This probably isn't the place but I struggle with identation base grammar as in YAML.
If someone could provide simple solution how would you do this as in YAML. I could get my head around with example
@OnufryKlaczynski
Please take a look at this snippet where I tried to implement indentation checking with two custom combinators, one that maintains a context (the indentation level) and the other only accepting the result if a predicate is satisfied (in this case, it checks the indentation level):
package com.example
import com.example.Tree.Line
import com.example.Tree.Node
import com.github.h0tk3y.betterParse.combinators.*
import com.github.h0tk3y.betterParse.grammar.Grammar
import com.github.h0tk3y.betterParse.grammar.parseToEnd
import com.github.h0tk3y.betterParse.grammar.parser
import com.github.h0tk3y.betterParse.grammar.tryParseToEnd
import com.github.h0tk3y.betterParse.lexer.TokenMatchesSequence
import com.github.h0tk3y.betterParse.lexer.literalToken
import com.github.h0tk3y.betterParse.lexer.regexToken
import com.github.h0tk3y.betterParse.parser.*
import kotlin.test.Test
import kotlin.test.assertEquals
import kotlin.test.assertTrue
class ConditionCheckingCombinator<T>(val parser: Parser<T>, val condition: (T) -> Boolean) : Parser<T> {
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
return when (val innerResult = parser.tryParse(tokens, fromPosition)) {
is Parsed -> if (condition(innerResult.value)) innerResult else ConditionNotSatisfied
else -> innerResult
}
}
}
object ConditionNotSatisfied : ErrorResult()
fun <T> Parser<T>.onlyIf(predicate: (T) -> Boolean): Parser<T> = ConditionCheckingCombinator(this, predicate)
class ContextMaintainingCombinator<T>(
val parser: Parser<T>,
val onEnter: () -> Unit,
val onExit: () -> Unit
) : Parser<T> {
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
onEnter()
return try {
parser.tryParse(tokens, fromPosition)
} finally {
onExit()
}
}
}
sealed interface Tree {
data class Line(val text: String) : Tree
data class Node(val items: List<Tree>) : Tree
}
class IndentationExampleGrammar : Grammar<Tree>() {
private val lbrace by literalToken("{")
private val rbrace by literalToken("}")
private val indent by literalToken(" ")
private val text by regexToken("\\S.*")
@Suppress("unused")
private val newline by literalToken("\n", ignore = true)
private val context = object {
var currentIndent = 0
inline fun <reified T> Parser<T>.withCurrentIndentation(): Parser<T> =
-zeroOrMore(indent).onlyIf { it.size == currentIndent } * this
inline fun <reified T> increaseIndentation(parser: Parser<T>): Parser<T> =
ContextMaintainingCombinator(parser, onEnter = { currentIndent++ }, onExit = { currentIndent-- })
}
private val currentIndentedLine by with(context) {
text.withCurrentIndentation().map { Line(it.text) }
}
private val bracedAndNextIndentedNode: Parser<Tree> by with(context) {
-lbrace.withCurrentIndentation() *
increaseIndentation(parser(::rootParser)) *
-rbrace.withCurrentIndentation()
}
override val rootParser: Parser<Tree> by zeroOrMore(currentIndentedLine or bracedAndNextIndentedNode).map(Tree::Node)
}
class IndentationTest {
val grammar = IndentationExampleGrammar()
@Test
fun empty() {
assertEquals(Node(listOf()), grammar.parseToEnd(""))
}
@Test
fun emptyIndentRejected() {
assertTrue { grammar.tryParseToEnd(" ") is ErrorResult }
}
@Test
fun rejectLineIndent() {
assertTrue { grammar.tryParseToEnd(" test") is ErrorResult }
}
@Test
fun simpleLines() {
assertEquals(Node(listOf(Line("abc"), Line("def"))), grammar.parseToEnd("abc\ndef"))
}
@Test
fun rejectIndentation() {
val text = """
abc
def
""".trimIndent()
assertTrue { grammar.tryParseToEnd(text) is ErrorResult }
}
@Test
fun acceptIndentation() {
val text = """
{
{
}
}
""".trimIndent()
assertEquals(Node(listOf(Node(listOf(Node(listOf()))))), grammar.parseToEnd(text))
}
@Test
fun mismatchedBraceIndentation() {
val text = """
{
{
}
}
""".trimIndent()
assertTrue { grammar.tryParseToEnd(text) is ErrorResult }
}
@Test
fun simpleLinesAndNode() {
val text = """
abc
def
{
xyz
zyx
}
""".trimIndent()
assertEquals(
Node(
listOf(
Line("abc"),
Line("def"),
Node(listOf(Line("xyz"), Line("zyx")))
)
),
grammar.parseToEnd(text)
)
}
@Test
fun nestedIndentation() {
val text = """
{
{
{
this
}
}
}
""".trimIndent()
assertEquals(
Node(listOf(Node(listOf(Node(listOf(Node(listOf(Line("this"))))))))),
grammar.parseToEnd(text)
)
}
}
I hope that YAML should be similar in this regard. Please let me know how it goes!
If the two combinators I suggested above turn out to be a good fit for writing grammars with indentations, I will consider adding them to the built-in combinators, and also providing an example in the docs.
Hello @h0tk3y, what we've ended up implementing is kind of similar to what you've proposed. Only that we've used a ThreadLocal
for the mutable indentation level as our application is multithreaded. I don't like the fact that we need to use it and worry about thread safety in this solution. Would you be open to a PR that would extend the Parser
signature to include an additional context
parameter in the tryParse
method? This would allow for context sensitive grammar without stateful logic.
Current implementation:
class IndentationParser(
val singleIndentation: Parser<*>,
) : Parser<Unit> {
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<Unit> =
(indentationLevel.get() times singleIndentation).asJust(Unit).tryParse(tokens, fromPosition)
}
fun <T> indent(indentedParser: Parser<T>): IndentCombinator<T> =
IndentCombinator(indentedParser)
class IndentCombinator<T>(
val indentedParser: Parser<T>,
) : Parser<T> {
override fun tryParse(tokens: TokenMatchesSequence, fromPosition: Int): ParseResult<T> {
val currentIndentationLevel = indentationLevel.get()
try {
indentationLevel.set(currentIndentationLevel + 1)
return indentedParser.tryParse(tokens, fromPosition)
} finally {
indentationLevel.set(currentIndentationLevel)
}
}
}
internal val indentationLevel: ThreadLocal<Int> = ThreadLocal.withInitial { 0 }