Issue with a nested structure
gwd opened this issue · 1 comments
First, thank you so much for this library -- made my first grammar on Friday and I think the setup really helped make things straightforward.
I'm using participle v2.10.0, and trying to write a parser for strings like the following:
// Sample inputs the grammar is meant to accept: single versions (a project
// name plus a version number, or the bare "xen-unstable" string) and
// " - "-separated ranges of two such versions.
tests := []string{
"Xen 4.18.x",
"Linux 4.7.x",
"QEMU 4.7-RC series",
"Linux 4.7.x - Linux 4.9.x",
"xen-unstable",
"Xen 4.18.x - xen-unstable",
"QEMU 4.7-RC series - QEMU 4.10.x",
}
i.e., a version that may be a single version or a range (two versions separated by ` - `); each version may be either a single string or `<project> <version number>`.
I wrote the following `participle` structures:
// ProjectVersion is a project name followed by a space and a version number,
// e.g. "Xen 4.18.x" or "QEMU 4.7-RC series".
type ProjectVersion struct {
// Project captures the Project token.
Project string `parser:"@Project"`
// Version captures either a VersionString or an RCVersionString token that
// follows a Space token.
Version string `parser:"Space @(VersionString | RCVersionString)"`
}
// VersionSingle is one version: either a bare TipVersion token or a
// ProjectVersion. The two fields are alternatives (note the leading "|" in
// the second tag).
type VersionSingle struct {
TipVersion *string `parser:"@TipVersion"`
ProjectVersion *ProjectVersion `parser:"| @@"`
}
// VersionRange is two VersionSingle values separated by a VersionDash token.
type VersionRange struct {
From VersionSingle `parser:"@@"`
To VersionSingle `parser:"VersionDash @@"`
}
// Version is the top-level rule: a VersionRange, or else a VersionSingle.
//
// NOTE: this is the grammar the report is about. With Range listed first,
// single inputs such as "Xen 4.18.x" are reported to fail with
// `unexpected token "<EOF>" (expected <versiondash> VersionSingle)` instead
// of falling back to Single.
type Version struct {
Range *VersionRange `parser:"@@"`
Single *VersionSingle `parser:"| @@"`
}
var rulesCommon []lexer.SimpleRule{
{"Project", `Xen|Linux|QEMU|xapi`},
{"TipVersion", `xen-unstable`},
{"RCVersionString", `\d+\.\d+-RC series`},
{"VersionString", `\d+\.\d+\.x`},
{"VersionDash", " - "},
{"Space", ` `},
}
Unfortunately, I get errors like the following:
Parsing Xen 4.18.x: 1:11: unexpected token "<EOF>" (expected <versiondash> VersionSingle)
Parsing Linux 4.7.x: 1:12: unexpected token "<EOF>" (expected <versiondash> VersionSingle)
Parsing QEMU 4.7-RC series: 1:19: unexpected token "<EOF>" (expected <versiondash> VersionSingle)
In other words, it's somehow getting stuck on parsing something as a VersionRange, and not backing out and parsing it simply as a VersionSingle.
But this only happens if both `Version` and `VersionSingle` have at least two ways to be interpreted. If I replace the `ProjectVersion` struct with a single regexp that matches the same string, it works (here replacing the `Project` lexer token with a `ProjectVersion` token with the appropriate regexp).
// With the range, without the "project version"

// VersionSingleNoProjectVersion is the VersionSingle variant in which the
// project-plus-version form is captured from a single ProjectVersion lexer
// token rather than from a nested struct.
type VersionSingleNoProjectVersion struct {
TipVersion *string `parser:"@TipVersion"`
ProjectVersion *string `parser:"| @ProjectVersion"`
}
// VersionRangeNoProjectVersion is two VersionSingleNoProjectVersion values
// separated by a VersionDash token.
type VersionRangeNoProjectVersion struct {
From VersionSingleNoProjectVersion `parser:"@@"`
To VersionSingleNoProjectVersion `parser:"VersionDash @@"`
}
// VersionNoProjectVersion is the top-level rule for the single-token
// variant: a range, or else a single version. The report states that this
// variant parses correctly.
type VersionNoProjectVersion struct {
Range *VersionRangeNoProjectVersion `parser:"@@"`
Single *VersionSingleNoProjectVersion `parser:"| @@"`
}
The problem for single items goes away if I get rid of the `VersionRange`; but then of course you can't parse ranges:
// With "project version", without range

// VersionNoRange is a top-level rule that accepts only a single
// VersionSingle; it has no range alternative.
type VersionNoRange struct {
Single *VersionSingle `parser:"@@"`
}
Any idea what's going on?
For completeness, here's a complete testing function you can use to trigger the issue:
package participletest_test
import (
"testing"
"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)
// What I'd like:

// ProjectVersion is a project name followed by a space and a version number,
// e.g. "Xen 4.18.x" or "QEMU 4.7-RC series".
type ProjectVersion struct {
// Project captures the Project token.
Project string `parser:"@Project"`
// Version captures either a VersionString or an RCVersionString token that
// follows a Space token.
Version string `parser:"Space @(VersionString | RCVersionString)"`
}
// VersionSingle is one version: either a bare TipVersion token or a
// ProjectVersion. The two fields are alternatives (note the leading "|" in
// the second tag).
type VersionSingle struct {
TipVersion *string `parser:"@TipVersion"`
ProjectVersion *ProjectVersion `parser:"| @@"`
}
// VersionRange is two VersionSingle values separated by a VersionDash token.
type VersionRange struct {
From VersionSingle `parser:"@@"`
To VersionSingle `parser:"VersionDash @@"`
}
// Version is the top-level rule: a VersionRange, or else a VersionSingle.
//
// NOTE: this is the grammar the report is about. With Range listed first,
// single inputs such as "Xen 4.18.x" are reported to fail with
// `unexpected token "<EOF>" (expected <versiondash> VersionSingle)` instead
// of falling back to Single.
type Version struct {
Range *VersionRange `parser:"@@"`
Single *VersionSingle `parser:"| @@"`
}
// With "project version", without range

// VersionNoRange is a top-level rule that accepts only a single
// VersionSingle; it has no range alternative.
type VersionNoRange struct {
Single *VersionSingle `parser:"@@"`
}
// With the range, without the "project version"

// VersionSingleNoProjectVersion is the VersionSingle variant in which the
// project-plus-version form is captured from a single ProjectVersion lexer
// token rather than from a nested struct.
type VersionSingleNoProjectVersion struct {
TipVersion *string `parser:"@TipVersion"`
ProjectVersion *string `parser:"| @ProjectVersion"`
}
// VersionRangeNoProjectVersion is two VersionSingleNoProjectVersion values
// separated by a VersionDash token.
type VersionRangeNoProjectVersion struct {
From VersionSingleNoProjectVersion `parser:"@@"`
To VersionSingleNoProjectVersion `parser:"VersionDash @@"`
}
// VersionNoProjectVersion is the top-level rule for the single-token
// variant: a range, or else a single version.
type VersionNoProjectVersion struct {
Range *VersionRangeNoProjectVersion `parser:"@@"`
Single *VersionSingleNoProjectVersion `parser:"| @@"`
}
// rulesCommon holds the lexer rules shared by both lexers built in
// TestVersion, which appends either a Project rule or a ProjectVersion rule
// to this list.
//
// NOTE(review): VersionDash (" - ") is listed ahead of Space (" "); this
// presumably matters because both patterns start with a space. Confirm the
// rule-precedence behaviour against the lexer documentation.
var rulesCommon = []lexer.SimpleRule{
{"TipVersion", `xen-unstable`},
{"RCVersionString", `\d+\.\d+-RC series`},
{"VersionString", `\d+\.\d+\.x`},
{"VersionDash", " - "},
{"Space", ` `},
}
func TestVersion(t *testing.T) {
simpletests := []string{
"Xen 4.18.x",
"Linux 4.7.x",
"QEMU 4.7-RC series",
"xen-unstable",
}
rangetests := []string{
"Linux 4.7.x - Linux 4.9.x",
"Xen 4.18.x - xen-unstable",
"QEMU 4.7-RC series - QEMU 4.10.x",
}
lexProject := lexer.MustSimple(append(rulesCommon, lexer.SimpleRule{"Project", `Xen|Linux|QEMU|xapi`}))
lexProjectVersion := lexer.MustSimple(append(rulesCommon,
lexer.SimpleRule{"ProjectVersion", `(Xen|Linux|QEMU|xapi) (\d+\.\d+\.x|\d+\.\d+-RC series)`}))
pVersion := participle.MustBuild[Version](participle.Lexer(lexProject))
t.Log("Testing pVersion with simple and range")
for _, in := range append(simpletests, rangetests...) {
out, err := pVersion.ParseString("", in)
if err != nil {
t.Errorf("ERROR: Parsing %v: %v", in, err)
} else {
t.Logf("Parsing %v resulted in %v", in, out)
}
}
pVersionNoRange := participle.MustBuild[VersionNoRange](participle.Lexer(lexProject))
t.Log("Testing pVersionNoRange with simple only")
for _, in := range simpletests {
out, err := pVersionNoRange.ParseString("", in)
if err != nil {
t.Errorf("ERROR: Parsing %v: %v", in, err)
} else {
t.Logf("Parsing %v resulted in %v", in, out)
}
}
pVersionNoProjcetVersion := participle.MustBuild[VersionNoProjectVersion](participle.Lexer(lexProjectVersion))
t.Log("Testing pVersionNoProjectVersion with simple and range")
for _, in := range append(simpletests, rangetests...) {
out, err := pVersionNoProjcetVersion.ParseString("", in)
if err != nil {
t.Errorf("ERROR: Parsing %v: %v", in, err)
} else {
t.Logf("Parsing %v resulted in %v", in, out)
}
}
}
BTW, the following works for all tests:
// VersionSlice is the workaround grammar: one VersionSingle followed by any
// number of "VersionDash VersionSingle" pairs, all collected into Range. A
// single version and a two-ended range are therefore both accepted by the
// same rule, with no top-level alternation.
type VersionSlice struct {
Range []VersionSingle `parser:"@@ ( VersionDash @@ )*"`
}
...
// Fragment intended to sit inside TestVersion: it relies on that function's
// t, lexProject, simpletests and rangetests. It runs every simple and range
// input through the VersionSlice grammar, reporting failures with t.Errorf
// and logging successful parses.
pVersionSlice := participle.MustBuild[VersionSlice](participle.Lexer(lexProject))
t.Log("Testing pVersionSlice with simple and range")
for _, in := range append(simpletests, rangetests...) {
out, err := pVersionSlice.ParseString("", in)
if err != nil {
t.Errorf("ERROR: Parsing %v: %v", in, err)
} else {
t.Logf("Parsing %v resulted in %v", in, out)
}
}
...
So I think I have a work-around for now; but if the first version is supposed to work, it would be good to track down what's going on.