Parser API in Go
The 'go_parser' package is an API to help you create hand-written lexers and parsers.
The package was inspired by Rob Pikes' video Lexical Scanning In Go and golang's 'template' package.
Below is the interface for the main Parser type:
// Parser helps you process lexer tokens
type Parser interface {
// PeekTokenType allows you to look ahead at tokens without consuming them
PeekTokenType(int) lexer.TokenType
// PeekToken allows you to look ahead at tokens without consuming them
PeekToken(int) *lexer.Token
// NextToken consumes and returns the next token
NextToken() *lexer.Token
// SkipToken consumes the next token without returning it
SkipToken()
// SkipTokens consumes the next n tokens without returning them
SkipTokens(int)
// BackupToken un-consumes the last token
BackupToken ()
// BackupTokens un-consumes the last n tokens
BackupTokens(int)
// ClearTokens clears all consumed tokens
ClearTokens()
// Emit emits an object, consuming matched tokens
Emit (interface{})
EOF() bool
// Next retrieves the next emitted item
Next() interface{}
// Marker returns a marker that you can use to reset the parser state later
Marker() *Marker
// Reset resets the lexer state to the specified marker
Reset(*Marker)
}
Below is a sample calculator program that uses the parser (and lexer) API:
//
// calc.go implements a simple calculator using the iNamik lexer and parser api.
//
// Input is read from STDIN
//
// The input expression is matched against the following pattern:
//
// input_exp:
// ( id '=' )? general_exp
// general_exp:
// operand ( operator operand )?
// operand:
// number | id | '(' general_exp ')'
// operator:
// '+' | '-' | '*' | '/'
// number:
// digit+ ( '.' digit+ )?
// digit:
// ['0'..'9']
// id:
// alpha ( alpa | digit )*
// alpha:
// ['a'..'z'] | ['A'..'Z']
//
// Precedence is as expected, with '*' and '/' have higher precedence
// than '+' and '-', as follows:
//
// 1 + 2 * 3 - 4 / 5 == 1 + (2 * 3) - (4 / 5)
//
package main
import (
"bufio"
"bytes"
"fmt"
"os"
"strings"
"strconv"
)
import (
"github.com/iNamik/go_lexer"
"github.com/iNamik/go_parser"
)
// We define our lexer tokens starting from the pre-defined EOF token
const (
T_EOF lexer.TokenType = lexer.TokenTypeEOF
T_NIL = lexer.TokenTypeEOF + iota
T_ID
T_NUMBER
T_PLUS
T_MINUS
T_MULTIPLY
T_DIVIDE
T_EQUALS
T_OPEN_PAREN
T_CLOSE_PAREN
)
// To store variables
var vars = map[string] float64 {}
// Single-character tokens
var singleChars = []byte { '+' , '-' , '*' , '/' , '=' , '(' , ')' }
var singleTokens = []lexer.TokenType { T_PLUS, T_MINUS, T_MULTIPLY, T_DIVIDE, T_EQUALS, T_OPEN_PAREN, T_CLOSE_PAREN }
// Multi-character tokens
var rangeWhitespace = []byte { ' ', '\t' }
var rangeDigits = lexer.RangeToBytes("0-9")
var rangeAlpha = lexer.RangeToBytes("a-zA-Z")
var rangeAlphaNum = lexer.RangeToBytes("0-9a-zA-Z")
// main
func main() {
// Create a buffered reader from STDIN
stdin := bufio.NewReader(os.Stdin)
for {
// Read a line of input
input, _, err := stdin.ReadLine()
// Error? we're done
if nil != err {
break
}
// Anything to process?
if len(input) > 0 {
// Create a new lexer to turn the input text into tokens
l := lexer.NewLexer(lex, strings.NewReader(string(input)), len(input), 2)
// Create a new parser that feeds off the lexer and generates expression values
p := parser.NewParser(parse, l, 2)
// Loop over parser emits
for i := p.Next() ; nil != i ; i = p.Next() {
fmt.Printf("%v\n", i)
}
}
}
}
// lex is the starting (and only) StateFn for lexing the input into tokens
func lex(l lexer.Lexer) lexer.StateFn {
// EOF
if l.MatchEOF() {
l.EmitEOF()
return nil // We're done here
}
// Single-char token?
if i := bytes.IndexRune(singleChars, l.PeekRune(0)) ; i >= 0 {
l.NextRune()
l.EmitToken(singleTokens[i])
return lex
}
switch {
// Skip whitespace
case l.MatchOneOrMore(rangeWhitespace) :
l.IgnoreToken()
// Number
case l.MatchOneOrMore(rangeDigits) :
if l.PeekRune(0) == '.' {
l.NextRune() // skip '.'
if ! l.MatchOneOrMore(rangeDigits) {
printError(l.Column(), "Illegal number format - Missing digits after '.'")
l.IgnoreToken()
break
}
}
l.EmitTokenWithBytes(T_NUMBER)
// ID
case l.MatchOne(rangeAlpha) && l.MatchNoneOrMore(rangeAlphaNum):
l.EmitTokenWithBytes(T_ID)
// Unknown
default :
l.NextRune()
printError(l.Column(), "Unknown Character")
l.IgnoreToken()
}
// See you again soon!
return lex
}
// parse tries to execute a general expression from the lexed tokens.
// Returns nil - We only take one pass at the input string
func parse(p parser.Parser) parser.StateFn {
if p.PeekTokenType(0) != T_EOF {
// Assignment ( id = general_expression )
if p.PeekTokenType(0) == T_ID && p.PeekTokenType(1) == T_EQUALS {
tId := p.NextToken()
p.SkipToken() // skip '='
val, ok := pGeneralExpression(p)
if ok {
t := p.NextToken()
if t.Type() != T_EOF {
printError(t.Column(), "Expecting operator")
} else {
id := string(tId.Bytes())
vars[id] = val
}
}
// General expression
} else {
val, ok := pGeneralExpression(p)
if ok {
t := p.NextToken()
if t.Type() != T_EOF {
printError(t.Column(), "Expecting operator")
} else {
p.Emit(val)
}
}
}
}
p.ClearTokens()
p.Emit(nil) // We're done - One pass only
return nil
}
// pGeneralExpression is the starting point for parsing a General Expression.
// It is basically a pass-through to pAdditiveExpression, but it feels cleaner
func pGeneralExpression(p parser.Parser) (f float64, ok bool) { return pAdditiveExpression(p) }
// pAdditiveExpression parses [ expression ( ( '+' | '-' ) expression )? ]
func pAdditiveExpression(p parser.Parser) (f float64, ok bool) {
f, ok = pMultiplicitiveExpression(p)
if ok {
t := p.NextToken()
switch t.Type() {
// Add (+)
case T_PLUS :
r, ok := pAdditiveExpression(p)
if ok {
f += r
}
// Subtract (-)
case T_MINUS :
r, ok := pAdditiveExpression(p)
if ok {
f -= r
}
// Unknown - Send it back upstream
default :
p.BackupToken()
ok = true
}
}
return
}
// pMultiplicitiveExpression parses [ expression ( ( '*' | '/' ) expression )? ]
func pMultiplicitiveExpression(p parser.Parser) (f float64, ok bool) {
f, ok = pOperand(p)
if ok {
t := p.NextToken()
switch t.Type() {
// Multiply (*)
case T_MULTIPLY :
r, ok := pMultiplicitiveExpression(p)
if ok {
f *= r
}
// Divide (/)
case T_DIVIDE :
r, ok := pMultiplicitiveExpression(p)
if ok {
f /= r
}
// Unknown - Send it back upstream
default :
p.BackupToken()
ok = true
}
}
return
}
// pOperand parses [ id | number | '(' expression ')' ]
func pOperand (p parser.Parser) (f float64, ok bool) {
var err error
m := p.Marker()
t := p.NextToken()
switch t.Type() {
// ID
case T_ID :
var id = string( t.Bytes() )
f, ok = vars[ id ]
if !ok {
printError(t.Column(), fmt.Sprint("id '",id,"' not defined"))
f = 0.0
}
// Number
case T_NUMBER :
f, err = strconv.ParseFloat(string(t.Bytes()), 64)
ok = nil == err
if !ok {
printError(t.Column(), fmt.Sprint("Error reading number: ", err.Error()))
f = 0.0
}
// '(' Expresson ')'
case T_OPEN_PAREN :
f, ok = pGeneralExpression(p)
if ok {
t2 := p.NextToken()
if t2.Type() != T_CLOSE_PAREN {
printError(t.Column(), "Unbalanced Paren")
ok = false
f = 0.0
}
}
// EOF
case T_EOF:
printError(t.Column(), "Unexpected EOF - Expecting operand")
ok = false
f = 0.0
// Unknown
default:
printError(t.Column(), "Expecting operand")
ok = false
f = 0.0
}
if !ok {
p.Reset(m)
}
return
}
// printError prints an error msg pointing to the specified column of the input.
func printError(col int, msg string) {
fmt.Print(strings.Repeat(" ", col-1), "^ ", msg, "\n")
}
To install the package manually
git clone https://github.com/iNamik/go_parser
cd go_parser
gomake
gomake install
Or you can install the package via goinstall
goinstall github.com/iNamik/go_parser
go_parser depends on the iNamik go_container queue package and the iNamik go_lexer package:
- David Farrell