go_parser

Parser API in Go

ABOUT

The 'go_parser' package is an API to help you create hand-written lexers and parsers.

The package was inspired by Rob Pikes' video Lexical Scanning In Go and golang's 'template' package.

PARSER INTERFACE

Below is the interface for the main Parser type:

// Parser helps you process lexer tokens
type Parser interface {

	// PeekTokenType allows you to look ahead at tokens without consuming them
	PeekTokenType(int) lexer.TokenType

	// PeekToken allows you to look ahead at tokens without consuming them
	PeekToken(int) *lexer.Token

	// NextToken consumes and returns the next token
	NextToken()    *lexer.Token

	// SkipToken consumes the next token without returning it
	SkipToken()

	// SkipTokens consumes the next n tokens without returning them
	SkipTokens(int)

	// BackupToken un-consumes the last token
	BackupToken ()

	// BackupTokens un-consumes the last n tokens
	BackupTokens(int)

	// ClearTokens clears all consumed tokens
	ClearTokens()

	// Emit emits an object, consuming matched tokens
	Emit (interface{})

	EOF() bool

	// Next retrieves the next emitted item
	Next() interface{}

	// Marker returns a marker that you can use to reset the parser state later
	Marker() *Marker

	// Reset resets the lexer state to the specified marker
	Reset(*Marker)
}

EXAMPLE

Below is a sample calculator program that uses the parser (and lexer) API:

//
//	calc.go implements a simple calculator using the iNamik lexer and parser api.
//
//	Input is read from STDIN
//
//	The input expression is matched against the following pattern:
//
//	input_exp:
//	( id '=' )? general_exp
//	general_exp:
//		operand ( operator operand )?
//	operand:
//		number | id | '(' general_exp ')'
//	operator:
//		'+' | '-' | '*' | '/'
//	number:
//		digit+ ( '.' digit+ )?
//	digit:
//		['0'..'9']
//	id:
//		alpha ( alpa | digit )*
//	alpha:
//		['a'..'z'] | ['A'..'Z']
//
//	Precedence is as expected, with '*' and '/' have higher precedence
//	than '+' and '-', as follows:
//
//	1 + 2 * 3 - 4 / 5  ==  1 + (2 * 3) - (4 / 5)
//

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"os"
	"strings"
	"strconv"
)
import (
	"github.com/iNamik/go_lexer"
	"github.com/iNamik/go_parser"
)

// We define our lexer tokens starting from the pre-defined EOF token
const (
	T_EOF   lexer.TokenType = lexer.TokenTypeEOF
	T_NIL                   = lexer.TokenTypeEOF + iota
	T_ID
	T_NUMBER
	T_PLUS
	T_MINUS
	T_MULTIPLY
	T_DIVIDE
	T_EQUALS
	T_OPEN_PAREN
	T_CLOSE_PAREN
)

// To store variables
var vars = map[string] float64 {}

// Single-character tokens
var singleChars  = []byte            { '+'   , '-'    , '*'       , '/'     , '='     , '('         , ')'           }
var singleTokens = []lexer.TokenType { T_PLUS, T_MINUS, T_MULTIPLY, T_DIVIDE, T_EQUALS, T_OPEN_PAREN, T_CLOSE_PAREN }

// Multi-character tokens
var rangeWhitespace = []byte { ' ', '\t' }
var rangeDigits     = lexer.RangeToBytes("0-9")
var rangeAlpha      = lexer.RangeToBytes("a-zA-Z")
var rangeAlphaNum   = lexer.RangeToBytes("0-9a-zA-Z")

// main
func main() {

	// Create a buffered reader from STDIN
	stdin := bufio.NewReader(os.Stdin)

	for {
		// Read a line of input
		input, _, err := stdin.ReadLine()

		// Error? we're done
		if nil != err {
			break
		}

		// Anything to process?
		if len(input) > 0 {
			// Create a new lexer to turn the input text into tokens
			l := lexer.NewLexer(lex, strings.NewReader(string(input)), len(input), 2)

			// Create a new parser that feeds off the lexer and generates expression values
			p := parser.NewParser(parse, l, 2)

			// Loop over parser emits
			for i := p.Next() ; nil != i ; i = p.Next() {
				fmt.Printf("%v\n", i)
			}
		}
	}
}

// lex is the starting (and only) StateFn for lexing the input into tokens
func lex(l lexer.Lexer) lexer.StateFn {

	// EOF
	if l.MatchEOF() {
		l.EmitEOF()
		return nil // We're done here
	}

	// Single-char token?
	if i := bytes.IndexRune(singleChars, l.PeekRune(0)) ; i >= 0 {
		l.NextRune()
		l.EmitToken(singleTokens[i])
		return lex
	}

	switch {

		// Skip whitespace
		case l.MatchOneOrMore(rangeWhitespace) :
			l.IgnoreToken()

		// Number
		case l.MatchOneOrMore(rangeDigits) :
			if l.PeekRune(0) == '.' {
				l.NextRune() // skip '.'
				if ! l.MatchOneOrMore(rangeDigits) {
					printError(l.Column(), "Illegal number format - Missing digits after '.'")
					l.IgnoreToken()
					break
				}
			}
			l.EmitTokenWithBytes(T_NUMBER)

		// ID
		case l.MatchOne(rangeAlpha) && l.MatchNoneOrMore(rangeAlphaNum):
			l.EmitTokenWithBytes(T_ID)

		// Unknown
		default :
			l.NextRune()
			printError(l.Column(), "Unknown Character")
			l.IgnoreToken()
	}

	// See you again soon!
	return lex
}

// parse tries to execute a general expression from the lexed tokens.
// Returns nil - We only take one pass at the input string
func parse(p parser.Parser) parser.StateFn {

	if p.PeekTokenType(0) != T_EOF {
		// Assignment ( id = general_expression )
		if p.PeekTokenType(0) == T_ID && p.PeekTokenType(1) == T_EQUALS {
			tId := p.NextToken()

			p.SkipToken() // skip '='

			val, ok := pGeneralExpression(p)

			if ok {
				t := p.NextToken()
				if t.Type() != T_EOF {
						printError(t.Column(), "Expecting operator")
				} else {
					id := string(tId.Bytes())
					vars[id] = val
				}
			}
		// General expression
		} else {
			val, ok := pGeneralExpression(p)

			if ok {
				t := p.NextToken()
				if t.Type() != T_EOF {
						printError(t.Column(), "Expecting operator")
				} else {
					p.Emit(val)
				}
			}
		}
	}

	p.ClearTokens()
	p.Emit(nil) // We're done - One pass only

	return nil
}

// pGeneralExpression is the starting point for parsing a General Expression.
// It is basically a pass-through to pAdditiveExpression, but it feels cleaner
func pGeneralExpression(p parser.Parser) (f float64, ok bool) { return pAdditiveExpression(p) }

// pAdditiveExpression parses [ expression ( ( '+' | '-' ) expression )? ]
func pAdditiveExpression(p parser.Parser) (f float64, ok bool) {

	f, ok = pMultiplicitiveExpression(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

			// Add (+)
			case T_PLUS :
				r, ok := pAdditiveExpression(p)
				if ok {
					f += r
				}

			// Subtract (-)
			case T_MINUS :
				r, ok := pAdditiveExpression(p)
				if ok {
					f -= r
				}

			// Unknown - Send it back upstream
			default :
				p.BackupToken()
				ok = true
		}
	}

	return
}

// pMultiplicitiveExpression parses [ expression ( ( '*' | '/' ) expression )? ]
func pMultiplicitiveExpression(p parser.Parser) (f float64, ok bool) {

	f, ok = pOperand(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

			// Multiply (*)
			case T_MULTIPLY :
				r, ok := pMultiplicitiveExpression(p)
				if ok {
					f *= r
				}

			// Divide (/)
			case T_DIVIDE :
				r, ok := pMultiplicitiveExpression(p)
				if ok {
					f /= r
				}

			// Unknown - Send it back upstream
			default :
				p.BackupToken()
				ok = true
		}
	}

	return
}

// pOperand parses [ id | number | '(' expression ')' ]
func pOperand (p parser.Parser) (f float64, ok bool) {

	var err error

	m := p.Marker()
	t := p.NextToken()

	switch t.Type() {

		// ID
		case T_ID :
			var id = string( t.Bytes() )
			f, ok = vars[ id ]
			if !ok {
				printError(t.Column(), fmt.Sprint("id '",id,"' not defined"))
				f = 0.0
			}

		// Number
		case T_NUMBER :
			f, err = strconv.ParseFloat(string(t.Bytes()), 64)
			ok = nil == err
			if !ok {
				printError(t.Column(), fmt.Sprint("Error reading number: ", err.Error()))
				f = 0.0
			}

		// '(' Expresson ')'
		case T_OPEN_PAREN :
			f, ok = pGeneralExpression(p)
			if ok {
				t2 := p.NextToken()
				if t2.Type() != T_CLOSE_PAREN {
					printError(t.Column(), "Unbalanced Paren")
					ok = false
					f = 0.0
				}
			}

		// EOF
		case T_EOF:
			printError(t.Column(), "Unexpected EOF - Expecting operand")
			ok = false
			f = 0.0

		// Unknown
		default:
			printError(t.Column(), "Expecting operand")
			ok = false
			f = 0.0
	}

	if !ok {
		p.Reset(m)
	}

	return
}

// printError prints an error msg pointing to the specified column of the input.
func printError(col int, msg string) {
	fmt.Print(strings.Repeat(" ", col-1), "^ ", msg, "\n")
}

INSTALL

To install the package manually

git clone https://github.com/iNamik/go_parser
cd go_parser
gomake
gomake install

Or you can install the package via goinstall

goinstall github.com/iNamik/go_parser

DEPENDENCIES

go_parser depends on the iNamik go_container queue package and the iNamik go_lexer package:

AUTHORS

David Farrell