cznic/goyacc

yacc

Closed this issue · 8 comments

Hi,

parser.y:36:41: $$ of declarations has no declared type (and 30 more errors)
What is it complaining about?

program: declarations 
{
}

declarations: declaration
{
  declarationNode := newDeclarationNode($$.funcDecl)
  $$.node = declarationNode

  cast(yylex).SetAstRoot($$.node)
}

Is it possible for me to see the full parser.y file?

But even before that eventuality here are some notes:

  • See, for example, here for a discussion of the error message. In brief, suppose there is a declaration such as
%union {
        f1 type1
        f2 type2
        ...
}

The $$, $n, ... values refer to a specific field of the parse stack value, whose type is defined by the %union. The field can be selected in two ways: either use $<type>$, $<type>n, ... to select the type (and thus the field) explicitly, or use %token <type> tok1, tok2, ... and %type <type> symbol1, symbol2 to assign types to tokens and/or nonterminal symbols. When yacc knows the proper type for $$, $n, ..., it uses that type information to select the proper field. If the type is not defined, yacc cannot do that, which is why it complains.

  • In the example above, the semantic action reads $$.funcDecl, i.e. the $$ variable, which does not yet have a defined value. $$ is the "output" value and should be initialized by an assignment before it is ever referenced on the RHS of the action. It may happen to work, because for compatibility with the existing go tool yacc the $$ is not explicitly cleared to a zero value (which would break this), but relying on that is not a good idea.

Note to self: The error list should be expanded when output to stderr.

Please update your installed version to get the full error list printed.

is goyacc also compatible with https://github.com/blynn/nex ?

here the current .y file (not yet fully finished):

%{
package dsl
%}

%union {
  s string
  node Node
  funcName string
  funcDecl []Node
  params []string
}

%token WHILE
%token PRINT
%token END_LINE
%token ADD_OP
%token MUL_OP
%token ASSIGN
%token BEGIN_EXPRESSION
%token END_EXPRESSION
%token BEGIN_BLOCK
%token END_BLOCK
%token NUMBER
%token IDENTIFIER
%token FUNC
%token STRING
%token COMMA
%%

program: declarations 
{
}

declarations: declaration
{
  declarationNode := newDeclarationNode($$.funcDecl)
  $$.node = declarationNode

  cast(yylex).SetAstRoot($$.node)
}

declaration: fun_declaration
{  
  functionDeclNode := newFunctionDeclNode($1.funcName, $1.params, $1.node)
  $$.funcDecl = append($$.funcDecl, functionDeclNode)

  cast(yylex).InsertFuncDecl(functionDeclNode)
}
| fun_declaration declaration
{  
  functionDeclNode := newFunctionDeclNode($1.funcName, $1.params, $1.node)
  $$.funcDecl = append($2.funcDecl, functionDeclNode)

  cast(yylex).InsertFuncDecl(functionDeclNode)
}

fun_declaration: FUNC IDENTIFIER BEGIN_EXPRESSION params END_EXPRESSION block
{ 
  $$.funcName = $2.s
  $$.params = $4.params
  $$.node = $6.node
}

params: 
{
}
| param_list
{
  $$ = $1
}

param_list: param
{
  $$.params = append($$.params, $1.s)
}
| param_list COMMA param
{ 
  $$.params = append($1.params, $3.s)
}

param: IDENTIFIER
{
  $$ = $1
}

block: BEGIN_BLOCK statement_list END_BLOCK
{
  $$.node = $2.node
}

statement_list: statement
{
  statementNode := newStatementNode($1.node, nil)
  $$.node = statementNode
}
| statement statement_list
{   
  statementNode := newStatementNode($1.node, $2.node)
  $$.node = statementNode
}
| statement END_LINE
{   
  statementNode := newStatementNode($1.node, nil)
  $$.node = statementNode
}
| statement END_LINE statement_list 
{
  statementNode := newStatementNode($1.node, $3.node)
  $$.node = statementNode
}

statement: assignation 
{
  $$.node = $1.node
}
| structure
{
  $$.node = $1.node
}
| PRINT expression
{
  printNode := newPrintNode($2.node)
  $$.node = printNode
}
| func_call
{
  $$.node = $1.node
}

assignation: IDENTIFIER ASSIGN expression 
{   
  tokenNode := newTokenNode($1.s)
  assignNode := newAssignNode(tokenNode, $3.node) // assign.Right = $3.node ... the expression is already a node, so we just assign it directly
  $$.node = assignNode
}

structure: WHILE expression BEGIN_BLOCK statement_list END_BLOCK
{
   whileNode := newWhileNode($2.node, $4.node)
   $$.node = whileNode
}

expression: NUMBER 
{ 
  $$.node = newTokenNode($1.s)
}
| BEGIN_EXPRESSION expression END_EXPRESSION
{
  statementNode := newStatementNode($2.node, nil)
  $$.node = statementNode
}
| expression ADD_OP expression
{
  opNode := newOpNode($2.s, $1.node, $3.node)
  $$.node = opNode
}
| expression MUL_OP expression
{
  opNode := newOpNode($2.s, $1.node, $3.node)
  $$.node = opNode
}
| IDENTIFIER
{
  $$.node = newTokenNode($1.s)
}

func_call: IDENTIFIER BEGIN_EXPRESSION END_EXPRESSION
{
  funcCallNode := newFuncCallNode($1.s)
  $$.node = funcCallNode
}

%%
func cast(y yyLexer) *Compiler { return y.(*Lexer).p }

I don't know nex; I use golex. Regarding "compatibility": goyacc knows nothing about nex - or about golex either. The yyParse function just expects to get a value which implements this interface:

type yyLexer interface {
    Lex(lval *yySymType) int
    Error(e string)
}

Some notes on your .y file:


declarations: declaration
{
  declarationNode := newDeclarationNode($$.funcDecl)
  $$.node = declarationNode

  cast(yylex).SetAstRoot($$.node)
}

Maybe what's intended is:

/* The name inside <...> selects a %union field, so it must match the field's spelling exactly (funcDecl). */
%type <node> declarations
%type <funcDecl> declaration

%%

declarations:
        declaration
        {
                // declarations is typed <node>, so $$ already denotes the node field;
                // no further ".node" selector is needed.
                $$ = newDeclarationNode($1)
                cast(yylex).SetAstRoot($$)
        }

statement: assignation 
{
  $$.node = $1.node
}
| structure
{
  $$.node = $1.node
}
| PRINT expression
{
  printNode := newPrintNode($2.node)
  $$.node = printNode
}
| func_call
{
  $$.node = $1.node
}

Perhaps:

/* statement is typed as well: its actions assign $$ and the action-less alternatives rely on the default $$ = $1. */
%type <node> statement assignation structure expression func_call

%%
statement:
        assignation
|       structure
|       PRINT expression
        {
                $$ = newPrintNode($2)
        }
|       func_call

Note: the "dropped" semantic actions default to { $$ = $1 }, so the code works the same as before.


This cannot work:

fun_declaration: FUNC IDENTIFIER BEGIN_EXPRESSION params END_EXPRESSION block
{ 
  $$.funcName = $2.s
  $$.params = $4.params
  $$.node = $6.node
}

Perhaps

%token <s> IDENTIFIER
%type <params> params
%type <node> block

%%

fun_declaration:
        FUNC IDENTIFIER BEGIN_EXPRESSION params END_EXPRESSION block
        { 
                $<funcName>$ = $2
                $<params>$ = $4
                $<node>$ = $6
        }

It's not common for $$ to carry more than one type. It's legal, though.


Etc.

can't i keep the following types in the union?

%type <node> declarations
%type <funcdecl> declaration
%type <params> params
%type <node> block
%type <node> assignation structure expression func_call

Those types are in the union. The name inside <name> refers to a field name of the %union. For example:

%union {
        s string
        i int
}

%token <s> STR_LIT
%token <i> INT_LIT

%type <i> expr

%%

expr:
        INT_LIT '+' INT_LIT
        {
                $$ = $1 + $3
        }

why should defining those types be better...maybe less typing?

should have resolved most of the issues:

%{
package dsl
%}

%union {
  s string
  node Node
  funcName string
  funcDecl []Node
  funcParams []string
}

/* Tokens, each declared exactly once. Tokens whose value is read in an action ($1, $2) are typed <s>. */
%token WHILE
%token PRINT
%token END_LINE
%token <s> ADD_OP
%token <s> MUL_OP
%token ASSIGN
%token BEGIN_EXPRESSION
%token END_EXPRESSION
%token BEGIN_BLOCK
%token END_BLOCK
%token <s> NUMBER
%token <s> IDENTIFIER
%token FUNC
%token STRING
%token COMMA

/* Value types of the nonterminals; each <name> is a field of the %union above. */
%type <node> declarations
%type <funcDecl> declaration
%type <node> fun_declaration
%type <funcParams> params
%type <funcParams> param_list
%type <s> param
%type <node> block
%type <node> statement_list
%type <node> statement
%type <node> assignation structure expression func_call
%%

program: declarations 
{
}

declarations: declaration
{ 
  $$ = newDeclarationNode($1)
  cast(yylex).SetAstRoot($$)
}

/* declaration collects fun_declaration nodes into a []Node (union field funcDecl). */
declaration: fun_declaration
{
  // Start a fresh slice: $$ has no defined value until the action assigns it,
  // so it must not be read on the right-hand side.
  $$ = []Node{$1}
}
| fun_declaration declaration
{
  // NOTE(review): $1 is appended after the already-reduced tail $2, so the
  // slice ends up in reverse source order - confirm this is intended.
  $$ = append($2, $1)
}

fun_declaration: FUNC IDENTIFIER BEGIN_EXPRESSION params END_EXPRESSION block
{  
  functionDeclNode := newFunctionDeclNode($2, $4, $6)
  $$ = functionDeclNode

  cast(yylex).InsertFuncDecl(functionDeclNode)
}

params: 
{
}
| param_list
{
  $$ = $1
}

/* param_list accumulates parameter names into a []string (union field funcParams). */
param_list: param
{
  // Start a fresh slice: $$ has no defined value until the action assigns it,
  // so it must not be read on the right-hand side.
  $$ = []string{$1}
}
| param_list COMMA param
{
  $$ = append($1, $3)
}

param: IDENTIFIER
{
  $$ = $1
}

block: BEGIN_BLOCK statement_list END_BLOCK
{
  $$ = $2
}

statement_list: statement
{
  statementNode := newStatementNode($1, nil)
  $$ = statementNode
}
| statement statement_list
{   
  statementNode := newStatementNode($1, $2)
  $$ = statementNode
}
| statement END_LINE
{   
  statementNode := newStatementNode($1, nil)
  $$ = statementNode
}
| statement END_LINE statement_list 
{
  statementNode := newStatementNode($1, $3)
  $$ = statementNode
}

statement: assignation 
{
  $$ = $1
}
| structure
{
  $$ = $1
}
| PRINT expression
{
  printNode := newPrintNode($2)
  $$ = printNode
}
| func_call
{
  $$ = $1
}

assignation: IDENTIFIER ASSIGN expression 
{   
  tokenNode := newTokenNode($1)
  assignNode := newAssignNode(tokenNode, $3) // assign.Right = $3.node ... the expression is already a node, so we just assign it directly
  $$ = assignNode
}

structure: WHILE expression BEGIN_BLOCK statement_list END_BLOCK
{
   whileNode := newWhileNode($2, $4)
   $$ = whileNode
}

expression: NUMBER 
{ 
  $$ = newTokenNode($1)
}
| BEGIN_EXPRESSION expression END_EXPRESSION
{
  statementNode := newStatementNode($2, nil)
  $$ = statementNode
}
| expression ADD_OP expression
{
  opNode := newOpNode($2, $1, $3)
  $$ = opNode
}
| expression MUL_OP expression
{
  opNode := newOpNode($2, $1, $3)
  $$ = opNode
}
| IDENTIFIER
{
  $$ = newTokenNode($1)
}

func_call: IDENTIFIER BEGIN_EXPRESSION END_EXPRESSION
{
  funcCallNode := newFuncCallNode($1)
  $$ = funcCallNode
}

%%
func cast(y yyLexer) *Compiler { return y.(*Lexer).p }
  • Manual <type> is a no-questions-asked type cast and as such it's inherently error prone.
  • Easier to change the type name. Fewer places to handle that.
  • Enables goyacc to do type checking and catch more static errors.
  • Less typing. It enables goyacc to effectively (and safely) insert the <type> tag in $n or $$ where appropriate and/or necessary.