Engineering a Compiler, Second Edition
实现EaC2中的编译器算法,感谢PLCT的方舟·编译技术入门与实战视频课程提供的讲解。
# 构建
mkdir build && cd build
cmake ../ && cmake --build .
# test
./test ../test/
# main
cd ../ && ./main
- lex // 生成最小DFA,从RE -> NFA -> DFA -> minDFA
- spec // 定义所解析语言的词法规则和语法规则
- spec.hpp // 定义词法规则和语法规则的模版
- pl0 // PL/0语言
- pl0.hpp // 定义好的PL/0词法规则和语法规则
- test.cpp // 示例展示解析源码的程序,输出token流和ast
- () // EaC2中的括号语言
- 01 // 随意定义的语言
- src
- Lex.hpp // 词法生成器模版类
- LR1.hpp // LR(1)语法生成器模版类
- parser.hpp // 整合起来的parser模版类
- utils // 帮助函数
下面示例展示如何定义PL/0语言的词法和语法规则,终结符和非终结符的定义请看test/pl0-spec.hpp
文件
词法规则定义:
const Lex PL0_LEX{
{
R"(const|var|procedure|call|begin|end|while|do|odd|if|=|,|;|:=|?|!|#|<|<=|>|>=|\+|-|\*|/|\(|\)|.)",
TerminalSymbolType::keyword
},
{
"[a-zA-Z_][a-zA-Z_0-9]*",
TerminalSymbolType::ident
},
{
"[0-9]+",
TerminalSymbolType::number
},
{
"[ \n\t]+",
TerminalSymbolType::ignore
}
};
语法规则定义:
const Spec::CFG<Symbol> PL0_CFG{
{
Symbol(NonterminalSymbolType::Program),
{
{
Symbol(NonterminalSymbolType::Block),
Symbol(TerminalSymbolType::keyword, ".")
}
}
},
{
Symbol(NonterminalSymbolType::Block),
{
{
Symbol(NonterminalSymbolType::Assign),
Symbol(NonterminalSymbolType::Declare),
Symbol(NonterminalSymbolType::Procedure),
Symbol(NonterminalSymbolType::Statement)
}
}
},
{
Symbol(NonterminalSymbolType::Assign),
{
{
Symbol(TerminalSymbolType::keyword, "const"),
Symbol(TerminalSymbolType::ident),
Symbol(TerminalSymbolType::keyword, "="),
Symbol(TerminalSymbolType::number),
Symbol(NonterminalSymbolType::AssignRest),
Symbol(TerminalSymbolType::keyword, ";")
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::AssignRest),
{
{
Symbol(TerminalSymbolType::keyword, ","),
Symbol(TerminalSymbolType::ident),
Symbol(TerminalSymbolType::keyword, "="),
Symbol(TerminalSymbolType::number),
Symbol(NonterminalSymbolType::AssignRest)
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::Declare),
{
{
Symbol(TerminalSymbolType::keyword, "var"),
Symbol(TerminalSymbolType::ident),
Symbol(NonterminalSymbolType::DeclareRest),
Symbol(TerminalSymbolType::keyword, ";")
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::DeclareRest),
{
{
Symbol(TerminalSymbolType::keyword, ","),
Symbol(TerminalSymbolType::ident),
Symbol(NonterminalSymbolType::DeclareRest)
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::Procedure),
{
{
Symbol(TerminalSymbolType::keyword, "procedure"),
Symbol(TerminalSymbolType::ident),
Symbol(TerminalSymbolType::keyword, ";"),
Symbol(NonterminalSymbolType::Block),
Symbol(TerminalSymbolType::keyword, ";"),
Symbol(NonterminalSymbolType::Procedure)
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::Statement),
{
{
Symbol(TerminalSymbolType::ident),
Symbol(TerminalSymbolType::keyword, ":="),
Symbol(NonterminalSymbolType::Expression)
},
{
Symbol(TerminalSymbolType::keyword, "call"),
Symbol(TerminalSymbolType::ident)
},
{
Symbol(TerminalSymbolType::keyword, "?"),
Symbol(TerminalSymbolType::ident)
},
{
Symbol(TerminalSymbolType::keyword, "!"),
Symbol(NonterminalSymbolType::Expression)
},
{
Symbol(TerminalSymbolType::keyword, "begin"),
Symbol(NonterminalSymbolType::Statement),
Symbol(NonterminalSymbolType::StatementRest),
Symbol(TerminalSymbolType::keyword, "end")
},
{
Symbol(TerminalSymbolType::keyword, "if"),
Symbol(NonterminalSymbolType::Condition),
Symbol(TerminalSymbolType::keyword, "then"),
Symbol(NonterminalSymbolType::Statement)
},
{
Symbol(TerminalSymbolType::keyword, "while"),
Symbol(NonterminalSymbolType::Condition),
Symbol(TerminalSymbolType::keyword, "do"),
Symbol(NonterminalSymbolType::Statement)
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::StatementRest),
{
{
Symbol(TerminalSymbolType::keyword, ";"),
Symbol(NonterminalSymbolType::Statement),
Symbol(NonterminalSymbolType::StatementRest)
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::Condition),
{
{
Symbol(TerminalSymbolType::keyword, "odd"),
Symbol(NonterminalSymbolType::Expression)
},
{
Symbol(NonterminalSymbolType::Expression),
Symbol(NonterminalSymbolType::RelationOp),
Symbol(NonterminalSymbolType::Expression),
}
}
},
{
Symbol(NonterminalSymbolType::RelationOp),
{
{
Symbol(TerminalSymbolType::keyword, "=")
},
{
Symbol(TerminalSymbolType::keyword, "#")
},
{
Symbol(TerminalSymbolType::keyword, "<")
},
{
Symbol(TerminalSymbolType::keyword, "<=")
},
{
Symbol(TerminalSymbolType::keyword, ">")
},
{
Symbol(TerminalSymbolType::keyword, ">=")
}
}
},
{
Symbol(NonterminalSymbolType::Expression),
{
{
Symbol(NonterminalSymbolType::Term),
Symbol(NonterminalSymbolType::ExpressionRest)
},
{
Symbol(NonterminalSymbolType::Op1),
Symbol(NonterminalSymbolType::Term),
Symbol(NonterminalSymbolType::ExpressionRest)
}
}
},
{
Symbol(NonterminalSymbolType::ExpressionRest),
{
{
Symbol(NonterminalSymbolType::Op1),
Symbol(NonterminalSymbolType::Term),
Symbol(NonterminalSymbolType::ExpressionRest)
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::Op1),
{
{
Symbol(TerminalSymbolType::keyword, "+")
},
{
Symbol(TerminalSymbolType::keyword, "-")
}
}
},
{
Symbol(NonterminalSymbolType::Term),
{
{
Symbol(NonterminalSymbolType::Factor),
Symbol(NonterminalSymbolType::TermRest)
}
}
},
{
Symbol(NonterminalSymbolType::TermRest),
{
{
Symbol(NonterminalSymbolType::Op2),
Symbol(NonterminalSymbolType::Factor),
Symbol(NonterminalSymbolType::TermRest)
},
{
Symbol(SymbolType::EPSILON)
}
}
},
{
Symbol(NonterminalSymbolType::Op2),
{
{
Symbol(TerminalSymbolType::keyword, "*")
},
{
Symbol(TerminalSymbolType::keyword, "/")
}
}
},
{
Symbol(NonterminalSymbolType::Factor),
{
{
Symbol(TerminalSymbolType::ident)
},
{
Symbol(TerminalSymbolType::number)
},
{
Symbol(TerminalSymbolType::keyword, "("),
Symbol(NonterminalSymbolType::Expression),
Symbol(TerminalSymbolType::keyword, ")")
}
}
}
};
定义好了之后,就可以调用写好的词法和语法分析生成器模版类、及parser解析源代码了(代码文件为test/pl0/test.cpp
)。
例如解析test/pl0/code1.pl0
源码的输出可以查看test/pl0/ast1.txt
文件。
- 将分开判断token的minDFA合成一个,方法是在生成每个NFA后合并成一个,这时就需要在状态上携带所识别的Token类型,以便在后面识别后能知道识别的是什么类型的Token
- 实现词法作用域等语义的检查