Header only template-based LL parser generator.
fully supported with clang / c++14
Add include
directory to INCLUDE_PATH
and
#include <parser.hpp>
will include every features to your application
Note that in all examples,
#include <parser.hpp>
namespace ep = eh::parser;
// @FileName : examples/calculator.cpp
int main()
{
// REGEX : [0-9]
// Pattern Match and return its digit as integer type
auto digit_parser =
ep::action(
ep::range( '0', '9' ),
// captured Attribute of range() is decltype(*iterator) = char
// this functor will be called every range() pattern matched
[]( char captured_character )
{
// return char->digit
// this will be new Attribute of this parser
return (int)(captured_character - '0');
}
);
// make action() using [] operator
auto digit_parser2 =
ep::range('0', '9')[ digit_parser.functor ];
// REGEX : [0-9]+
auto integer_parser =
ep::action(
ep::repeat( digit_parser, 1, 12 ),
// captured Attribute of repeat() is vector< child Attribute > = vector<int>
[]( std::vector<int> &captured_digits )
{
int result = 0;
for( int d : captured_digits )
{
result = result * 10 + d;
}
// return accumulated integer;
// this will be new Attribute of this parser
return result;
}
);
// REGEX : integer_parser ( ('+'|'-') integer_parser )*
auto plus_minus_expression_noaction =
ep::seq( // Attribute = tuple< Attr of child > = tuple< int, vector< tuple<char,int> > >
integer_parser, // Attribute = int
ep::repeat( // Attribute = vector< Attr of child > = vector< tuple<char,int> >
ep::seq( // Attr = tuple< Attrs of child > = tuple< char, int >
ep::or_( // char ( only if all child's Attr are same )
ep::one('-'), // char
ep::one('+') // char
),
integer_parser // int
),
0, 10000000
)
);
// or using literals and operator
using namespace ep::literals;
auto plus_minus_expression_noaction2 =
integer_parser
>>
*(
('-'_p | '+'_p) >> integer_parser
);
auto plus_minus_expression =
ep::action(
plus_minus_expression_noaction,
// captured tuple will be automatically unpacked
[]( int first_captured_integer, std::vector< std::tuple<char,int> > const& rhs_captured )
{
int result = first_captured_integer;
// process right-hand-side operations
for( auto tup : rhs_captured )
{
char operation = std::get<0>( tup );
int rhs_integer = std::get<1>( tup );
if( operation == '+' ){ result += rhs_integer; }
else if( operation == '-' ){ result -= rhs_integer; }
}
// return calculated integer;
// this will be new Attribute of this parser
return result;
}
);
std::string input_string = "1+22-333+4444-55555+666666 unknown string";
auto begin = input_string.begin();
auto parse_result = plus_minus_expression.parse( begin, input_string.end() );
std::cout << "Input String : " << input_string << "\n";
std::cout << "Parse Result : " << std::boolalpha << parse_result.is_valid() << "\n";
std::cout << "Parsed Data : " << parse_result.get() << " = " << 1+22-333+4444-55555+666666 << "\n";
std::cout << "Iterator 'begin' point at : " << std::string( begin, input_string.end() ) << "\n";
}
see 'examples/basic.cpp' for basic tutorial
Every Parser Object have parse( begin&:iterator, end:iterator )
function
that performs pattern-mathing scheme
for example, range( min_, max_ )
returns a Parser Object.
It's parse() function will consume one iterator
and returns whether a character is in range [min_, max_]
if the pattern doesn't match, iterator will not move
auto one_small_alphabet = ep::range( 'a', 'z' );
auto one_big_alphabet = ep::range( 'A', 'Z' );
auto one_digit = ep::range( '0', '9' );
std::string my_string = "abcdefg 123456";
auto begin = my_string.begin();
/*
pattern matched, 'begin' will now point my_string[1]
*/
one_small_alphabet.parse( begin, my_string.end() );
/*
pattern doesn't match, 'begin' will not move
*/
one_big_alphabet.parse( begin, my_string.end() );
parse()
function returns an parse_result_t<Attr>
value
which contains the result of pattern matching
and the parsed data from input stream.
for example, range() Parser's ( and any other single-iterator-consuming Parser Objects ) Attr is char
or decltype(*iterator)
that returns the character it consumed directly.
auto parse_result = one_small_alphabet.parse( begin, my_string.end() );
std::cout << "Parse Result : " << std::boolalpha << parse_result.is_valid() << "\n";
std::cout << "Parsed Character : " << parse_result.get() << "\n";
// at this point, 'begin' points my_string[2]
eh::parser::unused_t
is special class for Parser Object's Attribute. Any parser that does not extract data should use unusesd_t
as its Attribute.
There are several Parser Object Wrapper that takes other Parser Object and performs modified action on it.
or_( parser1, parser2, ... , parserN )
will test every N parsers until one of them is successfully matched.
Attribute
of or( p1, p2, ..., pn )
is Attribute
of p1
if every parsers have same Attribute
, else unused_t
.
Parser1 | Parser2 | Merged |
---|---|---|
unused_t |
unused_t |
unused_t |
unused_t |
T |
T |
T |
unused_t |
T |
T1 |
T2 |
unused_t |
Table: Merged Attribute of or_
Parser
seq( parser1, parser2, ..., parserN )
will test every N parsers sequentially.
Attribute
of seq( p1, p2, ..., pn ) is tuple< Attribute of p1, Attribute of p2, ..., Attribute of pn >
, unused_t
will not be captured into tuple.
Parser1 | Parser2 | Merged |
---|---|---|
unused_t |
unused_t |
unused_t |
unused_t |
T |
T |
T |
unused_t |
T |
T1 |
T2 |
tuple<T1,T2> |
T1 |
tuple<Ts...> |
tuple<T1,Ts...> |
tuple<Ts...> |
T2 |
tuple<Ts...,T2> |
Table: Merged Attribute of seq
Parser
repeat( parser, min_, max_ )
will test 'parser' X times where X is in range [min_,max_].
Attribute
of repeat( p ) is vector< Attribute of p >
if Attribute of p
is not unused_t
, else unused_t
.
Child Parser | Attribute of repeat |
---|---|
unused_t |
unused_t |
T |
vector<T> |
Table: New Attribute of repeat
Parser
auto one_alphabet_parser = ep::or_( one_small_alphabet, one_big_alphabet );
// this is same as one_alphabet_parser
auto one_alphabet_parser2 = one_small_alphabet | one_big_alphabet;
auto one_alphabet_and_digit = ep::seq( one_alphabet_parser, one_digit );
auto one_alphabet_and_digit2 = one_alphabet_parser >> one_digit;
// [a-zA-Z]*
auto alphabet_star = ep::repeat( one_alphabet_parser, 0, 9999999 );
// [a-zA-Z]+
auto alphabet_plus = ep::repeat( one_alphabet_parser, 1, 9999999 );
action( Parser, Functor )
is special parser wrapper.
It performs same pattern-matching as its child parser,
but call a functor() every pattern matching is successfully done.
functor() could take child parser's Attribute
as its argument,
and the returned value will be its new Attribute
. If the child's Attribute
is unused_t
, the functor would not take any arguments.
If the functor does not return any value( a void function ), the Attribute
of Action Wrapper will be unused_t
.
auto action_parser = ep::action(
one_small_alphabet,
[]( int ch )
{
std::cout << "Small Alphabet Parsing Successfully Done!\n";
// now 'ch*2' is its new Attr
return ch*2;
} );
// pattern matched, functor() will be called
// and its Attr would be 'c'*2
auto action_result = action_parser.parse( begin, my_string.end() );
std::cout << "New Attr : " << action_result.get() << "\n";
Child Parser | Argument must be captured as |
---|---|
unused_t |
functor() |
T attr |
functor(attr) |
tuple<Ts...> attrs |
functor( attrs... ) |
Table: Functor and its argument
Returned Type | Attribute of action |
---|---|
void |
unused_t |
T |
T |
Table: functor's returned value and its Attribute
There are more special Parser Wrapper for advaced Parser creation.
Since every Parser Object's implementation is based on template idoms, eg. CRTP or SFINAE, it can get benefit from compiler's smart optimization. But, because Parser Objects are being deep-copied and must be defined prior to its actual invoking, we can't make cyclic, or recursive patterns.
ep::rule<Attribute,Iterator>
is virtual class based Parser Object that can be assigned as any parser objects.
auto compile_time_pattern1 = ep::range('a', 'z');
auto compile_time_pattern2 = ep::range('0', '9');
ep::rule<int,std::string::iterator> virtual_pattern, virtual_reference_pattern;
std::string str = "123123 abcabc";
auto begin = str.begin();
// this takes the reference of 'virtual_pattern'
virtual_reference_pattern = ep::ref( virtual_pattern );
// assign as small-alphabet parser
virtual_pattern = compile_time_pattern1;
// 'virtual_reference_pattern' will be assigned too
// match fail
virtual_pattern.parse( begin, str.end() );
// assign as digit parser
virtual_pattern = compile_time_pattern2;
// match success
virtual_pattern.parse( begin, str.end() );
virtual_reference_pattern.parse( begin, str.end() );
examples/compiler/
> cat examples/compiler/testsource.txt
func1()
{
var1 = 20;
// this is comment
print var1 * 10 + 10;
return;
print 20;
}
main()
{
func1();
/*
this
is also
a comment
*/
func2();
}
func2()
{
print 30;
return;
print 40;
}
> ./compiler ../examples/compiler/testsource.txt
Start Tokenizing...
CPP Comment: // this is comment
C Comment: /*
this
is also
a comment
*/
Tokenizing Result: false
func1: 1002
(: 40
): 41
{: 123
var1: 1002
=: 61
20: 1001
;: 59
print: 1004
var1: 1002
*: 42
10: 1001
+: 43
10: 1001
;: 59
return: 1013
;: 59
print: 1004
20: 1001
;: 59
}: 125
main: 1002
(: 40
): 41
{: 123
func1: 1002
(: 40
): 41
;: 59
func2: 1002
(: 40
): 41
;: 59
}: 125
func2: 1002
(: 40
): 41
{: 123
print: 1004
30: 1001
;: 59
return: 1013
;: 59
print: 1004
40: 1001
;: 59
}: 125
Tokenizing End...
Compiling Start...
Compiling End... : true
~~~~~~~~~~~~~~~~~~~~~ Program Result ~~~~~~~~~~~~~~~~~~~~~
210
30