This library allows easy parsing of ABNF written grammar, by providing a "compilable metalanguage" as a set of C macros. The library automatically build a C binary tree of the grammar in case of syntax matching.
The use of the library consists in writing the grammar ABNF rules as C functions (one function per rule). Rule functions must have the following C prototype (literally the same, except for the function name - i.e. the State *
parameter must be named st
- it's important for the macros to work properly) :
void rulename(State *st);
The function body shouldn't contains anything other than blocks (see Blocks section), that describe the rule content. Blocks must be used as it inside of rule functions (i.e. no need of init process or local variables declaration).
The use of the parser goes through a State
struct which must be defined and initialized by the calling function (e.g. main()
). This struct makes it possible to transmit the text to be parsed, as well as the subsequent retrieval of the syntax tree.
NB: The text must be syntactically correct (i.e. st.status == MATCH
) to return a valid syntax tree. If the text is syntactically incorrect st.status == FAIL
, and the syntaxe tree is irrelevant.
The definition of State is as follows:
typedef struct {
Node *node; /* syntaxe tree */
Node *left;
char *current; /* next character to be parsed */
int len; /* len of text to be parsed */
int status; /* syntaxe matching status (FAIL or MATCH )*/
} State;
A typical State
initialization would be:
int
main(int argc, char *argv[])
{
char *s = "foo";
State st = { NULL, NULL, s, strlen(s), MATCH };
return 0;
}
The next step is to pass the State
struct to the "start rule" (i.e. the one which isn't in any ABNF rule right part) :
static void start(State *);
int
main(int argc, char *argv[])
{
char *s = "foo";
State st = { NULL, NULL, s, strlen(s), MATCH };
start(&st);
return 0;
}
static void
start(State *st)
{
STR(foo)
}
The library provide the printtree(Node *tree, int indent)
function to print the returned syntax tree (where tree
is the syntax tree, and indent
the base indent in number of htab) :
static void start(State *);
int
main(int argc, char *argv[])
{
char *s = "foo";
State st = { NULL, NULL, s, strlen(s), MATCH };
start(&st);
if (st.status == MATCH)
printtree(st.node, 0);
return 0;
}
static void
start(State *st)
{
STR(foo)
}
% ./main
string: "foo"
The following ABNF basic rules are provided by the library as rule function (see RFC 5234 - Appendix B.1.):
void alpha(State *);
void bit(State *);
void crlf(State *);
void digit(State *);
void dquote(State *);
void hexdig(State *);
void htab(State *);
void lpar(State *);
void lwsp(State *);
void octet(State *);
void rpar(State *);
void sp(State *);
void vchar(State *);
void wsp(State *);
rulename = %xn-m
rulename = %xn
void
rulename(State *st)
{
/* rulename = %xn-m */
ASCIIRANGE(n, m)
/* rulename = %xn */
ACIIRANGE(n, n)
}
STR()
is case insensitive, use ASCIIRANGE()
for case sensitive.
rulename = "s"
void
rulename(State *st)
{
STR(s)
}
rulename = block1 | block2 | ... | blockn
void
rulename(State *st)
{
CHOOSE(
OR(block1)
OR(block2)
...
OR(blockn)
)
}
rulename = block1 block2 ... blockn
void
rulename(State *st)
{
EVERY(
AND(block1)
AND(block2)
...
AND(blockn)
)
}
m
and n
must be an integer greater or equal to 0, or the INF
constant.
rulename = n*m(block)
rulename = n*(block)
rulename = *m(block)
void
rulename(State *st)
{
/* rulename = n*m(block) */
REPEAT(n, m,
bloc
)
/* rulename = n*(block) */
REPEAT(n, INF,
bloc
)
/* rulename = *m(block) */
REPEAT(0, m,
bloc
)
}
rulename = [ block ]
void
rulename(State *st)
{
OPTIONAL(block)
}
rulename = rulename1
void
rulename(State *st)
{
RULENAME(bit)
}
void
bit(State *st)
{
/* bit = %x30-31 ;[0-1] */
ASCIIRANGE(0x30, 0x31))
}
date = day month year
year = 4*DIGIT
month = FWS month-name FWS
month-name = "Jan" / "Feb" / "Mar" / "Apr" /
"May" / "Jun" / "Jul" / "Aug" /
"Sep" / "Oct" / "Nov" / "Dec"
day = 1*2DIGIT
void
date(State *st)
{
EVERY(
AND(RULENAME(day))
AND(RULENAME(month))
AND(RULENAME(year))
)
}
void
year(State *st)
{
REPEAT(4, INF,
RULENAME(digit)
)
}
void
month(State *st)
{
EVERY(
AND(ASCIIRANGE(0x2f, 0x2f))
AND(RULENAME(month_name))
AND(ASCIIRANGE(0x2f, 0x2f))
)
}
void
month_name(State *st)
{
CHOOSE(
OR(STR(Jan))
OR(STR(Feb))
/* ... */
OR(STR(Dec))
)
}
void
day(State *st)
{
REPEAT(1, 2,
RULENAME(digit)
)
}