dump grammar as ebnf
bloerwald opened this issue · 1 comments
bloerwald commented
The py3 code has dbd_grammar.py, which already dumps something EBNF-like based on the parser it uses.
The output needs manual cleanup, though: it is fairly ugly and uses poor rule names, among other problems.
(* Grammar as dumped from the parser. Text between `?` marks is a special sequence described informally, not a formal rule. *)
(* Top level: a COLUMNS line, one or more column definitions, an empty line, then one or more definitions. *)
dbd_file = 'COLUMNS', EOL, ( column_definition, EOL ), {column_definition, EOL}, EOL, ( definition, ( EOF | EOL ) ), {definition, ( EOF | EOL )};
(* One column: type, optional foreign key, name, optional '?' marker, optional trailing // comment. *)
column_definition = column_type, [foreign_identifier], ? SPACE ?, column_name, ['?'], [eol_c_comment];
(* One definition: one or more header lines followed by one or more entry lines. *)
definition = definition_header, {definition_header}, ( definition_entry, EOL ), {definition_entry, EOL};
column_type = ( 'uint' | 'int' | 'string' | 'locstring' | 'float' );
foreign_identifier = '<', identifier, '::', identifier, '>';
column_name = identifier;
eol_c_comment = [? SPACE ?], '//', ? rest of the line ?;
(* A header line is exactly one of BUILD, LAYOUT or COMMENT. *)
definition_header = ( definition_BUILD | definition_LAYOUT | definition_COMMENT ), EOL;
(* One entry: optional $annotation$, column name, optional <int_width>, optional [array_size], optional trailing comment. *)
definition_entry = ['$', annotation, '$'], column_name, ['<', int_width, '>'], ['[', array_size, ']'], [eol_c_comment];
identifier = ? WORD('A-Za-z_', 'A-Za-z0-9_') ?;
definition_BUILD = 'BUILD', ? SPACE ?, build_version_range, {build_version_range};
definition_LAYOUT = 'LAYOUT', ? SPACE ?, layout_hash, {layout_hash};
definition_COMMENT = 'COMMENT', ? SPACE ?, ? rest of the line ?;
annotation = identifier;
int_width = integer;
array_size = integer;
(* Either a single build version or an inclusive-looking "first-last" pair; NOTE(review): range semantics are not stated here. *)
build_version_range = build_version, ['-', build_version];
layout_hash = ? WORD('a-fA-F0-9') ?;
integer = ? WORD('0-9') ?;
(* Four dot-separated integers. *)
build_version = integer, '.', integer, '.', integer, '.', integer;
Warpten commented
Here's my 20 minutes of effort as an ANTLR v4 combined grammar. It's missing some stuff:
- Inlined comments in the file
- `COMMENT foo` lines: I don't know where those are supposed to appear, so I chose to ignore them.
People that actually write grammars will probably roll their eyes to the back of their skulls when they see the formatting but whatever.
I tried to make some tokens skipped but that just completely breaks everything.
// ANTLR v4 combined grammar for DBD definition files.
// Lexer rules (UPPER_CASE) are listed first, parser rules (lower_case) after;
// `file` is the entry rule.
grammar ExprLexer;

// ---------------------------------------------------------------------------
// Lexer rules. When two rules match the same text with the same length, the
// rule listed first wins, so keywords are declared before IDENTIFIER.
// ---------------------------------------------------------------------------

LAYOUT : 'LAYOUT' ;
COLUMNS : 'COLUMNS' ;
// Not skipped: the `builds` parser rule references BUILD, and a skipped token
// is discarded by the lexer and can never be matched by the parser.
BUILD : 'BUILD' ;
// Column base types; same alternatives as `column_type` in the EBNF dump
// (which includes 'uint').
TYPE : 'uint' | 'int' | 'float' | 'string' | 'locstring' ;
// The dots are quoted literals. A bare `.` in a lexer rule is the wildcard
// and would accept any character between the digit groups.
BUILD_NUMBER : [0-9]+ '.' [0-9]+ '.' [0-9]+ '.' [0-9]+ ;
// NOTE(review): a name made only of hex letters/digits lexes as LAYOUT_HASH,
// not IDENTIFIER, because this rule is listed first. Confirm whether such
// column names occur.
LAYOUT_HASH : [A-Fa-f0-9]+ ;
ANNOTATION : 'id' | 'relation' | 'noninline' ;
// Foreign reference of the form <Table::Column>, lexed as a single token.
REFERENCE : '<' [A-Za-z0-9_]+ '::' [A-Za-z0-9_]+ '>';
IDENTIFIER : [a-zA-Z0-9_]+ ;
GUESSED : '?' ;
// Trailing `// ...` comment; only the listed characters are accepted.
INLINE_COMMENT : '//' [ a-zA-Z0-9._()-]* ;
// DASH and COMMA are real tokens (not skipped) because `builds`, `layout`
// and `annotation_list` reference them; in particular the
// `BUILD_NUMBER DASH BUILD_NUMBER` range alternative needs DASH to exist.
DASH : '-' ;
COMMA : ',' ;
DOLLAR : '$' ;
// Whitespace, including line breaks, is dropped, so the parser is not
// line-aware.
WS: [ \t\n\r\f]+ -> skip ;
// Stray colons are dropped; the `::` inside REFERENCE is part of that token
// and is unaffected.
COLON : ':' -> skip;

// ---------------------------------------------------------------------------
// Parser rules.
// ---------------------------------------------------------------------------

reference : REFERENCE;
// One entry of the COLUMNS section: type, optional foreign reference, name,
// optional '?' marker, optional trailing comment.
column_definition : TYPE reference? IDENTIFIER GUESSED? INLINE_COMMENT?;
column_definitions : column_definition+;
// Integer width suffix such as <32> or <u8>.
size : '<' ('8' | '16' | '32' | '64' | 'u8' | 'u16' | 'u32' | 'u64') '>' ;
// LAYOUT keyword followed by zero or more hashes, optionally comma-separated.
layout : LAYOUT (LAYOUT_HASH COMMA?)* ;
// Zero or more build entries; each is a single build number or a
// `first-last` range, optionally preceded by BUILD and followed by a comma.
builds : (BUILD? (BUILD_NUMBER | (BUILD_NUMBER DASH BUILD_NUMBER)) COMMA?)* ;
// $annotation[,annotation...]$
annotation_list : DOLLAR (ANNOTATION COMMA?)* DOLLAR ;
// One column use inside a structure definition.
// NOTE(review): array sizes (`[n]`) and trailing comments from the EBNF
// `definition_entry` rule are not handled here.
column_reference : annotation_list? IDENTIFIER size? ;
structure_definition : layout? builds column_reference+ ;
// Entry rule: COLUMNS header, column definitions, then one or more structure
// definitions up to end of input.
file : COLUMNS column_definitions structure_definition+ EOF ;