/laucha

Regular expressions parser

Primary LanguagePython

Laucha, Regular Expressions Parser

Given a regular expression this program builds its parsing tree. The parser is a manually written recursive descent parser from a BNF description of a RE syntax.

Example output:

$ python laucha.py 

(a|b)*aab
[('special', '('),
 ('literal', 'a'),
 ('special', '|'),
 ('literal', 'b'),
 ('special', ')'),
 ('special', '*'),
 ('literal', 'a'),
 ('literal', 'a'),
 ('literal', 'b'),
 ('eos', None)]
('RE',
 ('RE',
  ('simple_RE',
   ('basic_RE',
    ('star',
     ('elementary_RE',
      ('group',
       ('special', '('),
       ('RE',
        ('simple_RE',
         ('basic_RE', ('elementary_RE', ('char', ('literal', 'a'))))),
        ('union',
         ('special', '|'),
         ('simple_RE',
          ('basic_RE', ('elementary_RE', ('char', ('literal', 'b'))))))),
       ('special', ')'))),
     ('special', '*'))),
   ('concatenation',
    ('simple_RE',
     ('basic_RE', ('elementary_RE', ('char', ('literal', 'a')))),
     ('concatenation',
      ('simple_RE',
       ('basic_RE', ('elementary_RE', ('char', ('literal', 'a')))),
       ('concatenation',
        ('simple_RE',
         ('basic_RE', ('elementary_RE', ('char', ('literal', 'b')))))))))))),
 ('eos', None))

([cC]at)|([dD]og)
[('special', '('),
 ('special', '['),
 ('literal', 'c'),
 ('literal', 'C'),
 ('special', ']'),
 ('literal', 'a'),
 ('literal', 't'),
 ('special', ')'),
 ('special', '|'),
 ('special', '('),
 ('special', '['),
 ('literal', 'd'),
 ('literal', 'D'),
 ('special', ']'),
 ('literal', 'o'),
 ('literal', 'g'),
 ('special', ')'),
 ('eos', None)]
('RE',
 ('RE',
  ('simple_RE',
   ('basic_RE',
    ('elementary_RE',
     ('group',
      ('special', '('),
      ('RE',
       ('simple_RE',
        ('basic_RE',
         ('elementary_RE',
          ('set',
           ('positive_set',
            ('special', '['),
            ('set_items',
             ('set_item', ('char', ('literal', 'c'))),
             ('set_items',
              ('set_item', ('char', ('literal', 'C'))))),
            ('special', ']'))))),
        ('concatenation',
         ('simple_RE',
          ('basic_RE', ('elementary_RE', ('char', ('literal', 'a')))),
          ('concatenation',
           ('simple_RE',
            ('basic_RE',
             ('elementary_RE', ('char', ('literal', 't')))))))))),
      ('special', ')'))))),
  ('union',
   ('special', '|'),
   ('simple_RE',
    ('basic_RE',
     ('elementary_RE',
      ('group',
       ('special', '('),
       ('RE',
        ('simple_RE',
         ('basic_RE',
          ('elementary_RE',
           ('set',
            ('positive_set',
             ('special', '['),
             ('set_items',
              ('set_item', ('char', ('literal', 'd'))),
              ('set_items',
               ('set_item', ('char', ('literal', 'D'))))),
             ('special', ']'))))),
         ('concatenation',
          ('simple_RE',
           ('basic_RE',
            ('elementary_RE', ('char', ('literal', 'o')))),
           ('concatenation',
            ('simple_RE',
             ('basic_RE',
              ('elementary_RE', ('char', ('literal', 'g')))))))))),
       ('special', ')'))))))),
 ('eos', None))