Function-Based Regex Builder Module Without a Clever Name
A tool to help you interactively develop regexes.
Use compose() to join multiple patterns into one.
from rx import compose, group, match, maybe, then
prefix = compose(
match('http'), group(match('s')), maybe(), then('://')
)
print(prefix) # 'http(s)?\:\/\/'
Use rx() to create a compiled regex object from a pattern.
from rx import compose, group, match, maybe, rx, then
prefix = compose(
match('http'), group(match('s')), maybe(), then('://')
)
compiled_pattern = rx(prefix)
compiled_pattern == re.compile(r'http(s)?\:\/\/', re.UNICODE) # True
In the US, one way a phone number can be expressed is as a sequence of:
- area code: three digits, possibly wrapped with parentheses
- separator: space(s), dash, dot, or non-existent
- prefix: three digits
- separator: space(s), dash, dot, or non-existent
- suffix: four digits
from rx import (compose, dot, digit, exactly_n_times,
group, match, maybe, OR, rx, spaces, then)
area_code = compose(
digit(), exactly_n_times(3),
OR(),
match('('), digit(), exactly_n_times(3), then(')')
)
print(area_code) # '\\d{3}|\\(\\d{3}\\)'
separator = compose(
spaces(), OR(), dot(), OR(), match('-')
)
separator_maybe = compose(group(separator), maybe())
print(separator_maybe) # '(\\s+|\\.|\\-)?'
prefix = compose(digit(), exactly_n_times(3))
print(prefix) # '\\d{3}'
suffix = compose(digit(), exactly_n_times(4))
print(suffix) # '\\d{4}'
phone_number_pattern = compose(
area_code,
separator_maybe,
prefix,
separator_maybe,
suffix
)
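rx(phone_number_pattern) == re.compile('\\d{3}|\\(\\d{3}\\)(\\s+|\\.|\\-)?\\d{3}(\\s+|\\.|\\-)?\\d{4}') # True
# Note: '|' has the lowest precedence in a regex, so as written the alternation splits the whole pattern: it matches either a bare '\\d{3}' or '\\(\\d{3}\\)' followed by the rest of the number. To confine the alternation to the area code, wrap it in a group first, e.g. non_capturing_group(area_code).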
Pattern Function | Example Input | Example Output |
---|---|---|
match(v) | match('Donuts+') | Donuts\\+ |
then(v) (alias of match(v)) | then('Donuts+') | Donuts\\+ |
Pattern Function | Example Input | Example Output |
---|---|---|
any_of(v) | any_of('abcd') | [abcd] |
anything_but(v) | anything_but('a backpack') | [^a\\ backpack]* |
char_range(from, to) | char_range('A','Z') | A-Z |
something_but(v) | something_but('nothing') | [^nothing]+ |
Pattern Function | Example Input | Example Output |
---|---|---|
group(*patterns) | group(match('p'), OR(), match('q')) | (p\|q) |
named_group(name, *patterns) | named_group('secure', match('https://')) | (?P<secure>https\\:\\/\\/) |
non_capturing_group(*patterns) | non_capturing_group(match('http')) | (?:http) |
Pattern Function | Example Input | Example Output |
---|---|---|
followed_by(v) | followed_by('...') | (?=\\.\\.\\.) |
not_followed_by(v) | not_followed_by('...') | (?!\\.\\.\\.) |
Pattern Function | Example Input | Example Output |
---|---|---|
preceded_by(v) | preceded_by('...') | (?<=\\.\\.\\.) |
not_preceded_by(v) | not_preceded_by('...') | (?<!\\.\\.\\.) |
Pattern Function | Output |
---|---|
between_n_and_m_times(n,m) | {n,m} |
between_n_and_m_times_lazy(n,m) | {n,m}? |
maybe() | ? |
one_or_more_times() | + |
OR() | \| |
zero_or_more_times() | * |
Pattern Function | Output |
---|---|
digit() | \d |
end_of_line() | $ |
space() | \s |
spaces() | \s+ |
start_of_line() | ^ |
tab() | \t |
word() | \w+ |
word_boundary() | \b |
Pattern Function | Output |
---|---|
anything() | .* |
dot() | \\. |
linebreak() | (?:(?:\n)\|(?:\r\n)) |
something() | .+ |