Contest problems database in von format (all problems are in Portuguese). Base de dados de problemas no formato do von (todos os problemas em português).
For the PUIDs to be generated correctly, the function inferPUID
in the puid.py
file from von must be modified as shown below. Para que os PUIDs sejam gerados corretamente, a função inferPUID
no arquivo puid.py
do von deve ser modificada conforme mostrado abaixo.
# Source-string patterns for the Brazilian olympiads.
# Each one captures a four-digit `year` and a numeric `problem`.

# "OBMEP N<level 1-3> F<phase 1-2> <year>/<problem>"
re_obmep = re.compile(
    r"OBMEP N(?P<level>[123]) F(?P<phase>[12]) (?P<year>\d{4})/(?P<problem>\d+)"
)

# "Canguru <Benjamim|Cadete|Júnior> <year>/<problem>"
re_canguru = re.compile(
    r"Canguru (?P<category>Benjamim|Cadete|Júnior) (?P<year>\d{4})/(?P<problem>\d+)"
)

# "OBM <year>/<problem>"
re_obm = re.compile(
    r"OBM (?P<year>\d{4})/(?P<problem>\d+)"
)

# "OBM Banco <year>/<problem>"
re_obm_banco = re.compile(
    r"OBM Banco (?P<year>\d{4})/(?P<problem>\d+)"
)
def inferPUID(source: str) -> str:
    """Infer a short problem identifier (PUID) from a problem source string.

    Every occurrence of "Finals" in ``source`` is first replaced by "F".
    The source is then handled by the first rule that applies:

    1. Brazilian olympiad formats (``re_obmep``, ``re_canguru``, ``re_obm``,
       ``re_obm_banco``, tried in that order): the source is rewritten as
       ``<last two digits of year><contest><level/phase>P<problem>``, where
       the contest part is ``lookup[contest]`` when present and otherwise
       ``getOnlyAlphanum(contest)``. The length threshold is 11.
    2. ``H`` followed only by digits: returned unchanged.
    3. A source starting with one of ``sorted_lookup_keys``: that prefix is
       replaced by its ``lookup`` value. The length threshold is 30.
    4. Anything else: the length threshold is 8.

    For rules 1, 3 and 4 the result is passed through ``getOnlyAlphanum``
    and returned if its length does not exceed the threshold; otherwise
    ``"Z"`` followed by the first seven upper-cased hex digits of the
    SHA-256 of that string is returned.

    Args:
        source: The problem source, e.g. ``"OBMEP N1 F2 2023/5"``.

    Returns:
        The inferred PUID.

    Raises:
        IndexError: If ``source`` is empty (rule 2 inspects ``source[0]``).
    """
    source = source.replace("Finals", "F")

    # Each Brazilian olympiad pattern paired with the contest name used to
    # build the PUID (the Canguru name depends on the matched category).
    brazilian_formats = (
        (re_obmep, "OBMEP"),
        (re_canguru, "Canguru"),
        (re_obm, "OBM"),
        (re_obm_banco, "OBMBanco"),
    )

    # Check for the Brazilian olympiad formats first.
    # NOTE(review): `match` only anchors at the start, so trailing text after
    # the problem number is ignored — confirm that is intended.
    for regex, contest in brazilian_formats:
        m = regex.match(source)
        if m is None:
            continue
        d = m.groupdict()
        year_last_two = d["year"][-2:]
        if regex is re_canguru:
            contest = f"Canguru{d['category']}"
        # Only OBMEP carries a level/phase component.
        level_phase = f"N{d['level']}F{d['phase']}" if regex is re_obmep else ""
        contest_code = lookup.get(contest, getOnlyAlphanum(contest))
        source = f"{year_last_two}{contest_code}{level_phase}P{getOnlyAlphanum(d['problem'])}"
        thresh = 11
        break
    else:
        # No Brazilian format matched: fall back to the generic logic.
        if source[0] == "H" and source[1:].isdigit():
            return source
        for k in sorted_lookup_keys:
            if source.startswith(k):
                source = lookup[k] + source[len(k):]
                thresh = 30
                break
        else:
            thresh = 8

    source = getOnlyAlphanum(source)
    if len(source) <= thresh:
        return source
    # Still too long: return a short hash instead. UTF-8 produces the same
    # bytes as ASCII for ASCII-only text and does not raise should an
    # accented character survive getOnlyAlphanum.
    return "Z" + hashlib.sha256(source.encode("utf-8")).hexdigest()[0:7].upper()