by Romain Dehasseleer
Using Lark to translate “things” into “other things”. Because naming them would require documentation.
from lark import Lark, Transformer
import re
# Lark grammar for a single rule.
#   expr      - a statement, optionally followed by "AND-THEN" and another expr.
#   statement - "DEP" or "ARR" plus a group, with an optional "EXC" group.
#   group     - one ID, or a parenthesised comma-separated list of IDs
#               (the list itself is optional, so "()" is accepted).
#   ID        - upper-case letters/digits with an optional "**" suffix.
# The "-> name" aliases pick the NLParser method that handles each alternative.
grammar = r"""
?start: expr
?expr: statement ("AND-THEN" expr)? -> chain
statement: "DEP" group ("EXC" group)? -> dep
| "ARR" group ("EXC" group)? -> arr
group: ID -> single
| "(" [ID ("," ID)*] ")" -> multi
ID: /[A-Z0-9]+(\*\*)?/
%import common.WS
%ignore WS
"""
class NLParser(Transformer):
    """Render a parsed DEP/ARR rule as a boolean-expression string.

    Each public method is the callback for the grammar alias of the same
    name (``single``, ``multi``, ``dep``, ``arr``, ``chain``).  Every callback
    returns a plain string, so callbacks of enclosing rules receive the
    already-rendered strings of their children.
    """

    @staticmethod
    def _strip_suffix(token):
        """Return the token text with any ``**`` removed."""
        return token.value.replace("**", "")

    @staticmethod
    def _with_exclusion(keyword, items):
        """Build ``"<keyword> <group>"`` plus an optional exclusion clause.

        ``items`` holds the rendered group and, when the rule had an ``EXC``
        part, the rendered excluded group as a second element.
        """
        clause = f"{keyword} {items[0]}"
        if len(items) == 2:
            clause += f" and not {keyword} {items[1]}"
        return clause

    def single(self, items):
        """Render a group made of one bare ID."""
        return self._strip_suffix(items[0])

    def multi(self, items):
        """Render a parenthesised ID list as ``A/B/C``.

        ``None`` entries are skipped: with Lark's ``maybe_placeholders``
        option enabled, an unmatched ``[...]`` (an empty ``()`` group) is
        delivered as ``None`` instead of a token, which has no ``.value``.
        """
        return "/".join(
            self._strip_suffix(token) for token in items if token is not None
        )

    def dep(self, items):
        """Render a DEP statement, e.g. ``DEP A and not DEP B``."""
        return self._with_exclusion("DEP", items)

    def arr(self, items):
        """Render an ARR statement, e.g. ``ARR A and not ARR B``."""
        return self._with_exclusion("ARR", items)

    def chain(self, items):
        """Join a statement and the rest of an AND-THEN chain with ``and``.

        A lone statement (no AND-THEN part) is returned unchanged.
        """
        if len(items) == 2:
            return f"({items[0]} and {items[1]})"
        return items[0]
# LALR parser with the transformer attached at construction time, so
# parser.parse() returns the string built by NLParser rather than a parse tree.
parser = Lark(grammar, parser='lalr', transformer=NLParser())
def fix_group_with_exc(rule):
    """Move an ``EXC (...)`` list out of the enclosing DEP/ARR parentheses.

    ``DEP (A, B EXC (C, D))`` becomes ``DEP (A, B) EXC (C, D)``, and likewise
    for ``ARR``.  This handles the case where EXC is written inside the
    parentheses of the group it applies to.

    Args:
        rule: Text of one rule.

    Returns:
        The rule with every matching nested exclusion moved outside its
        group; text without such a construct is returned unchanged.
    """
    rewritten = rule
    # DEP is rewritten first, then ARR, as two separate passes.
    for keyword in ("DEP", "ARR"):
        nested_exc = keyword + r'\s*\(([^()]*?)\s+EXC\s+\(([^()]*?)\)\)'
        hoisted = keyword + r' (\1) EXC (\2)'
        rewritten = re.sub(nested_exc, hoisted, rewritten)
    return rewritten
def parse_full_text_with_lark(text):
    """Translate a numbered list of DEP/ARR rules into one boolean expression.

    The ``NOT AVBL FOR TFC`` banner is removed, the remaining text is split
    at its rule numbers (``1.``, ``2.``, ...), each rule is normalised with
    ``fix_group_with_exc`` and parsed, and the rendered rules are OR-ed.

    Args:
        text: Restriction text containing numbered rules.  Anything before
            the first rule number is ignored, and blank rules are skipped.

    Returns:
        ``"((rule 1) or (rule 2) ...) and VIA_S ABCDEFGH ABCDEFGH"``.  When
        no rule is found the disjunction is empty (``"()"``).

    Raises:
        Whatever ``parser.parse`` raises for a rule that does not match the
        grammar.
    """
    text = text.replace("NOT AVBL FOR TFC", "")
    # A rule number is a digit run followed by a dot that is not the tail of
    # an identifier.  Splitting on that marker, instead of capturing
    # "everything up to the next digit", keeps rules intact when their
    # identifiers contain digits (the grammar's ID allows [A-Z0-9]).
    chunks = re.split(r'(?<![A-Z0-9])\d+\.', text)[1:]  # [0] precedes rule 1
    parsed_rules = []
    for chunk in chunks:
        rule = chunk.strip()
        if not rule:
            continue
        parsed = parser.parse(fix_group_with_exc(rule))
        parsed_rules.append(f"({parsed})")
    return f"({' or '.join(parsed_rules)}) and VIA_S ABCDEFGH ABCDEFGH"
if __name__ == "__main__":
    # Demo inputs: a flat two-rule text, then a two-line text whose groups
    # carry an EXC list inside their parentheses.
    demo_cases = [
        (
            "EXEMPLE 1:\n",
            "NOT AVBL FOR TFC 1. DEP EP** AND-THEN ARR AB** 2. DEP AB** AND-THEN ARR BA**",
        ),
        (
            "\nEXEMPLE 2:\n",
            "NOT AVBL FOR TFC 1. DEP (AB**, ABCD, AB** EXC (ABCC, ABCE, ABCF, ABCG ,ABCH)) AND-THEN ARR AAAA\n"
            "2. DEP EDDM AND-THEN ARR (BA**, BAAA, BA** EXC (BAAB, BAAC, BAAD, BAAE ,BAAF, BAAG, BAAH, BAAI, BAAJ))",
        ),
    ]
    for heading, sample in demo_cases:
        print(heading, parse_full_text_with_lark(sample))
Output
EXEMPLE 1:
(((DEP EP and ARR AB)) or ((DEP AB and ARR BA))) and VIA_S ABCDEFGH ABCDEFGH
EXEMPLE 2:
(((DEP AB/ABCD/AB and not DEP ABCC/ABCE/ABCF/ABCG/ABCH and ARR AAAA)) or ((DEP EDDM and ARR BA/BAAA/BA and not ARR BAAB/BAAC/BAAD/BAAE/BAAF/BAAG/BAAH/BAAI/BAAJ))) and VIA_S ABCDEFGH ABCDEFGH