9 August 2025

Prototype with lark

by Romain Dehasseleer


Prototype with lark

Using Lark to translate “things” into “other things” — because naming them would require documentation.

from lark import Lark, Transformer
import re

# Lark grammar describing a single rule.
#   expr      - one statement, optionally followed by "AND-THEN" and another
#               expr (right-recursive); aliased `chain`.
#   statement - "DEP" or "ARR" followed by a group, with an optional
#               "EXC" group; aliased `dep` / `arr` respectively.
#   group     - a bare ID (`single`) or a parenthesised, comma-separated list
#               of IDs (`multi`). The list is written with [...], so the
#               grammar also accepts empty parentheses.
#   ID        - one or more upper-case letters/digits with an optional
#               trailing "**".
# Whitespace between tokens is ignored.
grammar = r"""
?start: expr

?expr: statement ("AND-THEN" expr)? -> chain

statement: "DEP" group ("EXC" group)?    -> dep
         | "ARR" group ("EXC" group)?    -> arr

group: ID                                -> single
     | "(" [ID ("," ID)*] ")"            -> multi

ID: /[A-Z0-9]+(\*\*)?/

%import common.WS
%ignore WS
"""

class NLParser(Transformer):
    """Render a parsed rule as a boolean-expression string.

    Every method handles the grammar alias of the same name and returns a
    string, so transforming a whole rule yields one string.
    """

    def single(self, items):
        """Return the only ID of an unparenthesised group, with ``**`` removed."""
        return items[0].value.replace("**", "")

    def multi(self, items):
        """Return the IDs of a parenthesised group joined with ``/``.

        ``**`` is removed from every ID.  The grammar writes the ID list with
        ``[...]``; when Lark's ``maybe_placeholders`` option is active an
        empty group ``()`` arrives here as ``[None]``, so placeholders are
        skipped instead of being dereferenced.

        Raises:
            ValueError: if the group contains no ID at all.
        """
        names = [tok.value.replace("**", "") for tok in items if tok is not None]
        if not names:
            raise ValueError("empty group '()' contains no identifier")
        return "/".join(names)

    def dep(self, items):
        """Render a ``DEP`` statement; ``items`` is ``[group]`` or ``[group, excluded]``."""
        return self._with_exclusion("DEP", items)

    def arr(self, items):
        """Render an ``ARR`` statement; ``items`` is ``[group]`` or ``[group, excluded]``."""
        return self._with_exclusion("ARR", items)

    def chain(self, items):
        """Join a statement with the rest of its ``AND-THEN`` chain.

        With two items the result is ``"(<first> and <rest>)"``; a lone
        statement is returned unchanged.
        """
        if len(items) == 2:
            return f"({items[0]} and {items[1]})"
        return items[0]

    def _with_exclusion(self, keyword, items):
        """Build ``"<keyword> <group>"`` plus an optional ``and not`` clause.

        Shared by :meth:`dep` and :meth:`arr`, which differ only in keyword.
        """
        base = f"{keyword} {items[0]}"
        if len(items) == 2:
            # Second item is the group that followed "EXC".
            base += f" and not {keyword} {items[1]}"
        return base

# Build the LALR parser once at import time. The NLParser instance is handed to
# Lark as its transformer, so parser.parse() yields the transformer's result
# rather than a raw parse tree.
parser = Lark(grammar, parser='lalr', transformer=NLParser())

# Matches "<DEP|ARR> (<names> EXC (<excluded>))": an exclusion list nested
# inside the group's own parentheses.  Neither captured list may contain
# parentheses.  Whitespace between EXC and "(" is optional because the grammar
# ignores whitespace between tokens.
_NESTED_EXC_RE = re.compile(
    r'(DEP|ARR)\s*\(([^()]*?)\s+EXC\s*\(([^()]*?)\)\)'
)


def fix_group_with_exc(rule):
    """Move an ``EXC (...)`` list out of the enclosing group's parentheses.

    The grammar expects ``DEP (a, b) EXC (c, d)``, but the input may nest the
    exclusion inside the group: ``DEP (a, b EXC (c, d))``.  Every such
    ``DEP``/``ARR`` occurrence is rewritten to the form the grammar expects;
    text that does not match is left untouched.

    Args:
        rule: Text of a single rule.

    Returns:
        The rule with nested exclusions rewritten as
        ``"<DEP|ARR> (<names>) EXC (<excluded>)"``.
    """
    return _NESTED_EXC_RE.sub(r'\1 (\2) EXC (\3)', rule)


def parse_full_text_with_lark(text):
    """Translate a numbered restriction text into one boolean expression.

    The ``"NOT AVBL FOR TFC"`` phrase is removed, the remainder is split on
    the ``N.`` rule markers, and each rule is normalised with
    ``fix_group_with_exc`` and parsed with the module-level ``parser``.

    Args:
        text: Raw text holding one or more rules, each introduced by a number
            followed by a dot (``"1. ..."``, ``"2. ..."``).

    Returns:
        The parsed rules, each wrapped in parentheses and joined with
        ``" or "``, followed by the fixed ``VIA_S`` suffix.  Blank rules are
        skipped; if no rule is found the leading group is empty (``"()"``).

    Raises:
        Whatever ``parser.parse`` raises for a rule that does not match the
        grammar.
    """
    text = text.replace("NOT AVBL FOR TFC", "")

    # Split on the "N." markers instead of capturing "everything up to the
    # next digit": IDs may contain digits, and the old capture silently cut a
    # rule short at the first one.  Element 0 is the text preceding the first
    # marker and is not a rule.
    segments = re.split(r'\d+\.\s*', text)[1:]

    parsed_rules = []
    for segment in segments:
        rule = segment.strip()
        if not rule:
            continue
        parsed = parser.parse(fix_group_with_exc(rule))
        parsed_rules.append(f"({parsed})")

    return f"({' or '.join(parsed_rules)}) and VIA_S ABCDEFGH ABCDEFGH"


if __name__ == "__main__":
    # Demo run: each entry pairs the heading to print with a sample text.
    # The first sample uses plain groups; the second nests the EXC list
    # inside the group's parentheses and spans two lines.
    demo_inputs = [
        (
            "EXEMPLE 1:\n",
            "NOT AVBL FOR TFC     1. DEP EP** AND-THEN ARR AB**     2. DEP AB** AND-THEN ARR BA**",
        ),
        (
            "\nEXEMPLE 2:\n",
            """NOT AVBL FOR TFC     1. DEP (AB**, ABCD, AB** EXC (ABCC, ABCE, ABCF, ABCG ,ABCH)) AND-THEN ARR AAAA
                    2. DEP EDDM AND-THEN ARR (BA**, BAAA, BA** EXC (BAAB, BAAC, BAAD, BAAE ,BAAF, BAAG, BAAH, BAAI, BAAJ))""",
        ),
    ]

    for heading, notice in demo_inputs:
        print(heading, parse_full_text_with_lark(notice))

Output

EXEMPLE 1:
 (((DEP EP and ARR AB)) or ((DEP AB and ARR BA))) and VIA_S ABCDEFGH ABCDEFGH

EXEMPLE 2:
 (((DEP AB/ABCD/AB and not DEP ABCC/ABCE/ABCF/ABCG/ABCH and ARR AAAA)) or ((DEP EDDM and ARR BA/BAAA/BA and not ARR BAAB/BAAC/BAAD/BAAE/BAAF/BAAG/BAAH/BAAI/BAAJ))) and VIA_S ABCDEFGH ABCDEFGH
tags: python