grammar-inference-engine/bex/marking.py
tobjend 7c00c6713d Initial commit: BEX-based grammar inference engine
- CRX: direct CHARE inference (Algorithm 7, TODS 2010)
- iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010)
- RWR₀: SORE repair (Algorithm 6, TODS 2010)
- rwr²: k-ORE extraction (Algorithm 3, arXiv 2010)
- SOA, k-OA, iKoa, 2T-INF, Baum-Welch
- Ansible role grammar adapter
- Generic YAML key-path converter
- 28 tests, all passing
2026-07-01 08:01:16 +02:00

46 lines
1.2 KiB
Python

"""Marking — Convert k-OA to SOA over Σ^(k) (Definition 4.4, arXiv 1004.2372)."""
from .soa import SOA
from .expr import strip_k
def mark_koa(G):
"""
Mark a k-OA G as a SOA over Σ^(k).
Process nodes in arbitrary order. For the i-th occurrence of label a,
replace by a^(i) (represented as "a_i").
Returns a SOA H over Σ^(k) such that L(G) = strip(L(H)).
"""
H = SOA()
H.src = G.src
H.sink = G.sink
H._succ = {n: set(succ) for n, succ in G._succ.items()}
H._pred = {n: set(pred) for n, pred in G._pred.items()}
H._label = {}
H._next = G._next
counts = {}
for n in G._succ:
lab = G._label.get(n)
if lab and lab not in ('ε', '') and n not in (G.src, G.sink):
sym = strip_k(lab)
counts[sym] = counts.get(sym, 0) + 1
H._label[n] = f"{sym}_{counts[sym]}"
elif n in (G.src, G.sink):
H._label[n] = None
else:
H._label[n] = lab
return H
def strip_expression(expr):
"""Strip k-ORE markers from expression: a_i → a.
Returns expression over original alphabet Σ.
"""
import re
result = re.sub(r'(_\d+)', '', expr)
return result