grammar-inference-engine/bex/automaton.py
tobjend 7c00c6713d Initial commit: BEX-based grammar inference engine
- CRX: direct CHARE inference (Algorithm 7, TODS 2010)
- iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010)
- RWR₀: SORE repair (Algorithm 6, TODS 2010)
- rwr²: k-ORE extraction (Algorithm 3, arXiv 2010)
- SOA, k-OA, iKoa, 2T-INF, Baum-Welch
- Ansible role grammar adapter
- Generic YAML key-path converter
- 28 tests, all passing
2026-07-01 08:01:16 +02:00

130 lines
4.3 KiB
Python

"""
Automaton — Graph representation for BEX algorithms.
Ein Automaton ist ein gerichteter Graph mit beschrifteten Kanten (Labels = Token).
Dient als Basis für:
- Prefix-Tree Automaton (aus Beispielsequenzen)
- SORE/CHARE Transformation via shrink-Rewrite-Regeln
- Determinism-Check und repair
Die Implementierung folgt der Struktur aus Bex et al. 2010 (TWEB):
- Nodes: Menge der Zustände
- Edges: Liste von (from, to, label, prob) — prob optional für HMM
- start: Startzustand
- accepts: Menge akzeptierender Zustände
"""
class Automaton:
def __init__(self, start=None):
self.nodes = set()
self.edges = []
self.start = start
self.accepts = set()
def add_node(self, node):
self.nodes.add(node)
def add_edge(self, u, v, label, prob=None):
self.edges.append({
'from': u,
'to': v,
'label': label,
'prob': prob,
})
self.add_node(u)
self.add_node(v)
def remove_edge(self, u, v, label):
self.edges = [
e for e in self.edges
if not (e['from'] == u and e['to'] == v and e['label'] == label)
]
def remove_all_edges_between(self, u, v):
self.edges = [
e for e in self.edges
if not (e['from'] == u and e['to'] == v)
]
def set_start(self, node):
self.start = node
self.add_node(node)
def add_accept(self, node):
self.accepts.add(node)
self.add_node(node)
def outgoing(self, node):
return [e for e in self.edges if e['from'] == node]
def incoming(self, node):
return [e for e in self.edges if e['to'] == node]
def successors(self, node):
return {(e['to'], e['label']) for e in self.outgoing(node)}
def has_edge(self, u, v, label):
return any(
e['from'] == u and e['to'] == v and e['label'] == label
for e in self.edges
)
def has_self_loop(self, node):
return any(e['from'] == node and e['to'] == node for e in self.edges)
def labels_on_edge(self, u, v):
return [e['label'] for e in self.edges if e['from'] == u and e['to'] == v]
def is_deterministic(self):
"""Prüft ob der Automat deterministisch ist (keine zwei Kanten mit gleichem Label von einem Zustand)."""
for node in self.nodes:
seen = set()
for e in self.outgoing(node):
if e['label'] in seen:
return False
seen.add(e['label'])
return True
def merge_nodes(self, target, source):
"""Vereinigt source in target: Alle Kanten von/zu source werden auf target umgeleitet."""
new_edges = []
for e in self.edges:
if e['from'] == source and e['to'] == source:
new_edges.append({'from': target, 'to': target, 'label': e['label']})
elif e['from'] == source:
new_edges.append({'from': target, 'to': e['to'], 'label': e['label']})
elif e['to'] == source:
new_edges.append({'from': e['from'], 'to': target, 'label': e['label']})
else:
new_edges.append(e)
self.edges = new_edges
if source in self.accepts:
self.accepts.add(target)
if source in self.accepts:
self.accepts.discard(source)
if source in self.nodes:
self.nodes.discard(source)
def copy(self):
import copy
return copy.deepcopy(self)
def __repr__(self):
return (f"Automaton(nodes={len(self.nodes)}, edges={len(self.edges)}, "
f"start={self.start}, accepts={self.accepts})")
def to_dot(self):
lines = ["digraph Automaton {"]
lines.append(" rankdir=LR;")
lines.append(f' start [shape=point];')
lines.append(f' start -> {self.start};')
for n in self.nodes:
shape = "doublecircle" if n in self.accepts else "circle"
lines.append(f' {n} [shape={shape}];')
for e in self.edges:
label = e['label'].replace('"', '\\"')
prob = f" [{e['prob']:.2f}]" if e['prob'] is not None else ""
lines.append(f' {e["from"]} -> {e["to"]} [label="{label}{prob}"];')
lines.append("}")
return '\n'.join(lines)