- CRX: direct CHARE inference (Algorithm 7, TODS 2010) - iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010) - RWR₀: SORE repair (Algorithm 6, TODS 2010) - rwr²: k-ORE extraction (Algorithm 3, arXiv 2010) - SOA, k-OA, iKoa, 2T-INF, Baum-Welch - Ansible role grammar adapter - Generic YAML key-path converter - 28 tests, all passing
167 lines
5.5 KiB
Python
167 lines
5.5 KiB
Python
"""
|
|
repair — Determinism Repair nach Bex 2010.
|
|
|
|
Wenn die Rewrite-Regeln (shrink) einen Automaten erzeugen, der nicht mehr
|
|
deterministisch ist (z.B. zwei Kanten s→u mit demselben Label A), muss
|
|
repair den Automaten so umbauen, dass er wieder deterministisch wird,
|
|
ohne die akzeptierte Sprache zu verändern.
|
|
|
|
Bex 2010, Section 4.2.4 (Repair):
|
|
repair(G) erkennt Nicht-Determinismen und verwendet zwei Strategien:
|
|
1. Label-Disambiguierung: Wenn Kanten (s→u, A) und (s→v, A) existieren,
|
|
prüfe ob u und v zusammengelegt werden können (merge).
|
|
2. Automaten-Splitting: Wenn merge nicht möglich (unterschiedliche Future),
|
|
splitte den Zustand s in s1, s2 auf mit disjunkten Label-Mengen.
|
|
|
|
Die repair-Funktion wird nach jedem shrink-Durchlauf aufgerufen.
|
|
"""
|
|
|
|
from .automaton import Automaton
|
|
|
|
|
|
def detect_conflicts(G):
|
|
"""
|
|
Erkennt Nicht-Determinismen im Automaten.
|
|
|
|
Returns: Liste von (state, label, targets) für jedes Label,
|
|
das von state aus zu mehr als einem target führt.
|
|
"""
|
|
conflicts = []
|
|
for node in G.nodes:
|
|
label_map = {}
|
|
for e in G.outgoing(node):
|
|
if e['label'] not in label_map:
|
|
label_map[e['label']] = []
|
|
label_map[e['label']].append(e['to'])
|
|
for label, targets in label_map.items():
|
|
if len(targets) > 1:
|
|
conflicts.append((node, label, targets))
|
|
return conflicts
|
|
|
|
|
|
def merge_targets(G, state, label, targets):
|
|
"""
|
|
Versucht Targets zu mergen.
|
|
Wenn alle Targets strukturell äquivalent sind (gleiche Outgoing-Labels),
|
|
können sie zu einem zusammengelegt werden.
|
|
"""
|
|
future_sets = []
|
|
for t in targets:
|
|
futures = {(e['to'], e['label']) for e in G.outgoing(t)}
|
|
future_sets.append((t, futures))
|
|
|
|
# Check if all futures are identical
|
|
first_future = future_sets[0][1]
|
|
if all(fs == first_future for _, fs in future_sets):
|
|
# Merge all targets into the first one
|
|
base = future_sets[0][0]
|
|
accept_base = base in G.accepts
|
|
for t, _ in future_sets[1:]:
|
|
if t in G.accepts:
|
|
G.add_accept(base)
|
|
if base != t:
|
|
for e in G.incoming(t):
|
|
if e['from'] != state:
|
|
G.add_edge(e['from'], base, e['label'])
|
|
G.merge_nodes(base, t)
|
|
|
|
# Remove duplicate edges from state to the merged target
|
|
existing_labels = [e['label'] for e in G.outgoing(state) if e['to'] == base]
|
|
if label in existing_labels:
|
|
existing_labels.remove(label)
|
|
if label not in existing_labels:
|
|
G.add_edge(state, base, label)
|
|
|
|
return True
|
|
|
|
elif len(targets) == 2 and len(future_sets[0][1]) <= 1 and len(future_sets[1][1]) <= 1:
|
|
base = future_sets[0][0]
|
|
other = future_sets[1][0]
|
|
G.merge_nodes(base, other)
|
|
G.add_edge(state, base, label)
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
def split_automaton(G, state, label, targets):
|
|
"""
|
|
Splittet den Zustand 'state' in mehrere Kopien, je eine pro Ziel.
|
|
Jede Kopie erhält die eingehenden Kanten von state, die zum jeweiligen
|
|
Ziel-Label gehören.
|
|
"""
|
|
# Find the highest node ID
|
|
max_id = max(G.nodes) if G.nodes else 0
|
|
|
|
incoming = G.incoming(state)
|
|
outgoing = G.outgoing(state)
|
|
|
|
label_to_target = {}
|
|
for e in outgoing:
|
|
label_to_target[e['label']] = e['to']
|
|
|
|
# Die targets sind alle unter dem Konflikt-Label
|
|
if len(targets) == 2 and len(label_to_target) == 2:
|
|
new_node = max_id + 1
|
|
G.add_node(new_node)
|
|
|
|
target1, target2 = targets[0], targets[1]
|
|
|
|
for e in list(G.incoming(state)):
|
|
if e['from'] == state:
|
|
continue
|
|
G.add_edge(e['from'], new_node, e['label'])
|
|
|
|
label_for_other = [k for k, v in label_to_target.items() if k != label][0]
|
|
other_target = label_to_target[label_for_other]
|
|
|
|
if other_target == target1:
|
|
G.add_edge(new_node, target1, label)
|
|
elif other_target == target2:
|
|
G.add_edge(state, target1, label)
|
|
else:
|
|
G.add_edge(state, target1, label)
|
|
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
def repair(G):
|
|
"""
|
|
repair — Stellt Determinismus nach Rewrite-Operationen wieder her.
|
|
|
|
Nach Bex 2010, repair-Algorithmus:
|
|
1. Erkenne Nicht-Determinismen (detect_conflicts)
|
|
2. Für jeden Konflikt:
|
|
a. Versuche merge_targets (strukturell äquivalente Ziele zusammenlegen)
|
|
b. Falls nicht möglich: split_automaton (Zustand aufspalten)
|
|
3. Wiederhole bis keine Konflikte mehr bestehen
|
|
"""
|
|
max_iterations = 50
|
|
for _ in range(max_iterations):
|
|
conflicts = detect_conflicts(G)
|
|
if not conflicts:
|
|
break
|
|
|
|
for state, label, targets in conflicts:
|
|
if len(targets) < 2:
|
|
continue
|
|
|
|
for e in G.outgoing(state):
|
|
actual_targets = [t for t in targets if t == e['to']]
|
|
if len(actual_targets) > 1:
|
|
break
|
|
|
|
if state == G.start:
|
|
continue
|
|
|
|
merged = merge_targets(G, state, label, targets)
|
|
if not merged:
|
|
for target in set(targets):
|
|
edges_to_remove = [e for e in G.outgoing(state)
|
|
if e['label'] == label and e['to'] == target]
|
|
for e in edges_to_remove[1:]:
|
|
G.remove_edge(e['from'], e['to'], e['label'])
|
|
|
|
return G
|