- CRX: direct CHARE inference (Algorithm 7, TODS 2010) - iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010) - RWR₀: SORE repair (Algorithm 6, TODS 2010) - rwr²: k-ORE extraction (Algorithm 3, arXiv 2010) - SOA, k-OA, iKoa, 2T-INF, Baum-Welch - Ansible role grammar adapter - Generic YAML key-path converter - 28 tests, all passing
191 lines
6.6 KiB
Python
191 lines
6.6 KiB
Python
"""CRX — Direct CHARE inference (Algorithm 7, TODS 2010)."""
|
||
|
||
from collections import defaultdict
|
||
from .expr import concat
|
||
|
||
|
||
class CRX:
    """
    Algorithm 7: CRX — direct CHARE inference.

    Input:  sample S (list of token lists)
    Output: CHARE r such that S ⊆ L(r)

    The result is returned as a string: chain factors are joined with
    '.', the symbols of a factor are joined with '+' (parenthesised when
    there is more than one), and a factor carries the suffix '', '?',
    '+' or '+?' depending on how often its symbols occur in each word.
    """

    def infer(self, sequences):
        """Infer a CHARE string from *sequences*.

        Args:
            sequences: iterable of words; each word is an iterable of
                hashable, sortable tokens (they are joined with '+', so
                in practice strings). ``None`` entries are skipped.
                Empty words are kept: they are part of the sample and
                force every factor to be optional.

        Returns:
            The CHARE as a string, or 'ε' when the sample contains no
            symbol at all.
        """
        # Only None is discarded. Dropping empty words as well would
        # produce an expression that rejects ε although ε is in the
        # sample, breaking S ⊆ L(r).
        S = [list(s) for s in sequences if s is not None]

        # Alphabet in order of first occurrence: a reproducible symbol
        # order that does not depend on string hashing.
        alphabet = list(dict.fromkeys(a for w in S for a in w))
        if not alphabet:
            return 'ε'

        # Step 1: immediate-successor pairs, reachability →_S and the
        # equivalence classes of ≈_S.
        immed = {pair for w in S for pair in zip(w, w[1:])}
        closure = self._transitive_closure(alphabet, immed)
        classes = self._equivalence(alphabet, closure)

        # Steps 2-4: merge singleton classes sharing Pred and Succ.
        classes = self._merge_singletons(classes, immed)

        # Canonical class numbering (by earliest-seen member) so that
        # ties in the topological sort are broken the same way each run.
        first_seen = {sym: pos for pos, sym in enumerate(alphabet)}
        classes.sort(key=lambda cls: min(first_seen[sym] for sym in cls))

        # Step 5: topological sort; steps 6-16: one chain factor per class.
        order = self._topological_order(classes, immed)
        return '.'.join(self._chain_factor(classes[i], S) for i in order)

    def _merge_singletons(self, classes, immed):
        """Repeatedly merge singleton classes with equal Pred and Succ.

        Implements the loop "while a maximal set of singleton nodes
        γ₁,...,γ_ℓ with identical Pred and Succ exists, replace them by
        γ := ∪ⱼ γⱼ". A merged class holds several symbols and is
        therefore never merged again.

        NOTE(review): Pred/Succ are computed from direct adjacency
        between classes (pairs in *immed*), not from a transitively
        reduced Hasse diagram as the algorithm text states — confirm
        that skipping the reduction is intended.

        Args:
            classes: list of sets of symbols (the classes of ≈_S).
            immed: set of (a, b) immediate-successor pairs.

        Returns:
            A new list of sets; the input list is not modified.
        """
        classes = [set(cls) for cls in classes]
        while True:
            owner = {sym: idx
                     for idx, cls in enumerate(classes) for sym in cls}

            preds = defaultdict(set)
            succs = defaultdict(set)
            for a, b in immed:
                ca, cb = owner.get(a), owner.get(b)
                if ca is None or cb is None or ca == cb:
                    continue
                succs[ca].add(cb)
                preds[cb].add(ca)

            # Bucket singleton classes by their (Pred, Succ) signature.
            buckets = defaultdict(list)
            for idx, cls in enumerate(classes):
                if len(cls) == 1:
                    key = (frozenset(preds[idx]), frozenset(succs[idx]))
                    buckets[key].append(idx)

            group = next((g for g in buckets.values() if len(g) >= 2), None)
            if group is None:
                return classes

            # One merge per pass: merging changes the class indices and
            # can make further singletons share a signature.
            merged = set().union(*(classes[i] for i in group))
            dropped = set(group)
            classes = [cls for i, cls in enumerate(classes)
                       if i not in dropped]
            classes.append(merged)

    def _topological_order(self, classes, immed):
        """Return the class indices in a topological order.

        Edges run from the class of ``a`` to the class of ``b`` for every
        (a, b) in *immed* that crosses two classes. Among the classes
        that are ready at the same time the smallest index is emitted
        first, which makes the order deterministic.

        Any class not reached (possible only if the edges contain a
        cycle) is appended at the end in index order.
        """
        owner = {sym: idx for idx, cls in enumerate(classes) for sym in cls}
        n = len(classes)

        adj = [set() for _ in range(n)]
        indeg = [0] * n
        for a, b in immed:
            ca, cb = owner.get(a), owner.get(b)
            if ca is None or cb is None or ca == cb:
                continue
            if cb not in adj[ca]:
                adj[ca].add(cb)
                indeg[cb] += 1

        # Kahn's algorithm. The number of classes is bounded by the
        # alphabet size, so a linear scan for the minimum is sufficient.
        ready = [i for i in range(n) if indeg[i] == 0]
        order = []
        while ready:
            i = min(ready)
            ready.remove(i)
            order.append(i)
            for j in adj[i]:
                indeg[j] -= 1
                if indeg[j] == 0:
                    ready.append(j)

        if len(order) < n:
            placed = set(order)
            order.extend(i for i in range(n) if i not in placed)
        return order

    def _chain_factor(self, syms, S):
        """Build the chain factor string for one class (Algorithm 7, 7-14).

        Args:
            syms: set of symbols forming the class.
            S: the sample, a non-empty list of words (lists of symbols).

        Returns:
            '(a₁+···+aₙ)' followed by '', '?', '+' or '+?'. The
            parentheses are omitted for a single symbol.
        """
        counts = [sum(1 for a in w if a in syms) for w in S]

        names = sorted(syms)
        factor = '+'.join(names)
        if len(names) > 1:
            factor = '(' + factor + ')'

        if all(c == 1 for c in counts):
            return factor            # exactly once in every word
        if all(c <= 1 for c in counts):
            return factor + '?'      # at most once in every word
        if all(c >= 1 for c in counts):
            # Not all counts are 1, so some word has two or more.
            return factor + '+'
        return factor + '+?'         # absent somewhere, repeated elsewhere

    def _transitive_closure(self, sigma, immed):
        """Compute reflexive, transitive closure of immed relation.

        Args:
            sigma: iterable of symbols.
            immed: iterable of (a, b) pairs.

        Returns:
            A set of pairs containing every pair of *immed*, (a, a) for
            each a in *sigma*, and (a, c) whenever c is reachable from a
            through pairs whose symbols all belong to *sigma*.
        """
        symbols = list(sigma)
        successors = {a: set() for a in symbols}
        for a, b in immed:
            if a in successors and b in successors:
                successors[a].add(b)

        closure = set(immed)
        for start in successors:
            # Depth-first search; `reached` includes `start` itself,
            # which supplies the reflexive pair.
            reached = {start}
            stack = [start]
            while stack:
                node = stack.pop()
                for nxt in successors[node]:
                    if nxt not in reached:
                        reached.add(nxt)
                        stack.append(nxt)
            closure.update((start, target) for target in reached)
        return closure

    def _equivalence(self, sigma, closure):
        """Compute equivalence classes of ≈_S.

        a ≈_S b iff both (a, b) and (b, a) are in *closure*.

        Args:
            sigma: iterable of symbols; classes are produced in the
                iteration order of *sigma*, so passing a list gives a
                reproducible result.
            closure: set of (a, b) reachability pairs.

        Returns:
            A list of sets partitioning the symbols of *sigma*.
        """
        symbols = list(sigma)
        assigned = set()
        classes = []
        for a in symbols:
            if a in assigned:
                continue
            cls = {a}
            for b in symbols:
                if (b not in assigned
                        and (a, b) in closure and (b, a) in closure):
                    cls.add(b)
            assigned |= cls
            classes.append(cls)
        return classes
|