"""CRX — Direct CHARE inference (Algorithm 7, TODS 2010).""" from collections import defaultdict from .expr import concat class CRX: """ |———— Algorithm 7: CRX ————| Input: sample S (list of token lists) Output: CHARE r such that S ⊆ L(r) """ def infer(self, sequences): S = [list(s) for s in sequences if s] if not S: return 'ε' sigma = set() for w in S: for a in w: sigma.add(a) if not sigma: return 'ε' # Step 1: Compute ImmedPred and equivalence classes ≈_S immed = set() for w in S: for i in range(len(w) - 1): immed.add((w[i], w[i + 1])) # Reachability: →_S (reflexive, transitive closure) closure = self._transitive_closure(sigma, immed) # Equivalence: a ≈_S b iff a →*_S b and b →*_S a eq = self._equivalence(sigma, closure) # Build class map: symbol → class index sym_to_cls = {} classes = [] for cls_syms in eq: idx = len(classes) for sym in cls_syms: sym_to_cls[sym] = idx classes.append(set(cls_syms)) # Step 2-3: Preserve only singleton nodes? No, the algorithm says merge singletons # that share Pred/Succ in the Hasse diagram. But actually, looking at the algorithm # more carefully: # # "while a maximal set of singleton nodes γ₁,...,γ_ℓ such that # Pred_HS(γ₁)=···=Pred_HS(γ_ℓ) and Succ_HS(γ₁)=···=Succ_HS(γ_ℓ) exists do # Replace γ₁,...,γ_ℓ by γ := ∪ⱼ γⱼ" # # This merges singleton equivalence classes (classes with exactly one symbol) # that have the same Pred and Succ sets in the Hasse diagram. changed = True while changed: changed = False singleton_ids = [i for i, c in enumerate(classes) if len(c) == 1] # Compute Pred and Succ for each singleton (considering ALL symbols in each class) hs_pred = {} hs_succ = {} for i in singleton_ids: hs_pred[i] = set() hs_succ[i] = set() sym_i = next(iter(classes[i])) for j, c in enumerate(classes): if i == j: continue if any((sym_j, sym_i) in immed for sym_j in c): hs_pred[i].add(j) if any((sym_i, sym_j) in immed for sym_j in c): hs_succ[i].add(j) # Group by same (Pred, Succ) groups = defaultdict(list) for i in singleton_ids: groups[(frozenset(hs_pred[i]), frozenset(hs_succ[i]))].append(i) for (pred_set, succ_set), group in groups.items(): if len(group) >= 2: merged = set() for i in group: merged.update(classes[i]) new_id = len(classes) classes.append(merged) for i in sorted(group, reverse=True): classes.pop(i) changed = True break # After merging, rebuild sym_to_cls to map to new class indices sym_to_cls = {} for idx, cls in enumerate(classes): for sym in cls: sym_to_cls[sym] = idx # Step 5: Topological sort of the Hasse diagram adj = {i: set() for i in range(len(classes))} indeg = {i: 0 for i in range(len(classes))} for a, b in immed: ca, cb = sym_to_cls.get(a), sym_to_cls.get(b) if ca is not None and cb is not None and ca != cb: if cb not in adj[ca]: adj[ca].add(cb) indeg[cb] += 1 # Topological sort (Kahn's algorithm) order = [] q = [i for i in range(len(classes)) if indeg[i] == 0] while q: i = q.pop(0) order.append(i) for j in adj[i]: indeg[j] -= 1 if indeg[j] == 0: q.append(j) remaining = set(range(len(classes))) - set(order) order.extend(remaining) # Step 6-16: Assign chain factors (Algorithm 7 lines 7-14) def count_in_class(w, syms): return sum(1 for a in w if a in syms) parts = [] for i in order: syms = classes[i] counts = [count_in_class(w, syms) for w in S] all_exactly_one = all(c == 1 for c in counts) all_at_most_one = all(c <= 1 for c in counts) all_at_least_one = all(c >= 1 for c in counts) some_two_or_more = any(c >= 2 for c in counts) sym_list = sorted(syms) factor = '+'.join(sym_list) if len(sym_list) > 1: factor = '(' + factor + ')' if all_exactly_one: pass # (a₁+···+aₙ) elif all_at_most_one: factor += '?' # (a₁+···+aₙ)? elif all_at_least_one and some_two_or_more: factor += '+' # (a₁+···+aₙ)+ else: factor += '+?' # (a₁+···+aₙ)+? parts.append(factor) if not parts: return 'ε' return '.'.join(parts) def _transitive_closure(self, sigma, immed): """Compute reflexive, transitive closure of immed relation.""" closure = {(a, b) for (a, b) in immed} for a in sigma: closure.add((a, a)) changed = True while changed: changed = False for a in sigma: for b in sigma: for c in sigma: if (a, b) in closure and (b, c) in closure and (a, c) not in closure: closure.add((a, c)) changed = True return closure def _equivalence(self, sigma, closure): """Compute equivalence classes of ≈_S.""" remaining = set(sigma) classes = [] while remaining: a = remaining.pop() cls = {a} added = True while added: added = False for b in list(remaining): if (a, b) in closure and (b, a) in closure: if b not in cls: cls.add(b) remaining.discard(b) added = True classes.append(cls) return classes