"""SOA — Single Occurrence Automaton (Definition 6, TODS 2010).""" import copy from .expr import concat, disj, star, optional class SOA: """ Node-labeled automaton (Definition 6, TODS 2010). V = {src, sink} ∪ symbol-labeled states. E ⊆ V × V, unlabeled edges. Walk src=v₁,v₂,...,vₙ₊₁=sink accepts word lab(v₂)...lab(vₙ). States are proper SOREs, pairwise alphabet-disjoint (Definition 10). """ def __init__(self): self._next = 0 self._succ = {} self._pred = {} self._label = {} self.src = self._new() self.sink = self._new() def _new(self): n = self._next self._next += 1 self._succ[n] = set() self._pred[n] = set() self._label[n] = None return n def add_state(self, label): n = self._new() self._label[n] = label return n def add_edge(self, f, t): self._succ[f].add(t) self._pred[t].add(f) def rm_edge(self, f, t): self._succ[f].discard(t) self._pred[t].discard(f) def rm_state(self, n): if n in (self.src, self.sink): return for p in list(self._pred[n]): self.rm_edge(p, n) for s in list(self._succ[n]): self.rm_edge(n, s) del self._label[n] del self._succ[n] del self._pred[n] def label(self, n): return self._label.get(n) def set_label(self, n, lab): self._label[n] = lab def succ(self, n): return set(self._succ.get(n, set())) def pred(self, n): return set(self._pred.get(n, set())) def has_edge(self, f, t): return t in self._succ.get(f, set()) def states(self): return [n for n in self._succ if n not in (self.src, self.sink) and self._label.get(n) is not None] def _pred_plus(self, n): r = set(self._pred.get(n, set())) if self._label.get(n) and self._label[n].endswith('+'): r.add(n) return r def _succ_plus(self, n): r = set(self._succ.get(n, set())) if self._label.get(n) and self._label[n].endswith('+'): r.add(n) return r def copy(self): return copy.deepcopy(self) def accept(self, w): cur = {self.src} for sym in w: nxt = set() for s in cur: for t in self._succ.get(s, set()): if self._label.get(t) == sym: nxt.add(t) if not nxt: return False cur = nxt return any(self.sink in self._succ.get(s, set()) for s in cur) def sink_reachable(self): seen = set() q = [self.src] while q: s = q.pop() if s == self.sink: return True if s in seen: continue seen.add(s) q.extend(self._succ.get(s, [])) return False def num_non_special(self): return sum(1 for n in self._succ if n not in (self.src, self.sink)) def is_final(self): ns = self.states() return len(ns) == 1 and self.has_edge(self.src, ns[0]) and self.has_edge(ns[0], self.sink) def expression(self): if not self.is_final(): return None return self._label[self.states()[0]] def contract(self, r, s, new_label): """ State contraction G[r,s ⇒ t] (Definition 11, TODS 2010). (1) Add t as new state with label new_label. (2) Every v ∈ Pred(r) − {r,s} → predecessor of t. (3) Every w ∈ Succ(s) − {r,s} → successor of t. [matching figures] (4) Loop t→t if r ∈ Succ(s). (5) Remove r, s and all edges. """ t = self._new() self._label[t] = new_label for v in self._pred.get(r, set()) - {r, s}: self.add_edge(v, t) for v in self._pred.get(s, set()) - {r, s}: self.add_edge(v, t) for w in self._succ.get(r, set()) - {r, s}: self.add_edge(t, w) for w in self._succ.get(s, set()) - {r, s}: self.add_edge(t, w) if r in self._succ.get(s, set()): self.add_edge(t, t) self.rm_state(r) self.rm_state(s) return t def contract_single(self, r, new_label): """Single-state substitution G[r ⇒ t] (Definition 11 note).""" if r in (self.src, self.sink): return r t = self._new() self._label[t] = new_label for v in self._pred.get(r, set()) - {r}: self.add_edge(v, t) for w in self._succ.get(r, set()) - {r}: self.add_edge(t, w) if r in self._succ.get(r, set()): self.add_edge(t, t) self.rm_state(r) return t def epsilon_closure(self): """G* (Definition 25, TODS 2010). Add self-loops for + states and ε-transitive closure.""" G = self.copy() changed = True while changed: changed = False for n in list(G._succ.keys()): lab = G._label.get(n) if lab and (lab.endswith('+') or lab.endswith('+?')): if not G.has_edge(n, n): G.add_edge(n, n) changed = True for n in list(G._succ.keys()): for m in list(G._succ.get(n, set())): mlab = G._label.get(m) if mlab == 'ε': for mp in list(G._succ.get(m, set())): if mp != n and not G.has_edge(n, mp): G.add_edge(n, mp) changed = True return G def __repr__(self): return f"SOA(nodes={len(self._succ)}, special={self.num_non_special()})"