- CRX: direct CHARE inference (Algorithm 7, TODS 2010) - iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010) - RWR₀: SORE repair (Algorithm 6, TODS 2010) - rwr²: k-ORE extraction (Algorithm 3, arXiv 2010) - SOA, k-OA, iKoa, 2T-INF, Baum-Welch - Ansible role grammar adapter - Generic YAML key-path converter - 28 tests, all passing
193 lines
5.7 KiB
Python
193 lines
5.7 KiB
Python
"""SOA — Single Occurrence Automaton (Definition 6, TODS 2010)."""
|
||
|
||
import copy
|
||
from .expr import concat, disj, star, optional
|
||
|
||
|
||
class SOA:
    """
    Node-labeled automaton (Definition 6, TODS 2010).

    V = {src, sink} ∪ symbol-labeled states.
    E ⊆ V × V, unlabeled edges.
    Walk src=v₁,v₂,...,vₙ₊₁=sink accepts word lab(v₂)...lab(vₙ).

    States are proper SOREs, pairwise alphabet-disjoint (Definition 10).

    Implementation notes:
      * States are identified by consecutive integer ids handed out by
        ``_new``; ids are never reused after a state is removed.
      * ``_succ`` / ``_pred`` map a state id to the set of its successor /
        predecessor ids and are always updated together.
      * ``_label`` maps a state id to its label; ``src`` and ``sink`` keep
        the label ``None``.
      * Several methods call ``str.endswith`` on labels, so labels are
        expected to be strings (or ``None``).
    """

    def __init__(self):
        """Create an automaton that contains only ``src`` and ``sink``."""
        self._next = 0      # next unused state id
        self._succ = {}     # state id -> set of successor ids
        self._pred = {}     # state id -> set of predecessor ids
        self._label = {}    # state id -> label (None for src / sink)
        self.src = self._new()
        self.sink = self._new()

    def _new(self) -> int:
        """Allocate a fresh, unlabeled, edge-less state and return its id."""
        n = self._next
        self._next += 1
        self._succ[n] = set()
        self._pred[n] = set()
        self._label[n] = None
        return n

    def add_state(self, label) -> int:
        """Add a new state carrying ``label`` and return its id."""
        n = self._new()
        self._label[n] = label
        return n

    def add_edge(self, f, t) -> None:
        """Add the edge ``f -> t``.

        Raises ``KeyError`` if either endpoint is not a state of this
        automaton.
        """
        self._succ[f].add(t)
        self._pred[t].add(f)

    def rm_edge(self, f, t) -> None:
        """Remove the edge ``f -> t`` if present.

        A missing edge is ignored; an unknown endpoint raises ``KeyError``.
        """
        self._succ[f].discard(t)
        self._pred[t].discard(f)

    def rm_state(self, n) -> None:
        """Remove state ``n`` together with all of its edges.

        ``src`` and ``sink`` are never removed.  Removing a state that does
        not exist (for example one that was already removed) is a no-op, in
        line with the query accessors, which also tolerate unknown ids.
        """
        if n in (self.src, self.sink) or n not in self._succ:
            return
        # Iterate over snapshots: rm_edge mutates the sets being walked.
        for p in list(self._pred[n]):
            self.rm_edge(p, n)
        for s in list(self._succ[n]):
            self.rm_edge(n, s)
        del self._label[n]
        del self._succ[n]
        del self._pred[n]

    def label(self, n):
        """Return the label of ``n`` (``None`` for unknown or unlabeled ids)."""
        return self._label.get(n)

    def set_label(self, n, lab) -> None:
        """Set the label of ``n`` to ``lab``."""
        self._label[n] = lab

    def succ(self, n) -> set:
        """Return a copy of the successor set of ``n`` (empty if unknown)."""
        return set(self._succ.get(n, set()))

    def pred(self, n) -> set:
        """Return a copy of the predecessor set of ``n`` (empty if unknown)."""
        return set(self._pred.get(n, set()))

    def has_edge(self, f, t) -> bool:
        """Return True if the edge ``f -> t`` exists."""
        return t in self._succ.get(f, set())

    def states(self) -> list:
        """Return the ids of all labeled states other than ``src``/``sink``.

        States whose label is ``None`` are skipped, so this can report
        fewer states than ``num_non_special``.
        """
        return [
            n for n in self._succ
            if n not in (self.src, self.sink) and self._label.get(n) is not None
        ]

    def _pred_plus(self, n) -> set:
        """Return predecessors of ``n``, plus ``n`` itself if its label ends in '+'."""
        r = set(self._pred.get(n, set()))
        if self._label.get(n) and self._label[n].endswith('+'):
            r.add(n)
        return r

    def _succ_plus(self, n) -> set:
        """Return successors of ``n``, plus ``n`` itself if its label ends in '+'."""
        r = set(self._succ.get(n, set()))
        if self._label.get(n) and self._label[n].endswith('+'):
            r.add(n)
        return r

    def copy(self):
        """Return an independent deep copy of this automaton."""
        # Inside the method body ``copy`` resolves to the module-level
        # ``copy`` module, not to this method.
        return copy.deepcopy(self)

    def accept(self, w) -> bool:
        """Return True if some walk from ``src`` to ``sink`` spells ``w``.

        ``w`` is iterated symbol by symbol; each symbol must equal the label
        of the state entered at that step.  The empty word is accepted iff
        the edge ``src -> sink`` exists.
        """
        cur = {self.src}
        for sym in w:
            nxt = set()
            for s in cur:
                for t in self._succ.get(s, set()):
                    if self._label.get(t) == sym:
                        nxt.add(t)
            if not nxt:
                return False
            cur = nxt
        return any(self.sink in self._succ.get(s, set()) for s in cur)

    def sink_reachable(self) -> bool:
        """Return True if ``sink`` can be reached from ``src`` along edges."""
        seen = set()
        q = [self.src]
        while q:
            s = q.pop()
            if s == self.sink:
                return True
            if s in seen:
                continue
            seen.add(s)
            q.extend(self._succ.get(s, []))
        return False

    def num_non_special(self) -> int:
        """Return the number of states other than ``src`` and ``sink``.

        Unlike ``states``, unlabeled states are counted as well.
        """
        return sum(1 for n in self._succ if n not in (self.src, self.sink))

    def is_final(self) -> bool:
        """Return True if exactly one labeled state remains, wired src -> state -> sink."""
        ns = self.states()
        return (
            len(ns) == 1
            and self.has_edge(self.src, ns[0])
            and self.has_edge(ns[0], self.sink)
        )

    def expression(self):
        """Return the label of the single remaining state, or None if not final."""
        if not self.is_final():
            return None
        return self._label[self.states()[0]]

    def contract(self, r, s, new_label):
        """
        State contraction G[r,s ⇒ t] (Definition 11, TODS 2010).

        (1) Add t as new state with label new_label.
        (2) Every predecessor of r or of s, other than r and s themselves,
            becomes a predecessor of t.
        (3) Every successor of r or of s, other than r and s themselves,
            becomes a successor of t.
        (4) Loop t→t if r ∈ Succ(s).
        (5) Remove r, s and all edges.

        When ``r == s`` this is exactly the single-state substitution, so
        the call is forwarded to ``contract_single``.

        Returns the id of the new state t.

        NOTE(review): ``src`` / ``sink`` are not rejected here (unlike in
        ``contract_single``); callers are expected to pass ordinary states.
        """
        if r == s:
            # Removing the same state twice is meaningless; the definition
            # degenerates to G[r ⇒ t].
            return self.contract_single(r, new_label)
        t = self._new()
        self._label[t] = new_label
        merged = {r, s}
        for v in self._pred.get(r, set()) - merged:
            self.add_edge(v, t)
        for v in self._pred.get(s, set()) - merged:
            self.add_edge(v, t)
        for w in self._succ.get(r, set()) - merged:
            self.add_edge(t, w)
        for w in self._succ.get(s, set()) - merged:
            self.add_edge(t, w)
        # A back edge s -> r turns into a self-loop on the merged state.
        if r in self._succ.get(s, set()):
            self.add_edge(t, t)
        self.rm_state(r)
        self.rm_state(s)
        return t

    def contract_single(self, r, new_label):
        """Single-state substitution G[r ⇒ t] (Definition 11 note).

        Replaces ``r`` by a fresh state labeled ``new_label`` that inherits
        all of r's edges; a self-loop on r becomes a self-loop on the new
        state.  ``src`` and ``sink`` are left untouched and returned as-is.

        Returns the id of the replacement state.
        """
        if r in (self.src, self.sink):
            return r
        t = self._new()
        self._label[t] = new_label
        for v in self._pred.get(r, set()) - {r}:
            self.add_edge(v, t)
        for w in self._succ.get(r, set()) - {r}:
            self.add_edge(t, w)
        if r in self._succ.get(r, set()):
            self.add_edge(t, t)
        self.rm_state(r)
        return t

    def epsilon_closure(self):
        """G* (Definition 25, TODS 2010). Add self-loops for + states and ε-transitive closure.

        Works on a copy; ``self`` is not modified.  Returns the new
        automaton.
        """
        G = self.copy()

        # Self-loops depend only on labels, which never change below, so a
        # single pass outside the fixed-point loop is sufficient.
        for n in list(G._succ):
            lab = G._label.get(n)
            if lab and lab.endswith(('+', '+?')) and not G.has_edge(n, n):
                G.add_edge(n, n)

        # Repeatedly bypass ε-labeled states (n -> ε -> mp gives n -> mp)
        # until no new edge appears.  Bypasses that would create a
        # self-loop (mp == n) are skipped.
        changed = True
        while changed:
            changed = False
            for n in list(G._succ):
                for m in list(G._succ.get(n, set())):
                    if G._label.get(m) != 'ε':
                        continue
                    for mp in list(G._succ.get(m, set())):
                        if mp != n and not G.has_edge(n, mp):
                            G.add_edge(n, mp)
                            changed = True
        return G

    def __repr__(self):
        # NOTE: the "special" field reports the count of NON-special states
        # (see num_non_special); the text is kept as-is for compatibility.
        return f"SOA(nodes={len(self._succ)}, special={self.num_non_special()})"
|