grammar-inference-engine/bex/soa.py

194 lines
5.7 KiB
Python
Raw Permalink Normal View History

"""SOA — Single Occurrence Automaton (Definition 6, TODS 2010)."""
import copy
from .expr import concat, disj, star, optional
class SOA:
"""
Node-labeled automaton (Definition 6, TODS 2010).
V = {src, sink} symbol-labeled states.
E V × V, unlabeled edges.
Walk src=v₁,v₂,...,vₙ=sink accepts word lab(v₂)...lab(vₙ).
States are proper SOREs, pairwise alphabet-disjoint (Definition 10).
"""
def __init__(self):
self._next = 0
self._succ = {}
self._pred = {}
self._label = {}
self.src = self._new()
self.sink = self._new()
def _new(self):
n = self._next
self._next += 1
self._succ[n] = set()
self._pred[n] = set()
self._label[n] = None
return n
def add_state(self, label):
n = self._new()
self._label[n] = label
return n
def add_edge(self, f, t):
self._succ[f].add(t)
self._pred[t].add(f)
def rm_edge(self, f, t):
self._succ[f].discard(t)
self._pred[t].discard(f)
def rm_state(self, n):
if n in (self.src, self.sink):
return
for p in list(self._pred[n]):
self.rm_edge(p, n)
for s in list(self._succ[n]):
self.rm_edge(n, s)
del self._label[n]
del self._succ[n]
del self._pred[n]
def label(self, n):
return self._label.get(n)
def set_label(self, n, lab):
self._label[n] = lab
def succ(self, n):
return set(self._succ.get(n, set()))
def pred(self, n):
return set(self._pred.get(n, set()))
def has_edge(self, f, t):
return t in self._succ.get(f, set())
def states(self):
return [n for n in self._succ if n not in (self.src, self.sink) and self._label.get(n) is not None]
def _pred_plus(self, n):
r = set(self._pred.get(n, set()))
if self._label.get(n) and self._label[n].endswith('+'):
r.add(n)
return r
def _succ_plus(self, n):
r = set(self._succ.get(n, set()))
if self._label.get(n) and self._label[n].endswith('+'):
r.add(n)
return r
def copy(self):
return copy.deepcopy(self)
def accept(self, w):
cur = {self.src}
for sym in w:
nxt = set()
for s in cur:
for t in self._succ.get(s, set()):
if self._label.get(t) == sym:
nxt.add(t)
if not nxt:
return False
cur = nxt
return any(self.sink in self._succ.get(s, set()) for s in cur)
def sink_reachable(self):
seen = set()
q = [self.src]
while q:
s = q.pop()
if s == self.sink:
return True
if s in seen:
continue
seen.add(s)
q.extend(self._succ.get(s, []))
return False
def num_non_special(self):
return sum(1 for n in self._succ if n not in (self.src, self.sink))
def is_final(self):
ns = self.states()
return len(ns) == 1 and self.has_edge(self.src, ns[0]) and self.has_edge(ns[0], self.sink)
def expression(self):
if not self.is_final():
return None
return self._label[self.states()[0]]
def contract(self, r, s, new_label):
"""
State contraction G[r,s t] (Definition 11, TODS 2010).
(1) Add t as new state with label new_label.
(2) Every v Pred(r) {r,s} predecessor of t.
(3) Every w Succ(s) {r,s} successor of t. [matching figures]
(4) Loop tt if r Succ(s).
(5) Remove r, s and all edges.
"""
t = self._new()
self._label[t] = new_label
for v in self._pred.get(r, set()) - {r, s}:
self.add_edge(v, t)
for v in self._pred.get(s, set()) - {r, s}:
self.add_edge(v, t)
for w in self._succ.get(r, set()) - {r, s}:
self.add_edge(t, w)
for w in self._succ.get(s, set()) - {r, s}:
self.add_edge(t, w)
if r in self._succ.get(s, set()):
self.add_edge(t, t)
self.rm_state(r)
self.rm_state(s)
return t
def contract_single(self, r, new_label):
"""Single-state substitution G[r ⇒ t] (Definition 11 note)."""
if r in (self.src, self.sink):
return r
t = self._new()
self._label[t] = new_label
for v in self._pred.get(r, set()) - {r}:
self.add_edge(v, t)
for w in self._succ.get(r, set()) - {r}:
self.add_edge(t, w)
if r in self._succ.get(r, set()):
self.add_edge(t, t)
self.rm_state(r)
return t
def epsilon_closure(self):
"""G* (Definition 25, TODS 2010). Add self-loops for + states and ε-transitive closure."""
G = self.copy()
changed = True
while changed:
changed = False
for n in list(G._succ.keys()):
lab = G._label.get(n)
if lab and (lab.endswith('+') or lab.endswith('+?')):
if not G.has_edge(n, n):
G.add_edge(n, n)
changed = True
for n in list(G._succ.keys()):
for m in list(G._succ.get(n, set())):
mlab = G._label.get(m)
if mlab == 'ε':
for mp in list(G._succ.get(m, set())):
if mp != n and not G.has_edge(n, mp):
G.add_edge(n, mp)
changed = True
return G
def __repr__(self):
return f"SOA(nodes={len(self._succ)}, special={self.num_non_special()})"