194 lines
5.7 KiB
Python
194 lines
5.7 KiB
Python
|
|
"""SOA — Single Occurrence Automaton (Definition 6, TODS 2010)."""
|
|||
|
|
|
|||
|
|
import copy
|
|||
|
|
from .expr import concat, disj, star, optional
|
|||
|
|
|
|||
|
|
|
|||
|
|
class SOA:
|
|||
|
|
"""
|
|||
|
|
Node-labeled automaton (Definition 6, TODS 2010).
|
|||
|
|
|
|||
|
|
V = {src, sink} ∪ symbol-labeled states.
|
|||
|
|
E ⊆ V × V, unlabeled edges.
|
|||
|
|
Walk src=v₁,v₂,...,vₙ₊₁=sink accepts word lab(v₂)...lab(vₙ).
|
|||
|
|
|
|||
|
|
States are proper SOREs, pairwise alphabet-disjoint (Definition 10).
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
def __init__(self):
|
|||
|
|
self._next = 0
|
|||
|
|
self._succ = {}
|
|||
|
|
self._pred = {}
|
|||
|
|
self._label = {}
|
|||
|
|
self.src = self._new()
|
|||
|
|
self.sink = self._new()
|
|||
|
|
|
|||
|
|
def _new(self):
|
|||
|
|
n = self._next
|
|||
|
|
self._next += 1
|
|||
|
|
self._succ[n] = set()
|
|||
|
|
self._pred[n] = set()
|
|||
|
|
self._label[n] = None
|
|||
|
|
return n
|
|||
|
|
|
|||
|
|
def add_state(self, label):
|
|||
|
|
n = self._new()
|
|||
|
|
self._label[n] = label
|
|||
|
|
return n
|
|||
|
|
|
|||
|
|
def add_edge(self, f, t):
|
|||
|
|
self._succ[f].add(t)
|
|||
|
|
self._pred[t].add(f)
|
|||
|
|
|
|||
|
|
def rm_edge(self, f, t):
|
|||
|
|
self._succ[f].discard(t)
|
|||
|
|
self._pred[t].discard(f)
|
|||
|
|
|
|||
|
|
def rm_state(self, n):
|
|||
|
|
if n in (self.src, self.sink):
|
|||
|
|
return
|
|||
|
|
for p in list(self._pred[n]):
|
|||
|
|
self.rm_edge(p, n)
|
|||
|
|
for s in list(self._succ[n]):
|
|||
|
|
self.rm_edge(n, s)
|
|||
|
|
del self._label[n]
|
|||
|
|
del self._succ[n]
|
|||
|
|
del self._pred[n]
|
|||
|
|
|
|||
|
|
def label(self, n):
|
|||
|
|
return self._label.get(n)
|
|||
|
|
|
|||
|
|
def set_label(self, n, lab):
|
|||
|
|
self._label[n] = lab
|
|||
|
|
|
|||
|
|
def succ(self, n):
|
|||
|
|
return set(self._succ.get(n, set()))
|
|||
|
|
|
|||
|
|
def pred(self, n):
|
|||
|
|
return set(self._pred.get(n, set()))
|
|||
|
|
|
|||
|
|
def has_edge(self, f, t):
|
|||
|
|
return t in self._succ.get(f, set())
|
|||
|
|
|
|||
|
|
def states(self):
|
|||
|
|
return [n for n in self._succ if n not in (self.src, self.sink) and self._label.get(n) is not None]
|
|||
|
|
|
|||
|
|
def _pred_plus(self, n):
|
|||
|
|
r = set(self._pred.get(n, set()))
|
|||
|
|
if self._label.get(n) and self._label[n].endswith('+'):
|
|||
|
|
r.add(n)
|
|||
|
|
return r
|
|||
|
|
|
|||
|
|
def _succ_plus(self, n):
|
|||
|
|
r = set(self._succ.get(n, set()))
|
|||
|
|
if self._label.get(n) and self._label[n].endswith('+'):
|
|||
|
|
r.add(n)
|
|||
|
|
return r
|
|||
|
|
|
|||
|
|
def copy(self):
|
|||
|
|
return copy.deepcopy(self)
|
|||
|
|
|
|||
|
|
def accept(self, w):
|
|||
|
|
cur = {self.src}
|
|||
|
|
for sym in w:
|
|||
|
|
nxt = set()
|
|||
|
|
for s in cur:
|
|||
|
|
for t in self._succ.get(s, set()):
|
|||
|
|
if self._label.get(t) == sym:
|
|||
|
|
nxt.add(t)
|
|||
|
|
if not nxt:
|
|||
|
|
return False
|
|||
|
|
cur = nxt
|
|||
|
|
return any(self.sink in self._succ.get(s, set()) for s in cur)
|
|||
|
|
|
|||
|
|
def sink_reachable(self):
|
|||
|
|
seen = set()
|
|||
|
|
q = [self.src]
|
|||
|
|
while q:
|
|||
|
|
s = q.pop()
|
|||
|
|
if s == self.sink:
|
|||
|
|
return True
|
|||
|
|
if s in seen:
|
|||
|
|
continue
|
|||
|
|
seen.add(s)
|
|||
|
|
q.extend(self._succ.get(s, []))
|
|||
|
|
return False
|
|||
|
|
|
|||
|
|
def num_non_special(self):
|
|||
|
|
return sum(1 for n in self._succ if n not in (self.src, self.sink))
|
|||
|
|
|
|||
|
|
def is_final(self):
|
|||
|
|
ns = self.states()
|
|||
|
|
return len(ns) == 1 and self.has_edge(self.src, ns[0]) and self.has_edge(ns[0], self.sink)
|
|||
|
|
|
|||
|
|
def expression(self):
|
|||
|
|
if not self.is_final():
|
|||
|
|
return None
|
|||
|
|
return self._label[self.states()[0]]
|
|||
|
|
|
|||
|
|
def contract(self, r, s, new_label):
|
|||
|
|
"""
|
|||
|
|
State contraction G[r,s ⇒ t] (Definition 11, TODS 2010).
|
|||
|
|
|
|||
|
|
(1) Add t as new state with label new_label.
|
|||
|
|
(2) Every v ∈ Pred(r) − {r,s} → predecessor of t.
|
|||
|
|
(3) Every w ∈ Succ(s) − {r,s} → successor of t. [matching figures]
|
|||
|
|
(4) Loop t→t if r ∈ Succ(s).
|
|||
|
|
(5) Remove r, s and all edges.
|
|||
|
|
"""
|
|||
|
|
t = self._new()
|
|||
|
|
self._label[t] = new_label
|
|||
|
|
for v in self._pred.get(r, set()) - {r, s}:
|
|||
|
|
self.add_edge(v, t)
|
|||
|
|
for v in self._pred.get(s, set()) - {r, s}:
|
|||
|
|
self.add_edge(v, t)
|
|||
|
|
for w in self._succ.get(r, set()) - {r, s}:
|
|||
|
|
self.add_edge(t, w)
|
|||
|
|
for w in self._succ.get(s, set()) - {r, s}:
|
|||
|
|
self.add_edge(t, w)
|
|||
|
|
if r in self._succ.get(s, set()):
|
|||
|
|
self.add_edge(t, t)
|
|||
|
|
self.rm_state(r)
|
|||
|
|
self.rm_state(s)
|
|||
|
|
return t
|
|||
|
|
|
|||
|
|
def contract_single(self, r, new_label):
|
|||
|
|
"""Single-state substitution G[r ⇒ t] (Definition 11 note)."""
|
|||
|
|
if r in (self.src, self.sink):
|
|||
|
|
return r
|
|||
|
|
t = self._new()
|
|||
|
|
self._label[t] = new_label
|
|||
|
|
for v in self._pred.get(r, set()) - {r}:
|
|||
|
|
self.add_edge(v, t)
|
|||
|
|
for w in self._succ.get(r, set()) - {r}:
|
|||
|
|
self.add_edge(t, w)
|
|||
|
|
if r in self._succ.get(r, set()):
|
|||
|
|
self.add_edge(t, t)
|
|||
|
|
self.rm_state(r)
|
|||
|
|
return t
|
|||
|
|
|
|||
|
|
def epsilon_closure(self):
|
|||
|
|
"""G* (Definition 25, TODS 2010). Add self-loops for + states and ε-transitive closure."""
|
|||
|
|
G = self.copy()
|
|||
|
|
changed = True
|
|||
|
|
while changed:
|
|||
|
|
changed = False
|
|||
|
|
for n in list(G._succ.keys()):
|
|||
|
|
lab = G._label.get(n)
|
|||
|
|
if lab and (lab.endswith('+') or lab.endswith('+?')):
|
|||
|
|
if not G.has_edge(n, n):
|
|||
|
|
G.add_edge(n, n)
|
|||
|
|
changed = True
|
|||
|
|
for n in list(G._succ.keys()):
|
|||
|
|
for m in list(G._succ.get(n, set())):
|
|||
|
|
mlab = G._label.get(m)
|
|||
|
|
if mlab == 'ε':
|
|||
|
|
for mp in list(G._succ.get(m, set())):
|
|||
|
|
if mp != n and not G.has_edge(n, mp):
|
|||
|
|
G.add_edge(n, mp)
|
|||
|
|
changed = True
|
|||
|
|
return G
|
|||
|
|
|
|||
|
|
def __repr__(self):
|
|||
|
|
return f"SOA(nodes={len(self._succ)}, special={self.num_non_special()})"
|