- CRX: direct CHARE inference (Algorithm 7, TODS 2010) - iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010) - RWR₀: SORE repair (Algorithm 6, TODS 2010) - rwr²: k-ORE extraction (Algorithm 3, arXiv 2010) - SOA, k-OA, iKoa, 2T-INF, Baum-Welch - Ansible role grammar adapter - Generic YAML key-path converter - 28 tests, all passing
105 lines
3 KiB
Python
105 lines
3 KiB
Python
"""k-OA — k-Occurrence Automaton (Definition 4.1, arXiv 1004.2372).
|
|
|
|
A k-OA is like a SOA but each symbol appears at most k times as a state label.
|
|
"""
|
|
|
|
from .soa import SOA
|
|
from .expr import strip_k
|
|
|
|
|
|
class KOA(SOA):
|
|
"""k-Occurrence Automaton.
|
|
|
|
Same structure as SOA but each symbol may label up to k states.
|
|
"""
|
|
|
|
def __init__(self, k=1):
|
|
super().__init__()
|
|
self.k = k
|
|
self._symbol_count = {}
|
|
|
|
def add_state(self, label):
|
|
nid = super().add_state(label)
|
|
sym = strip_k(label)
|
|
self._symbol_count.setdefault(sym, 0)
|
|
self._symbol_count[sym] += 1
|
|
return nid
|
|
|
|
def remove_state(self, nid):
|
|
label = self._label.get(nid)
|
|
if label:
|
|
sym = strip_k(label)
|
|
self._symbol_count[sym] -= 1
|
|
super().rm_state(nid)
|
|
|
|
def count_symbol(self, symbol):
|
|
return self._symbol_count.get(strip_k(symbol), 0)
|
|
|
|
def symbol_ok(self, symbol):
|
|
return self.count_symbol(symbol) < self.k
|
|
|
|
def is_deterministic(self):
|
|
for n in self._succ:
|
|
label_map = {}
|
|
for t in self._succ[n]:
|
|
lab = self._label.get(t)
|
|
if lab:
|
|
base = strip_k(lab)
|
|
if base in label_map:
|
|
return False
|
|
label_map[base] = t
|
|
return True
|
|
|
|
def accept(self, w):
|
|
"""Accept using base symbols (strip k-markers from state labels)."""
|
|
cur = {self.src}
|
|
for sym in w:
|
|
nxt = set()
|
|
for s in cur:
|
|
for t in self._succ.get(s, set()):
|
|
lab = self._label.get(t)
|
|
if lab and strip_k(lab) == sym:
|
|
nxt.add(t)
|
|
if not nxt:
|
|
return False
|
|
cur = nxt
|
|
return any(self.sink in self._succ.get(s, set()) for s in cur)
|
|
|
|
def succ_labeled(self, nid, symbol):
|
|
return {t for t in self._succ.get(nid, set()) if strip_k(self._label.get(t) or '') == symbol}
|
|
|
|
|
|
def build_complete_koa(sequences, k):
|
|
"""Build complete k-OA Ck (Definition 4.2, arXiv 1004.2372).
|
|
|
|
For each a ∈ Σ(S), exactly k states labeled a (a_1 ... a_k).
|
|
- src connected to exactly one a_i for each a
|
|
- Every state has edge to every other state (except src)
|
|
- src → sink edge (for ε)
|
|
"""
|
|
G = KOA(k=k)
|
|
alphabet = set()
|
|
for seq in sequences:
|
|
for token in seq:
|
|
alphabet.add(token)
|
|
|
|
symbol_states = {}
|
|
for sym in alphabet:
|
|
state_ids = []
|
|
for i in range(1, k + 1):
|
|
nid = G.add_state(f"{sym}_{i}")
|
|
state_ids.append(nid)
|
|
G.add_edge(G.src, nid)
|
|
symbol_states[sym] = state_ids
|
|
|
|
all_states = [n for n in G._succ if n not in (G.src, G.sink)]
|
|
for s in all_states:
|
|
for t in all_states:
|
|
if s != t and not G.has_edge(s, t):
|
|
G.add_edge(s, t)
|
|
if not G.has_edge(s, G.sink):
|
|
G.add_edge(s, G.sink)
|
|
|
|
G.add_edge(G.src, G.sink)
|
|
|
|
return G, symbol_states
|