"""k-OA — k-Occurrence Automaton (Definition 4.1, arXiv 1004.2372). A k-OA is like a SOA but each symbol appears at most k times as a state label. """ from .soa import SOA from .expr import strip_k class KOA(SOA): """k-Occurrence Automaton. Same structure as SOA but each symbol may label up to k states. """ def __init__(self, k=1): super().__init__() self.k = k self._symbol_count = {} def add_state(self, label): nid = super().add_state(label) sym = strip_k(label) self._symbol_count.setdefault(sym, 0) self._symbol_count[sym] += 1 return nid def remove_state(self, nid): label = self._label.get(nid) if label: sym = strip_k(label) self._symbol_count[sym] -= 1 super().rm_state(nid) def count_symbol(self, symbol): return self._symbol_count.get(strip_k(symbol), 0) def symbol_ok(self, symbol): return self.count_symbol(symbol) < self.k def is_deterministic(self): for n in self._succ: label_map = {} for t in self._succ[n]: lab = self._label.get(t) if lab: base = strip_k(lab) if base in label_map: return False label_map[base] = t return True def accept(self, w): """Accept using base symbols (strip k-markers from state labels).""" cur = {self.src} for sym in w: nxt = set() for s in cur: for t in self._succ.get(s, set()): lab = self._label.get(t) if lab and strip_k(lab) == sym: nxt.add(t) if not nxt: return False cur = nxt return any(self.sink in self._succ.get(s, set()) for s in cur) def succ_labeled(self, nid, symbol): return {t for t in self._succ.get(nid, set()) if strip_k(self._label.get(t) or '') == symbol} def build_complete_koa(sequences, k): """Build complete k-OA Ck (Definition 4.2, arXiv 1004.2372). For each a ∈ Σ(S), exactly k states labeled a (a_1 ... a_k). - src connected to exactly one a_i for each a - Every state has edge to every other state (except src) - src → sink edge (for ε) """ G = KOA(k=k) alphabet = set() for seq in sequences: for token in seq: alphabet.add(token) symbol_states = {} for sym in alphabet: state_ids = [] for i in range(1, k + 1): nid = G.add_state(f"{sym}_{i}") state_ids.append(nid) G.add_edge(G.src, nid) symbol_states[sym] = state_ids all_states = [n for n in G._succ if n not in (G.src, G.sink)] for s in all_states: for t in all_states: if s != t and not G.has_edge(s, t): G.add_edge(s, t) if not G.has_edge(s, G.sink): G.add_edge(s, G.sink) G.add_edge(G.src, G.sink) return G, symbol_states