"""2T-INF — Build SOA from 2-grams (Algorithm 1, TODS 2010).""" from .soa import SOA def build_soa(sequences): """ |———— Algorithm 1: 2T-INF ————| Input: finite set of sample strings S Output: SOA G such that S ⊆ L(G) For each string a₁...aₙ in S: add edges (src, a₁), (a₁, a₂), ..., (aₙ, sink) """ G = SOA() symbol_states = {} for seq in sequences: if not seq: if not G.has_edge(G.src, G.sink): G.add_edge(G.src, G.sink) continue for i, token in enumerate(seq): if token not in symbol_states: symbol_states[token] = G.add_state(token) if i == 0: G.add_edge(G.src, symbol_states[token]) if i == len(seq) - 1: G.add_edge(symbol_states[token], G.sink) if i + 1 < len(seq): nxt = seq[i + 1] if nxt not in symbol_states: symbol_states[nxt] = G.add_state(nxt) G.add_edge(symbol_states[token], symbol_states[nxt]) return G