- CRX: direct CHARE inference (Algorithm 7, TODS 2010) - iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010) - RWR₀: SORE repair (Algorithm 6, TODS 2010) - rwr²: k-ORE extraction (Algorithm 3, arXiv 2010) - SOA, k-OA, iKoa, 2T-INF, Baum-Welch - Ansible role grammar adapter - Generic YAML key-path converter - 28 tests, all passing
35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
"""2T-INF — Build SOA from 2-grams (Algorithm 1, TODS 2010)."""
|
|
|
|
from .soa import SOA
|
|
|
|
|
|
def build_soa(sequences):
|
|
"""
|
|
|———— Algorithm 1: 2T-INF ————|
|
|
Input: finite set of sample strings S
|
|
Output: SOA G such that S ⊆ L(G)
|
|
|
|
For each string a₁...aₙ in S:
|
|
add edges (src, a₁), (a₁, a₂), ..., (aₙ, sink)
|
|
"""
|
|
G = SOA()
|
|
symbol_states = {}
|
|
|
|
for seq in sequences:
|
|
if not seq:
|
|
if not G.has_edge(G.src, G.sink):
|
|
G.add_edge(G.src, G.sink)
|
|
continue
|
|
for i, token in enumerate(seq):
|
|
if token not in symbol_states:
|
|
symbol_states[token] = G.add_state(token)
|
|
if i == 0:
|
|
G.add_edge(G.src, symbol_states[token])
|
|
if i == len(seq) - 1:
|
|
G.add_edge(symbol_states[token], G.sink)
|
|
if i + 1 < len(seq):
|
|
nxt = seq[i + 1]
|
|
if nxt not in symbol_states:
|
|
symbol_states[nxt] = G.add_state(nxt)
|
|
G.add_edge(symbol_states[token], symbol_states[nxt])
|
|
return G
|