grammar-inference-engine/bex/twotinf.py

36 lines
1.1 KiB
Python
Raw Permalink Normal View History

"""2T-INF — Build SOA from 2-grams (Algorithm 1, TODS 2010)."""
from .soa import SOA
def build_soa(sequences):
"""
| Algorithm 1: 2T-INF |
Input: finite set of sample strings S
Output: SOA G such that S L(G)
For each string a₁...aₙ in S:
add edges (src, a₁), (a₁, a₂), ..., (aₙ, sink)
"""
G = SOA()
symbol_states = {}
for seq in sequences:
if not seq:
if not G.has_edge(G.src, G.sink):
G.add_edge(G.src, G.sink)
continue
for i, token in enumerate(seq):
if token not in symbol_states:
symbol_states[token] = G.add_state(token)
if i == 0:
G.add_edge(G.src, symbol_states[token])
if i == len(seq) - 1:
G.add_edge(symbol_states[token], G.sink)
if i + 1 < len(seq):
nxt = seq[i + 1]
if nxt not in symbol_states:
symbol_states[nxt] = G.add_state(nxt)
G.add_edge(symbol_states[token], symbol_states[nxt])
return G