grammar-inference-engine/tests/test_ensemble.py
tobjend 9045769d57
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
ci/woodpecker/pr/woodpecker Pipeline was successful
feat: core+outlier analysis via min_coverage parameter, 6 new tests
2026-07-01 15:09:10 +02:00

265 lines
8 KiB
Python

"""Tests for infer_ensemble — runs CRX, iDRegEx, and kOREInference, picks best by MDL."""
from bex.ensemble import infer_ensemble
from bex.idregex import is_deterministic
from bex.kore import kOREInference
# ── Basic ensemble runs ──
def test_ensemble_returns_dict():
    """The ensemble result is a dict exposing the 'best', 'all' and 'why' keys."""
    sequences = [['a', 'b', 'c'], ['a', 'b', 'c', 'd']]
    outcome = infer_ensemble(sequences, kmax=2, N=3)
    assert isinstance(outcome, dict)
    # Check the three top-level keys in the same order as before.
    for key in ('best', 'all', 'why'):
        assert key in outcome
def test_ensemble_best_not_none():
    """The 'best' entry exists and carries a grammar, a known algorithm and a score."""
    outcome = infer_ensemble([['a', 'b'], ['a', 'b', 'c']], kmax=2, N=3)
    winner = outcome['best']
    assert winner is not None
    assert winner['grammar'] is not None
    assert winner['algorithm'] in ('CRX', 'iDRegEx', 'kOREInference')
    assert winner['mdl_score'] is not None
def test_ensemble_runs_all_three():
    """CRX appears among the reported algorithms and 'all' is non-empty."""
    sequences = [['a', 'b', 'c'], ['a', 'b', 'c', 'd']]
    outcome = infer_ensemble(sequences, kmax=2, N=3)
    reported = set()
    for entry in outcome['all']:
        reported.add(entry['algorithm'])
    assert 'CRX' in reported
    # iDRegEx and kOREInference may fail stochastically, so only CRX is
    # required; beyond that we just insist on at least one result.
    assert len(outcome['all']) >= 1
def test_ensemble_all_results_have_scores():
    """Every entry in 'all' has algorithm, grammar and a numeric mdl_score."""
    outcome = infer_ensemble([['a', 'b'], ['a', 'b', 'b']], kmax=2, N=3)
    for candidate in outcome['all']:
        for field in ('algorithm', 'grammar', 'mdl_score'):
            assert field in candidate
        assert isinstance(candidate['mdl_score'], (int, float))
def test_ensemble_deterministic_results():
    """When a best result is reported, its grammar passes is_deterministic."""
    outcome = infer_ensemble([['x', 'y'], ['x', 'z']], kmax=2, N=3)
    winner = outcome['best']
    # A falsy 'best' is tolerated here; only a reported grammar is checked.
    if winner:
        assert is_deterministic(winner['grammar'])
# ── prefer parameter tests ──
def test_prefer_crx():
    """With prefer='crx', the best algorithm is CRX and 'all' has exactly one entry."""
    outcome = infer_ensemble([['a', 'b'], ['a', 'b', 'c']], prefer='crx')
    assert outcome['best']['algorithm'] == 'CRX'
    assert len(outcome['all']) == 1
def test_prefer_idregex():
    """With prefer='idregex', the best algorithm is iDRegEx and 'all' has exactly one entry."""
    outcome = infer_ensemble(
        [['a', 'b'], ['a', 'b', 'c']],
        prefer='idregex',
        kmax=2,
        N=5,
    )
    assert outcome['best']['algorithm'] == 'iDRegEx'
    assert len(outcome['all']) == 1
def test_prefer_koreinference():
    """With prefer='koreinference', the best algorithm is kOREInference and 'all' has one entry."""
    outcome = infer_ensemble(
        [['a', 'b'], ['a', 'b', 'c']],
        prefer='koreinference',
        kmax=2,
        N=5,
    )
    assert outcome['best']['algorithm'] == 'kOREInference'
    assert len(outcome['all']) == 1
def test_prefer_case_insensitive():
    """The `prefer` argument is matched regardless of letter case.

    Runs the ensemble with prefer='CRX' and prefer='Crx' and checks that
    both behave like the lowercase spelling exercised by test_prefer_crx:
    the best algorithm is 'CRX' and 'all' holds exactly one entry.
    """
    seqs = [['a', 'b']]
    r1 = infer_ensemble(seqs, prefer='CRX')
    r2 = infer_ensemble(seqs, prefer='Crx')
    assert r1['best']['algorithm'] == r2['best']['algorithm']
    # Comparing the two runs with each other is not sufficient: an
    # unrecognised `prefer` value still produces a result (see
    # test_prefer_unknown_falls_back), so both spellings could be ignored
    # and still agree. Pin each run to the outcome expected for 'crx'.
    for result in (r1, r2):
        assert result['best']['algorithm'] == 'CRX'
        assert len(result['all']) == 1
def test_prefer_unknown_falls_back():
    """An unrecognised prefer value still yields a best result and a non-empty 'all'."""
    outcome = infer_ensemble([['a', 'b']], prefer='unknown')
    assert outcome['best'] is not None
    assert len(outcome['all']) >= 1
# ── Edge cases ──
def test_ensemble_empty_input():
    """For empty input, 'best' is either None or an entry with a grammar."""
    outcome = infer_ensemble([], kmax=2, N=3)
    winner = outcome['best']
    assert winner is None or winner['grammar'] is not None
def test_ensemble_single_sequence():
    """A single input sequence still produces a best result with a grammar."""
    only_sequence = ['a', 'b', 'c']
    outcome = infer_ensemble([only_sequence], kmax=2, N=3)
    winner = outcome['best']
    assert winner is not None
    assert winner['grammar'] is not None
def test_ensemble_many_identical():
    """Ten copies of the same sequence still produce a best result."""
    # List repetition is kept as in the original, so all ten items are the
    # same inner list object.
    repeated = [['a', 'b', 'c']] * 10
    outcome = infer_ensemble(repeated, kmax=2, N=3)
    assert outcome['best'] is not None
def test_ensemble_linear_data():
    """For two near-identical linear sequences, the best grammar mentions key symbols."""
    shorter = ['file', 'template', 'command', 'set_fact', 'shell']
    longer = ['file', 'template', 'command', 'set_fact', 'shell', 'wait_for']
    outcome = infer_ensemble([shorter, longer], kmax=2, N=3)
    winner = outcome['best']
    # A falsy 'best' is tolerated; the grammar is only checked when reported.
    if winner:
        grammar = winner['grammar']
        assert all(symbol in grammar for symbol in ('file', 'template', 'shell'))
def test_ensemble_branching_data():
    """For sequences differing in one middle symbol, the best grammar is deterministic."""
    via_setup = ['file', 'template', 'setup', 'shell']
    via_deploy = ['file', 'template', 'deploy', 'shell']
    outcome = infer_ensemble([via_setup, via_deploy], kmax=2, N=5)
    winner = outcome['best']
    # A falsy 'best' is tolerated; the grammar is only checked when reported.
    if winner:
        grammar = winner['grammar']
        assert is_deterministic(grammar)
        # The symbols shared by both sequences must appear in the grammar.
        assert all(symbol in grammar for symbol in ('file', 'template', 'shell'))
def test_ensemble_why_includes_scores():
    """The 'why' text mentions CRX, the word 'selected', and MDL or a score."""
    outcome = infer_ensemble([['a', 'b'], ['a', 'b', 'c']], kmax=2, N=3)
    explanation = outcome['why']
    assert 'CRX' in explanation
    assert 'selected' in explanation
    assert 'MDL' in explanation or 'score' in explanation.lower()
def test_ensemble_ordering_best_first():
    """The first 'all' entry is the best algorithm and scores no higher than the last."""
    outcome = infer_ensemble([['a', 'b', 'c'], ['a', 'b']], kmax=2, N=3)
    winner = outcome['best']
    if winner:
        ranked = outcome['all']
        head, tail = ranked[0], ranked[-1]
        assert head['algorithm'] == winner['algorithm']
        # Only the two ends of the list are compared, as in the original.
        assert head['mdl_score'] <= tail['mdl_score']
# ── Stochastic stability tests ──
def test_ensemble_stable_on_simple_data():
    """Across three repeated runs, any reported best grammar contains 'a' and 'b'."""
    for _attempt in range(3):
        # The input is rebuilt on every iteration so each run gets fresh lists.
        sequences = [['a', 'b'], ['a', 'b', 'c']]
        outcome = infer_ensemble(sequences, kmax=2, N=3)
        winner = outcome['best']
        if winner:
            for symbol in ('a', 'b'):
                assert symbol in winner['grammar']
def test_ensemble_crx_always_present():
    """Exactly one entry in 'all' is attributed to CRX."""
    outcome = infer_ensemble([['a', 'b'], ['a', 'b', 'c']], kmax=2, N=3)
    crx_count = sum(
        1 for entry in outcome['all'] if entry['algorithm'] == 'CRX'
    )
    assert crx_count == 1
# ── min_coverage / core analysis tests ──
def test_core_not_included_when_coverage_1():
    """With min_coverage=1.0 the result has no 'core' key."""
    outcome = infer_ensemble([['a', 'b'], ['a', 'b', 'c']], min_coverage=1.0)
    assert 'core' not in outcome
def test_core_included_when_coverage_lt_1():
    """With min_coverage=0.8 the result has a 'core' entry with the expected fields."""
    outcome = infer_ensemble([['a', 'b'], ['a', 'b', 'c']], min_coverage=0.8)
    assert 'core' in outcome
    core = outcome['core']
    for field in ('grammar', 'coverage', 'outliers', 'outlier_count'):
        assert field in core
def test_core_outlier_detection():
    """With min_coverage=0.7 on mixed-length data, at least one outlier is reported."""
    common = ['fail', 'package', 'file', 'service']
    # Four distinct lists: two copies of the common prefix and two extensions.
    sequences = [
        list(common),
        list(common),
        common + ['npm'],
        common + ['npm', 'pip'],
    ]
    outcome = infer_ensemble(sequences, min_coverage=0.7)
    assert 'core' in outcome
    core = outcome['core']
    assert core['outlier_count'] >= 1
    core_grammar = core['grammar']
    assert 'npm' in core_grammar or 'service' in core_grammar
def test_core_all_identical():
    """Ten identical sequences give a core with zero outliers whose grammar contains 'a'."""
    # List repetition is kept as in the original, so all ten items are the
    # same inner list object.
    repeated = [['a', 'b', 'c']] * 10
    outcome = infer_ensemble(repeated, min_coverage=0.8)
    assert 'core' in outcome
    core = outcome['core']
    assert core['outlier_count'] == 0
    assert 'a' in core['grammar']
def test_core_coverage_ratio():
    """If a core is reported, it lists at least one outlier and coverage >= 0.5."""
    base = ['a', 'b', 'c']
    # Four distinct lists: two copies of the base and two extensions.
    sequences = [
        list(base),
        list(base),
        base + ['d'],
        base + ['d', 'e'],
    ]
    outcome = infer_ensemble(sequences, min_coverage=0.7)
    # A missing 'core' key is tolerated here, as in the original.
    if 'core' in outcome:
        core = outcome['core']
        assert core['outlier_count'] >= 1
        assert len(core['outliers']) >= 1
        assert core['coverage'] >= 0.5
def test_core_empty_sequences():
    """Empty input with min_coverage=0.8 still yields a 'core' with a non-None grammar."""
    outcome = infer_ensemble([], min_coverage=0.8)
    assert 'core' in outcome
    core = outcome['core']
    assert core['grammar'] is not None
def run_all():
    """Run every test in this module without pytest and print a summary.

    Each test function is called in turn. Any exception it raises (a failed
    assert included) is printed with its traceback and counted as a
    failure; the remaining tests still run.

    Returns:
        int: the number of tests that failed (0 when all passed).
    """
    # Imported once here rather than on every failure inside the loop.
    import traceback

    tests = [
        # Basic ensemble runs
        test_ensemble_returns_dict,
        test_ensemble_best_not_none,
        test_ensemble_runs_all_three,
        test_ensemble_all_results_have_scores,
        test_ensemble_deterministic_results,
        # prefer parameter
        test_prefer_crx,
        test_prefer_idregex,
        test_prefer_koreinference,
        test_prefer_case_insensitive,
        test_prefer_unknown_falls_back,
        # Edge cases
        test_ensemble_empty_input,
        test_ensemble_single_sequence,
        test_ensemble_many_identical,
        test_ensemble_linear_data,
        test_ensemble_branching_data,
        test_ensemble_why_includes_scores,
        test_ensemble_ordering_best_first,
        # Stochastic stability
        test_ensemble_stable_on_simple_data,
        test_ensemble_crx_always_present,
        # min_coverage / core analysis (previously missing from this list,
        # so running the file as a script never exercised them)
        test_core_not_included_when_coverage_1,
        test_core_included_when_coverage_lt_1,
        test_core_outlier_detection,
        test_core_all_identical,
        test_core_coverage_ratio,
        test_core_empty_sequences,
    ]
    passed = 0
    failed = 0
    for t in tests:
        try:
            t()
        except Exception as e:
            # Broad catch is deliberate: this is the top-level runner
            # boundary and one failing test must not stop the others.
            print(f" FAIL {t.__name__}: {e}")
            traceback.print_exc()
            failed += 1
        else:
            passed += 1
    print(f"\n{passed} passed, {failed} failed")
    return failed


if __name__ == '__main__':
    import sys

    # Exit non-zero when any test failed so shells and CI can detect it.
    sys.exit(1 if run_all() else 0)