"""Tests for infer_ensemble — runs CRX, iDRegEx, and kOREInference, picks best by MDL.""" from bex.ensemble import infer_ensemble from bex.idregex import is_deterministic from bex.kore import kOREInference # ── Basic ensemble runs ── def test_ensemble_returns_dict(): seqs = [['a', 'b', 'c'], ['a', 'b', 'c', 'd']] result = infer_ensemble(seqs, kmax=2, N=3) assert isinstance(result, dict) assert 'best' in result assert 'all' in result assert 'why' in result def test_ensemble_best_not_none(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, kmax=2, N=3) assert result['best'] is not None assert result['best']['grammar'] is not None assert result['best']['algorithm'] in ('CRX', 'iDRegEx', 'kOREInference') assert result['best']['mdl_score'] is not None def test_ensemble_runs_all_three(): seqs = [['a', 'b', 'c'], ['a', 'b', 'c', 'd']] result = infer_ensemble(seqs, kmax=2, N=3) algos = {a['algorithm'] for a in result['all']} assert 'CRX' in algos # iDRegEx and kOREInference may fail stochastically, so at least CRX assert len(result['all']) >= 1 def test_ensemble_all_results_have_scores(): seqs = [['a', 'b'], ['a', 'b', 'b']] result = infer_ensemble(seqs, kmax=2, N=3) for entry in result['all']: assert 'algorithm' in entry assert 'grammar' in entry assert 'mdl_score' in entry assert isinstance(entry['mdl_score'], (int, float)) def test_ensemble_deterministic_results(): seqs = [['x', 'y'], ['x', 'z']] result = infer_ensemble(seqs, kmax=2, N=3) if result['best']: assert is_deterministic(result['best']['grammar']) # ── prefer parameter tests ── def test_prefer_crx(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, prefer='crx') assert result['best']['algorithm'] == 'CRX' assert len(result['all']) == 1 def test_prefer_idregex(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, prefer='idregex', kmax=2, N=5) assert result['best']['algorithm'] == 'iDRegEx' assert len(result['all']) == 1 def test_prefer_koreinference(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, prefer='koreinference', kmax=2, N=5) assert result['best']['algorithm'] == 'kOREInference' assert len(result['all']) == 1 def test_prefer_case_insensitive(): seqs = [['a', 'b']] r1 = infer_ensemble(seqs, prefer='CRX') r2 = infer_ensemble(seqs, prefer='Crx') assert r1['best']['algorithm'] == r2['best']['algorithm'] def test_prefer_unknown_falls_back(): seqs = [['a', 'b']] result = infer_ensemble(seqs, prefer='unknown') assert result['best'] is not None assert len(result['all']) >= 1 # ── Edge cases ── def test_ensemble_empty_input(): result = infer_ensemble([], kmax=2, N=3) assert result['best'] is None or result['best']['grammar'] is not None def test_ensemble_single_sequence(): result = infer_ensemble([['a', 'b', 'c']], kmax=2, N=3) assert result['best'] is not None assert result['best']['grammar'] is not None def test_ensemble_many_identical(): seqs = [['a', 'b', 'c']] * 10 result = infer_ensemble(seqs, kmax=2, N=3) assert result['best'] is not None def test_ensemble_linear_data(): seqs = [ ['file', 'template', 'command', 'set_fact', 'shell'], ['file', 'template', 'command', 'set_fact', 'shell', 'wait_for'], ] result = infer_ensemble(seqs, kmax=2, N=3) if result['best']: g = result['best']['grammar'] assert 'file' in g and 'template' in g and 'shell' in g def test_ensemble_branching_data(): seqs = [ ['file', 'template', 'setup', 'shell'], ['file', 'template', 'deploy', 'shell'], ] result = infer_ensemble(seqs, kmax=2, N=5) if result['best']: g = result['best']['grammar'] assert is_deterministic(g) assert 'file' in g and 'template' in g and 'shell' in g def test_ensemble_why_includes_scores(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, kmax=2, N=3) assert 'CRX' in result['why'] assert 'selected' in result['why'] assert 'MDL' in result['why'] or 'score' in result['why'].lower() def test_ensemble_ordering_best_first(): seqs = [['a', 'b', 'c'], ['a', 'b']] result = infer_ensemble(seqs, kmax=2, N=3) if result['best']: assert result['all'][0]['algorithm'] == result['best']['algorithm'] assert result['all'][0]['mdl_score'] <= result['all'][-1]['mdl_score'] # ── Stochastic stability tests ── def test_ensemble_stable_on_simple_data(): for _ in range(3): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, kmax=2, N=3) if result['best']: assert 'a' in result['best']['grammar'] assert 'b' in result['best']['grammar'] def test_ensemble_crx_always_present(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, kmax=2, N=3) crx_results = [a for a in result['all'] if a['algorithm'] == 'CRX'] assert len(crx_results) == 1 # ── min_coverage / core analysis tests ── def test_core_not_included_when_coverage_1(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, min_coverage=1.0) assert 'core' not in result def test_core_included_when_coverage_lt_1(): seqs = [['a', 'b'], ['a', 'b', 'c']] result = infer_ensemble(seqs, min_coverage=0.8) assert 'core' in result assert 'grammar' in result['core'] assert 'coverage' in result['core'] assert 'outliers' in result['core'] assert 'outlier_count' in result['core'] def test_core_outlier_detection(): seqs = [ ['fail', 'package', 'file', 'service'], ['fail', 'package', 'file', 'service'], ['fail', 'package', 'file', 'service', 'npm'], ['fail', 'package', 'file', 'service', 'npm', 'pip'], ] result = infer_ensemble(seqs, min_coverage=0.7) assert 'core' in result c = result['core'] assert c['outlier_count'] >= 1 assert 'npm' in c['grammar'] or 'service' in c['grammar'] def test_core_all_identical(): seqs = [['a', 'b', 'c']] * 10 result = infer_ensemble(seqs, min_coverage=0.8) assert 'core' in result assert result['core']['outlier_count'] == 0 assert 'a' in result['core']['grammar'] def test_core_coverage_ratio(): seqs = [ ['a', 'b', 'c'], ['a', 'b', 'c'], ['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd', 'e'], ] result = infer_ensemble(seqs, min_coverage=0.7) if 'core' in result: c = result['core'] assert c['outlier_count'] >= 1 assert len(c['outliers']) >= 1 assert c['coverage'] >= 0.5 def test_core_empty_sequences(): result = infer_ensemble([], min_coverage=0.8) assert 'core' in result assert result['core']['grammar'] is not None def run_all(): tests = [ test_ensemble_returns_dict, test_ensemble_best_not_none, test_ensemble_runs_all_three, test_ensemble_all_results_have_scores, test_ensemble_deterministic_results, test_prefer_crx, test_prefer_idregex, test_prefer_koreinference, test_prefer_case_insensitive, test_prefer_unknown_falls_back, test_ensemble_empty_input, test_ensemble_single_sequence, test_ensemble_many_identical, test_ensemble_linear_data, test_ensemble_branching_data, test_ensemble_why_includes_scores, test_ensemble_ordering_best_first, test_ensemble_stable_on_simple_data, test_ensemble_crx_always_present, ] passed = 0 failed = 0 for t in tests: try: t() passed += 1 except Exception as e: import traceback print(f" FAIL {t.__name__}: {e}") traceback.print_exc() failed += 1 print(f"\n{passed} passed, {failed} failed") if __name__ == '__main__': run_all()