grammar-inference-engine/bex/cli.py
tobjend 7c00c6713d Initial commit: BEX-based grammar inference engine
- CRX: direct CHARE inference (Algorithm 7, TODS 2010)
- iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010)
- RWR₀: SORE repair (Algorithm 6, TODS 2010)
- rwr²: k-ORE extraction (Algorithm 3, arXiv 2010)
- SOA, k-OA, iKoa, 2T-INF, Baum-Welch
- Ansible role grammar adapter
- Generic YAML key-path converter
- 28 tests, all passing
2026-07-01 08:01:16 +02:00

145 lines
5.1 KiB
Python

"""
CLI — command-line interface for bex YAML grammar inference.

Usage:
    python -m bex --dir roles/ --k-max 5
    python -m bex --dir playbooks/ --context tasks
    python -m bex --dir roles/ --output template.yaml
"""
import argparse
import os
import sys
import glob
from .tokenizer import YAMLTokenizer
from .kore import kOREInference
from .template import generate_template
from .ilocal import iLocal, extract_contexts_from_file, reduce_contexts
def find_yaml_files(directory):
    """Return all YAML files below *directory*, searched recursively.

    Args:
        directory: Root directory to search.

    Returns:
        A sorted list of paths (each starting with *directory*) of regular
        files matching ``*.yml`` or ``*.yaml``. The list is empty when
        nothing matches.
    """
    patterns = ('**/*.yml', '**/*.yaml')
    matches = set()
    for pattern in patterns:
        matches.update(
            glob.glob(os.path.join(directory, pattern), recursive=True)
        )
    # glob matches directories as well (e.g. a folder named "vars.yml");
    # only regular files can be tokenized, so drop everything else here.
    return sorted(path for path in matches if os.path.isfile(path))
def _build_arg_parser():
    """Create the argument parser describing all bex command-line options."""
    parser = argparse.ArgumentParser(
        description='bex — BEX-based YAML Grammar Inference',
    )
    parser.add_argument('--dir', type=str, default='roles/',
                        help='Verzeichnis mit YAML-Dateien (default: roles/)')
    parser.add_argument('--k-max', type=int, default=5,
                        help='Max k für k-ORE-Inferenz (default: 5)')
    parser.add_argument('--context', type=str, default=None,
                        help='Auf spezifischen Container-Key beschränken (z.B. tasks)')
    parser.add_argument('--output', type=str, default=None,
                        help='Output-Datei für Template (default: stdout)')
    parser.add_argument('--ilocal', action='store_true',
                        help='iLocal-Kontextanalyse durchführen')
    parser.add_argument('--crx', action='store_true',
                        help='CRX (direct CHARE inference) verwenden')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Ausführliche Ausgabe')
    parser.add_argument('--stats', action='store_true',
                        help='Zeige Token-Statistiken')
    return parser


def _report_ilocal_contexts(yaml_files, verbose=False):
    """Extract iLocal contexts from *yaml_files* and print a summary to stderr.

    The result is only reported, never fed into the later inference, so a
    file that cannot be processed is skipped instead of aborting the run.
    """
    print("\n=== iLocal: Kontext-Extraktion ===", file=sys.stderr)
    all_contexts = {}
    failed = 0
    for path in yaml_files:
        try:
            contexts = extract_contexts_from_file(path)
        except Exception as e:
            # Same best-effort policy as the tokenization step.
            failed += 1
            if verbose:
                print(f" Fehler in {path}: {e}", file=sys.stderr)
            continue
        for ctx, seqs in contexts.items():
            all_contexts.setdefault(ctx, []).extend(seqs)
    if failed:
        print(f" Dateien mit Fehlern: {failed}", file=sys.stderr)

    reduced = reduce_contexts(all_contexts)
    print(f" Kontexte gefunden: {len(reduced)}", file=sys.stderr)
    for ctx, seqs in sorted(reduced.items()):
        if not seqs:
            # min()/max() raise ValueError on an empty list.
            print(f" {ctx}: 0 Sequenzen", file=sys.stderr)
            continue
        lengths = [len(s) for s in seqs]
        print(f" {ctx}: {len(seqs)} Sequenzen, "
              f"Längen {min(lengths)}-{max(lengths)}, "
              f"unique_seqs={len({tuple(s) for s in seqs})}",
              file=sys.stderr)


def _tokenize_files(tokenizer, yaml_files, verbose=False):
    """Tokenize every file and return the list of non-empty token sequences.

    Files that raise during tokenization are skipped; the number of such
    files is always reported, the individual errors only with *verbose*.
    """
    sequences = []
    failed = 0
    for path in yaml_files:
        try:
            seq = tokenizer.tokenize_file(path)
            if seq:
                sequences.append(seq)
                if verbose:
                    print(f" {os.path.relpath(path)}: {seq}", file=sys.stderr)
        except Exception as e:
            failed += 1
            if verbose:
                print(f" Fehler in {path}: {e}", file=sys.stderr)
    if failed:
        # Without this, dropped input is invisible unless --verbose is set.
        print(f" Dateien mit Fehlern: {failed}", file=sys.stderr)
    return sequences


def _infer_expression(sequences, k_max, use_crx):
    """Run k-ORE (or CRX) inference and return the inferred expression.

    Returns an empty string when the inference yields no result.
    """
    kore = kOREInference(k_max=k_max)
    if use_crx:
        result = kore.infer_with_crx(sequences)
        label = "Methode"
    else:
        result = kore.infer(sequences)
        label = "Bestes k"
    # Guard both paths alike; previously only the non-CRX path checked for a
    # missing result before unpacking it.
    if not result:
        print(" Kein Ergebnis", file=sys.stderr)
        return ""
    _, expr, detail = result
    print(f" {label}: {detail}", file=sys.stderr)
    return expr


def main():
    """Entry point of the bex CLI.

    Parses the command line, collects the YAML files below ``--dir``,
    optionally prints the iLocal context summary, tokenizes the files,
    infers an expression and emits the generated template to ``--output``
    or stdout. All progress and diagnostic output goes to stderr.

    Exits with status 1 when the directory does not exist, contains no YAML
    files, or no token sequence could be extracted.
    """
    args = _build_arg_parser().parse_args()

    if not os.path.isdir(args.dir):
        print(f"Fehler: Verzeichnis '{args.dir}' nicht gefunden.", file=sys.stderr)
        sys.exit(1)

    yaml_files = find_yaml_files(args.dir)
    if not yaml_files:
        print(f"Keine YAML-Dateien in '{args.dir}' gefunden.", file=sys.stderr)
        sys.exit(1)
    print(f"Gefundene YAML-Dateien: {len(yaml_files)}", file=sys.stderr)

    if args.ilocal:
        _report_ilocal_contexts(yaml_files, verbose=args.verbose)

    print("\n=== Tokenisierung ===", file=sys.stderr)
    tokenizer = YAMLTokenizer(resolve_includes=False)
    all_sequences = _tokenize_files(tokenizer, yaml_files, verbose=args.verbose)
    if not all_sequences:
        print("Keine Sequenzen extrahiert.", file=sys.stderr)
        sys.exit(1)

    print(f" Sequenzen extrahiert: {len(all_sequences)}", file=sys.stderr)
    lengths = [len(s) for s in all_sequences]
    print(f" Längen: min={min(lengths)}, max={max(lengths)}, "
          f"avg={sum(lengths)/len(lengths):.1f}", file=sys.stderr)

    if args.stats:
        stats = tokenizer.get_statistics()
        print("\n=== Token-Statistiken ===", file=sys.stderr)
        # Only the first 30 entries, in the order the tokenizer reports them.
        for token, count in list(stats.items())[:30]:
            print(f" {token}: {count}", file=sys.stderr)

    print("\n=== k-ORE Inferenz ===", file=sys.stderr)
    expr = _infer_expression(all_sequences, args.k_max, args.crx)
    print(f" Inferierter Ausdruck: {expr}", file=sys.stderr)

    print("\n=== One-Shot Template ===", file=sys.stderr)
    print(file=sys.stderr)
    template = generate_template(expr, context_key=args.context)
    if args.output:
        # Explicit encoding so the file does not depend on the platform's
        # locale default.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(template)
        print(f"Template geschrieben nach: {args.output}", file=sys.stderr)
    else:
        print(template)
# Run the CLI only when this module is executed directly, not when imported.
if __name__ == "__main__":
    main()