""" CLI — Command-Line Interface for bex YAML Grammar Inference. Usage: python -m bex --dir roles/ --k-max 5 python -m bex --dir playbooks/ --context tasks python -m bex --dir roles/ --output template.yaml """ import argparse import os import sys import glob from .tokenizer import YAMLTokenizer from .kore import kOREInference from .template import generate_template from .ilocal import iLocal, extract_contexts_from_file, reduce_contexts def find_yaml_files(directory): """Find all YAML files in a directory (recursive).""" patterns = ['**/*.yml', '**/*.yaml'] files = [] for pattern in patterns: files.extend(glob.glob(os.path.join(directory, pattern), recursive=True)) return sorted(files) def main(): parser = argparse.ArgumentParser( description='bex — BEX-based YAML Grammar Inference', ) parser.add_argument('--dir', type=str, default='roles/', help='Directory with YAML files (default: roles/)') parser.add_argument('--k-max', type=int, default=5, help='Max k for k-ORE inference (default: 5)') parser.add_argument('--context', type=str, default=None, help='Restrict to specific container key (e.g. tasks)') parser.add_argument('--output', type=str, default=None, help='Output file for template (default: stdout)') parser.add_argument('--ilocal', action='store_true', help='Run iLocal context analysis') parser.add_argument('--crx', action='store_true', help='Use CRX (direct CHARE inference)') parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output') parser.add_argument('--stats', action='store_true', help='Show token statistics') args = parser.parse_args() if not os.path.isdir(args.dir): print(f"Error: directory '{args.dir}' not found.", file=sys.stderr) sys.exit(1) yaml_files = find_yaml_files(args.dir) if not yaml_files: print(f"No YAML files found in '{args.dir}'.", file=sys.stderr) sys.exit(1) print(f"Found YAML files: {len(yaml_files)}", file=sys.stderr) if args.ilocal: print("\n=== iLocal: Context Extraction ===", file=sys.stderr) all_contexts = {} for f in yaml_files: contexts = extract_contexts_from_file(f) for ctx, seqs in contexts.items(): if ctx not in all_contexts: all_contexts[ctx] = [] all_contexts[ctx].extend(seqs) reduced = reduce_contexts(all_contexts) print(f" Contexts found: {len(reduced)}", file=sys.stderr) for ctx, seqs in sorted(reduced.items()): lengths = [len(s) for s in seqs] print(f" {ctx}: {len(seqs)} sequences, " f"lengths {min(lengths)}-{max(lengths)}, " f"unique_seqs={len(set(tuple(s) for s in seqs))}", file=sys.stderr) print("\n=== Tokenisierung ===", file=sys.stderr) tokenizer = YAMLTokenizer(resolve_includes=False) all_sequences = [] container_sequences = {} for f in yaml_files: try: seq = tokenizer.tokenize_file(f) if seq: all_sequences.append(seq) if args.verbose: print(f" {os.path.relpath(f)}: {seq}", file=sys.stderr) except Exception as e: if args.verbose: print(f" Error in {f}: {e}", file=sys.stderr) if not all_sequences: print("No sequences extracted.", file=sys.stderr) sys.exit(1) print(f" Sequences extracted: {len(all_sequences)}", file=sys.stderr) lengths = [len(s) for s in all_sequences] print(f" Lengths: min={min(lengths)}, max={max(lengths)}, " f"avg={sum(lengths)/len(lengths):.1f}", file=sys.stderr) if args.stats: stats = tokenizer.get_statistics() print("\n=== Token Statistics ===", file=sys.stderr) for token, count in list(stats.items())[:30]: print(f" {token}: {count}", file=sys.stderr) print("\n=== k-ORE Inference ===", file=sys.stderr) kore = kOREInference(k_max=args.k_max) if args.crx: result = kore.infer_with_crx(all_sequences) _, expr, method = result print(f" Method: {method}", file=sys.stderr) else: result = kore.infer(all_sequences) if result: _, expr, k = result print(f" Bestes k: {k}", file=sys.stderr) else: expr = "∅" print(" Kein Ergebnis", file=sys.stderr) print(f" Inferred expression: {expr}", file=sys.stderr) print("\n=== One-Shot Template ===", file=sys.stderr) print(file=sys.stderr) template = generate_template(expr, context_key=args.context) if args.output: with open(args.output, 'w') as f: f.write(template) print(f"Template written to: {args.output}", file=sys.stderr) else: print(template) if __name__ == '__main__': main()