grammar-inference-engine/bex/cli.py

145 lines
5 KiB
Python

"""
CLI — Command-Line Interface for bex YAML Grammar Inference.
Usage:
python -m bex --dir roles/ --k-max 5
python -m bex --dir playbooks/ --context tasks
python -m bex --dir roles/ --output template.yaml
"""
import argparse
import os
import sys
import glob
from .tokenizer import YAMLTokenizer
from .kore import kOREInference
from .template import generate_template
from .ilocal import iLocal, extract_contexts_from_file, reduce_contexts
def find_yaml_files(directory):
    """Return all ``*.yml`` / ``*.yaml`` files below *directory*, sorted.

    The search is recursive. *directory* is treated as a literal path:
    glob metacharacters in it (``[``, ``]``, ``*``, ``?``) are escaped so
    that a directory such as ``roles[old]`` is searched instead of being
    interpreted as a wildcard pattern.

    Args:
        directory: Path of the directory to search.

    Returns:
        A sorted list of matching file paths (each prefixed with
        *directory*). Empty when nothing matches. As with any ``glob``
        wildcard, names starting with a dot are not matched.
    """
    # Escape only the caller-supplied part; the wildcard suffix stays active.
    root = glob.escape(directory)
    files = []
    for pattern in ('**/*.yml', '**/*.yaml'):
        files.extend(glob.glob(os.path.join(root, pattern), recursive=True))
    return sorted(files)
def main(argv=None):
    """Run the bex pipeline: find YAML files, tokenize, infer, emit a template.

    Progress and diagnostics are written to stderr. The generated template
    is written to stdout, or to the file given with ``--output``.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``None`` (the default) lets argparse read
            ``sys.argv[1:]``, so plain ``main()`` calls behave as before.

    Raises:
        SystemExit: with status 1 when ``--dir`` is not a directory, when no
            YAML files are found in it, or when no token sequence could be
            extracted.
    """
    args = _build_parser().parse_args(argv)

    if not os.path.isdir(args.dir):
        print(f"Error: directory '{args.dir}' not found.", file=sys.stderr)
        sys.exit(1)

    yaml_files = find_yaml_files(args.dir)
    if not yaml_files:
        print(f"No YAML files found in '{args.dir}'.", file=sys.stderr)
        sys.exit(1)
    print(f"Found YAML files: {len(yaml_files)}", file=sys.stderr)

    if args.ilocal:
        _report_ilocal_contexts(yaml_files, verbose=args.verbose)

    print("\n=== Tokenisierung ===", file=sys.stderr)
    tokenizer = YAMLTokenizer(resolve_includes=False)
    all_sequences = _tokenize_files(tokenizer, yaml_files, verbose=args.verbose)
    if not all_sequences:
        print("No sequences extracted.", file=sys.stderr)
        sys.exit(1)

    print(f" Sequences extracted: {len(all_sequences)}", file=sys.stderr)
    lengths = [len(s) for s in all_sequences]
    print(f" Lengths: min={min(lengths)}, max={max(lengths)}, "
          f"avg={sum(lengths)/len(lengths):.1f}", file=sys.stderr)

    if args.stats:
        stats = tokenizer.get_statistics()
        print("\n=== Token Statistics ===", file=sys.stderr)
        # Only the first 30 entries, in the order the tokenizer returns them.
        for token, count in list(stats.items())[:30]:
            print(f" {token}: {count}", file=sys.stderr)

    print("\n=== k-ORE Inference ===", file=sys.stderr)
    kore = kOREInference(k_max=args.k_max)
    expr = _infer_expression(kore, all_sequences, use_crx=args.crx)
    print(f" Inferred expression: {expr}", file=sys.stderr)

    print("\n=== One-Shot Template ===", file=sys.stderr)
    print(file=sys.stderr)
    template = generate_template(expr, context_key=args.context)
    if args.output:
        # Explicit UTF-8 so the written template does not depend on the
        # platform's default encoding.
        with open(args.output, 'w', encoding='utf-8') as out:
            out.write(template)
        print(f"Template written to: {args.output}", file=sys.stderr)
    else:
        print(template)


def _build_parser():
    """Create the argparse parser describing all bex command-line options."""
    parser = argparse.ArgumentParser(
        description='bex — BEX-based YAML Grammar Inference',
    )
    parser.add_argument('--dir', type=str, default='roles/',
                        help='Directory with YAML files (default: roles/)')
    parser.add_argument('--k-max', type=int, default=5,
                        help='Max k for k-ORE inference (default: 5)')
    parser.add_argument('--context', type=str, default=None,
                        help='Restrict to specific container key (e.g. tasks)')
    parser.add_argument('--output', type=str, default=None,
                        help='Output file for template (default: stdout)')
    parser.add_argument('--ilocal', action='store_true',
                        help='Run iLocal context analysis')
    parser.add_argument('--crx', action='store_true',
                        help='Use CRX (direct CHARE inference)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    parser.add_argument('--stats', action='store_true',
                        help='Show token statistics')
    return parser


def _report_ilocal_contexts(yaml_files, *, verbose):
    """Collect iLocal contexts from all files and print a per-context summary."""
    print("\n=== iLocal: Context Extraction ===", file=sys.stderr)
    all_contexts = {}
    failed = 0
    for path in yaml_files:
        try:
            contexts = extract_contexts_from_file(path)
        except Exception as exc:
            # Best effort, mirroring the tokenization pass: one unreadable
            # file must not abort the optional analysis.
            failed += 1
            if verbose:
                print(f" Error in {path}: {exc}", file=sys.stderr)
            continue
        for ctx, seqs in contexts.items():
            all_contexts.setdefault(ctx, []).extend(seqs)
    if failed:
        print(f" Files skipped due to errors: {failed}", file=sys.stderr)

    reduced = reduce_contexts(all_contexts)
    print(f" Contexts found: {len(reduced)}", file=sys.stderr)
    for ctx, seqs in sorted(reduced.items()):
        lengths = [len(s) for s in seqs]
        # default=0 keeps the summary printable for a context without sequences.
        print(f" {ctx}: {len(seqs)} sequences, "
              f"lengths {min(lengths, default=0)}-{max(lengths, default=0)}, "
              f"unique_seqs={len({tuple(s) for s in seqs})}",
              file=sys.stderr)


def _tokenize_files(tokenizer, yaml_files, *, verbose):
    """Tokenize every file and return the list of non-empty token sequences."""
    sequences = []
    failed = 0
    for path in yaml_files:
        try:
            seq = tokenizer.tokenize_file(path)
            if seq:
                sequences.append(seq)
                if verbose:
                    print(f" {os.path.relpath(path)}: {seq}", file=sys.stderr)
        except Exception as exc:
            # Deliberately broad: the tokenizer's failure modes are not
            # visible here, and a single bad file should only be skipped.
            failed += 1
            if verbose:
                print(f" Error in {path}: {exc}", file=sys.stderr)
    if failed:
        # Always surface the count so skipped input is never fully silent.
        print(f" Files skipped due to errors: {failed}", file=sys.stderr)
    return sequences


def _infer_expression(kore, sequences, *, use_crx):
    """Run CRX or k-ORE inference and return the expression ("" if none)."""
    if use_crx:
        result = kore.infer_with_crx(sequences)
        if result:
            _, expr, method = result
            print(f" Method: {method}", file=sys.stderr)
            return expr
    else:
        result = kore.infer(sequences)
        if result:
            _, expr, k = result
            print(f" Bestes k: {k}", file=sys.stderr)
            return expr
    # Either engine produced an empty result.
    print(" Kein Ergebnis", file=sys.stderr)
    return ""
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()