145 lines
5 KiB
Python
145 lines
5 KiB
Python
"""
CLI — Command-Line Interface for bex YAML Grammar Inference.

Usage:
    python -m bex --dir roles/ --k-max 5
    python -m bex --dir playbooks/ --context tasks
    python -m bex --dir roles/ --output template.yaml
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
import glob
|
|
|
|
from .tokenizer import YAMLTokenizer
|
|
from .kore import kOREInference
|
|
from .template import generate_template
|
|
from .ilocal import iLocal, extract_contexts_from_file, reduce_contexts
|
|
|
|
|
|
def find_yaml_files(directory):
    """Return a sorted list of YAML files found recursively under *directory*.

    Paths ending in ``.yml`` or ``.yaml`` are collected at any depth using
    recursive globbing.

    Args:
        directory: Base directory to search (``str`` or path-like).

    Returns:
        Sorted list of matching file paths, each prefixed with *directory*.
        The list is empty when nothing matches.
    """
    # Escape the base path so glob metacharacters in the directory name
    # (e.g. "roles[1]") are matched literally instead of as a pattern.
    base = glob.escape(os.fspath(directory))
    matches = set()
    for pattern in ('**/*.yml', '**/*.yaml'):
        matches.update(glob.glob(os.path.join(base, pattern), recursive=True))
    # A directory can itself be named "something.yml"; only regular files
    # are useful to the callers, which open each returned path.
    return sorted(path for path in matches if os.path.isfile(path))
|
|
|
|
|
|
def _build_parser():
    """Create the argument parser describing the bex command-line options."""
    parser = argparse.ArgumentParser(
        description='bex — BEX-based YAML Grammar Inference',
    )
    parser.add_argument('--dir', type=str, default='roles/',
                        help='Directory with YAML files (default: roles/)')
    parser.add_argument('--k-max', type=int, default=5,
                        help='Max k for k-ORE inference (default: 5)')
    parser.add_argument('--context', type=str, default=None,
                        help='Restrict to specific container key (e.g. tasks)')
    parser.add_argument('--output', type=str, default=None,
                        help='Output file for template (default: stdout)')
    parser.add_argument('--ilocal', action='store_true',
                        help='Run iLocal context analysis')
    parser.add_argument('--crx', action='store_true',
                        help='Use CRX (direct CHARE inference)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    parser.add_argument('--stats', action='store_true',
                        help='Show token statistics')
    return parser


def _report_ilocal_contexts(yaml_files, verbose):
    """Collect iLocal contexts from all files and print a summary to stderr."""
    print("\n=== iLocal: Context Extraction ===", file=sys.stderr)
    all_contexts = {}
    failed = 0
    for path in yaml_files:
        # Best-effort per file, mirroring the tokenization step: one broken
        # file must not abort the whole analysis.
        try:
            contexts = extract_contexts_from_file(path)
        except Exception as e:
            failed += 1
            if verbose:
                print(f" Error in {path}: {e}", file=sys.stderr)
            continue
        for ctx, seqs in contexts.items():
            all_contexts.setdefault(ctx, []).extend(seqs)
    if failed:
        print(f" Files skipped due to errors: {failed}", file=sys.stderr)

    reduced = reduce_contexts(all_contexts)
    print(f" Contexts found: {len(reduced)}", file=sys.stderr)
    for ctx, seqs in sorted(reduced.items()):
        lengths = [len(s) for s in seqs]
        # default=0 keeps the summary printable for a context without sequences.
        print(f" {ctx}: {len(seqs)} sequences, "
              f"lengths {min(lengths, default=0)}-{max(lengths, default=0)}, "
              f"unique_seqs={len(set(tuple(s) for s in seqs))}",
              file=sys.stderr)


def _tokenize_files(tokenizer, yaml_files, verbose):
    """Tokenize each file; return (non-empty sequences, number of failed files)."""
    sequences = []
    failed = 0
    for path in yaml_files:
        try:
            seq = tokenizer.tokenize_file(path)
            if seq:
                sequences.append(seq)
                if verbose:
                    print(f" {os.path.relpath(path)}: {seq}", file=sys.stderr)
        except Exception as e:
            # Deliberately best-effort: a file that cannot be tokenized is
            # skipped, but counted so the caller can report it.
            failed += 1
            if verbose:
                print(f" Error in {path}: {e}", file=sys.stderr)
    return sequences, failed


def main(argv=None):
    """Run the bex command-line interface.

    Finds YAML files under ``--dir``, optionally prints the iLocal context
    summary, tokenizes the files, infers an expression (k-ORE, or CRX when
    ``--crx`` is given) and emits the generated template to ``--output`` or
    to stdout. All progress and diagnostic output goes to stderr.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when the directory does not exist, when it
            contains no YAML files, or when no token sequence was extracted.
            argparse itself also exits on invalid arguments.
    """
    args = _build_parser().parse_args(argv)

    if not os.path.isdir(args.dir):
        print(f"Error: directory '{args.dir}' not found.", file=sys.stderr)
        sys.exit(1)

    yaml_files = find_yaml_files(args.dir)
    if not yaml_files:
        print(f"No YAML files found in '{args.dir}'.", file=sys.stderr)
        sys.exit(1)

    print(f"Found YAML files: {len(yaml_files)}", file=sys.stderr)

    if args.ilocal:
        _report_ilocal_contexts(yaml_files, args.verbose)

    print("\n=== Tokenisierung ===", file=sys.stderr)
    tokenizer = YAMLTokenizer(resolve_includes=False)
    all_sequences, failed = _tokenize_files(tokenizer, yaml_files, args.verbose)
    if failed:
        # Without --verbose the per-file errors are hidden; always report
        # how many files were dropped so missing input does not go unnoticed.
        print(f" Files skipped due to errors: {failed}", file=sys.stderr)

    if not all_sequences:
        print("No sequences extracted.", file=sys.stderr)
        sys.exit(1)

    print(f" Sequences extracted: {len(all_sequences)}", file=sys.stderr)
    lengths = [len(s) for s in all_sequences]
    print(f" Lengths: min={min(lengths)}, max={max(lengths)}, "
          f"avg={sum(lengths)/len(lengths):.1f}", file=sys.stderr)

    if args.stats:
        stats = tokenizer.get_statistics()
        print("\n=== Token Statistics ===", file=sys.stderr)
        # Only the first 30 entries are shown to keep the output short.
        for token, count in list(stats.items())[:30]:
            print(f" {token}: {count}", file=sys.stderr)

    print("\n=== k-ORE Inference ===", file=sys.stderr)
    kore = kOREInference(k_max=args.k_max)

    # Fallback expression used when inference yields no result.
    expr = "∅"
    if args.crx:
        result = kore.infer_with_crx(all_sequences)
        # NOTE(review): this guard mirrors the k-ORE branch below; confirm
        # whether infer_with_crx can actually return a falsy result.
        if result:
            _, expr, method = result
            print(f" Method: {method}", file=sys.stderr)
        else:
            print(" Kein Ergebnis", file=sys.stderr)
    else:
        result = kore.infer(all_sequences)
        if result:
            _, expr, k = result
            print(f" Bestes k: {k}", file=sys.stderr)
        else:
            print(" Kein Ergebnis", file=sys.stderr)

    print(f" Inferred expression: {expr}", file=sys.stderr)

    print("\n=== One-Shot Template ===", file=sys.stderr)
    print(file=sys.stderr)
    template = generate_template(expr, context_key=args.context)

    if args.output:
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding (the template may contain non-ASCII characters).
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(template)
        print(f"Template written to: {args.output}", file=sys.stderr)
    else:
        print(template)
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|