- CRX: direct CHARE inference (Algorithm 7, TODS 2010) - iDRegEx: k-ORE inference (Algorithm 4, arXiv 2010) - RWR₀: SORE repair (Algorithm 6, TODS 2010) - rwr²: k-ORE extraction (Algorithm 3, arXiv 2010) - SOA, k-OA, iKoa, 2T-INF, Baum-Welch - Ansible role grammar adapter - Generic YAML key-path converter - 28 tests, all passing
145 lines
5.1 KiB
Python
145 lines
5.1 KiB
Python
"""
|
|
CLI — Command-Line Interface for bex YAML Grammar Inference.
|
|
|
|
Usage:
|
|
python -m bex --dir roles/ --k-max 5
|
|
python -m bex --dir playbooks/ --context tasks
|
|
python -m bex --dir roles/ --output template.yaml
|
|
"""
|
|
|
|
import argparse
|
|
import os
|
|
import sys
|
|
import glob
|
|
|
|
from .tokenizer import YAMLTokenizer
|
|
from .kore import kOREInference
|
|
from .template import generate_template
|
|
from .ilocal import iLocal, extract_contexts_from_file, reduce_contexts
|
|
|
|
|
|
def find_yaml_files(directory):
    """Return all YAML files below *directory*, searched recursively.

    Args:
        directory: Root directory to search. It is treated as a literal
            path: glob metacharacters in it (``[``, ``*``, ``?``) are
            escaped before matching.

    Returns:
        A sorted list of paths to regular files whose names end in
        ``.yml`` or ``.yaml``. Empty if nothing matches.
    """
    # Escape the root so that a directory such as "roles[1]" is matched
    # literally instead of being read as a glob character class.
    root = glob.escape(directory)
    matches = []
    for pattern in ('**/*.yml', '**/*.yaml'):
        matches.extend(glob.glob(os.path.join(root, pattern), recursive=True))
    # A directory can also be named "something.yml"; keep regular files only.
    return sorted(path for path in matches if os.path.isfile(path))
|
|
|
|
|
|
def _report_ilocal_contexts(yaml_files):
    """Print per-context sequence statistics for *yaml_files* to stderr."""
    print("\n=== iLocal: Kontext-Extraktion ===", file=sys.stderr)
    all_contexts = {}
    for path in yaml_files:
        for ctx, seqs in extract_contexts_from_file(path).items():
            all_contexts.setdefault(ctx, []).extend(seqs)

    reduced = reduce_contexts(all_contexts)
    print(f" Kontexte gefunden: {len(reduced)}", file=sys.stderr)
    for ctx, seqs in sorted(reduced.items()):
        lengths = [len(s) for s in seqs]
        # default=0 keeps the report from crashing on a context that has
        # no sequences at all.
        print(f" {ctx}: {len(seqs)} Sequenzen, "
              f"Längen {min(lengths, default=0)}-{max(lengths, default=0)}, "
              f"unique_seqs={len({tuple(s) for s in seqs})}",
              file=sys.stderr)


def _tokenize_files(tokenizer, yaml_files, verbose):
    """Tokenize every file; return (non-empty sequences, failure count)."""
    sequences = []
    failures = 0
    for path in yaml_files:
        try:
            seq = tokenizer.tokenize_file(path)
            if seq:
                sequences.append(seq)
                if verbose:
                    print(f" {os.path.relpath(path)}: {seq}", file=sys.stderr)
        except Exception as exc:
            # Best effort: a single broken file must not abort the run,
            # but it is counted so the caller can report it.
            failures += 1
            if verbose:
                print(f" Fehler in {path}: {exc}", file=sys.stderr)
    return sequences, failures


def main(argv=None):
    """Run the bex command-line interface.

    Collects the YAML files below ``--dir``, tokenizes them, runs the
    inference (``kOREInference.infer`` or, with ``--crx``,
    ``infer_with_crx``) on the token sequences and emits the template
    generated from the inferred expression. Progress and diagnostics are
    written to stderr; the template goes to ``--output`` or to stdout.

    Args:
        argv: Optional list of command-line arguments without the program
            name. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, so existing callers are unaffected.

    Raises:
        SystemExit: With status 1 if the directory does not exist, holds
            no YAML files, or no token sequence could be extracted.
            argparse additionally exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='bex — BEX-based YAML Grammar Inference',
    )
    parser.add_argument('--dir', type=str, default='roles/',
                        help='Verzeichnis mit YAML-Dateien (default: roles/)')
    parser.add_argument('--k-max', type=int, default=5,
                        help='Max k für k-ORE-Inferenz (default: 5)')
    parser.add_argument('--context', type=str, default=None,
                        help='Auf spezifischen Container-Key beschränken (z.B. tasks)')
    parser.add_argument('--output', type=str, default=None,
                        help='Output-Datei für Template (default: stdout)')
    parser.add_argument('--ilocal', action='store_true',
                        help='iLocal-Kontextanalyse durchführen')
    parser.add_argument('--crx', action='store_true',
                        help='CRX (direct CHARE inference) verwenden')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Ausführliche Ausgabe')
    parser.add_argument('--stats', action='store_true',
                        help='Zeige Token-Statistiken')

    args = parser.parse_args(argv)

    if not os.path.isdir(args.dir):
        print(f"Fehler: Verzeichnis '{args.dir}' nicht gefunden.", file=sys.stderr)
        sys.exit(1)

    yaml_files = find_yaml_files(args.dir)
    if not yaml_files:
        print(f"Keine YAML-Dateien in '{args.dir}' gefunden.", file=sys.stderr)
        sys.exit(1)

    print(f"Gefundene YAML-Dateien: {len(yaml_files)}", file=sys.stderr)

    if args.ilocal:
        _report_ilocal_contexts(yaml_files)

    print("\n=== Tokenisierung ===", file=sys.stderr)
    tokenizer = YAMLTokenizer(resolve_includes=False)
    all_sequences, failures = _tokenize_files(tokenizer, yaml_files, args.verbose)

    if failures:
        # Always surface skipped files; the per-file reasons stay behind
        # --verbose to keep the default output short.
        print(f" Dateien mit Fehlern übersprungen: {failures}", file=sys.stderr)

    if not all_sequences:
        print("Keine Sequenzen extrahiert.", file=sys.stderr)
        sys.exit(1)

    print(f" Sequenzen extrahiert: {len(all_sequences)}", file=sys.stderr)
    lengths = [len(s) for s in all_sequences]
    print(f" Längen: min={min(lengths)}, max={max(lengths)}, "
          f"avg={sum(lengths)/len(lengths):.1f}", file=sys.stderr)

    if args.stats:
        stats = tokenizer.get_statistics()
        print("\n=== Token-Statistiken ===", file=sys.stderr)
        # Only the first 30 entries are shown to keep the listing readable.
        for token, count in list(stats.items())[:30]:
            print(f" {token}: {count}", file=sys.stderr)

    print("\n=== k-ORE Inferenz ===", file=sys.stderr)
    kore = kOREInference(k_max=args.k_max)

    if args.crx:
        result = kore.infer_with_crx(all_sequences)
        label = "Methode"
    else:
        result = kore.infer(all_sequences)
        label = "Bestes k"

    # Both inference paths share the same empty-result handling, so an
    # empty CRX result no longer fails on tuple unpacking.
    if result:
        _, expr, detail = result
        print(f" {label}: {detail}", file=sys.stderr)
    else:
        expr = "∅"
        print(" Kein Ergebnis", file=sys.stderr)

    print(f" Inferierter Ausdruck: {expr}", file=sys.stderr)

    print("\n=== One-Shot Template ===", file=sys.stderr)
    print(file=sys.stderr)
    template = generate_template(expr, context_key=args.context)

    if args.output:
        # Explicit UTF-8: the template may contain non-ASCII characters and
        # must not depend on the platform's default encoding.
        with open(args.output, 'w', encoding='utf-8') as out:
            out.write(template)
        print(f"Template geschrieben nach: {args.output}", file=sys.stderr)
    else:
        print(template)
|
|
|
|
|
|
# Script entry point: start the CLI only when this module is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|