81 lines
2.5 KiB
Python
81 lines
2.5 KiB
Python
"""Convert YAML files to key-path sequences for BEX grammar inference."""
|
|
|
|
from pathlib import Path
|
|
import yaml
|
|
|
|
|
|
def yaml_to_keypath_sequence(data, prefix=""):
    """Convert parsed YAML data to a sequence of key paths (DFS traversal).

    Each scalar leaf emits its full dotted key path as one symbol; values
    themselves are never included.  List items share a generic ``[]`` marker
    appended to the parent path (no indices), so every scalar item of the
    same list emits the same symbol.

    Mapping keys are converted with ``str()`` before being joined.  YAML
    permits non-string keys (``on:`` loads as ``True``, ``0:`` as an int);
    without the conversion a top-level key would be emitted as a non-string
    symbol, and a falsy key such as ``0`` would disappear from the paths of
    its children.

    Args:
        data: Parsed YAML value (mapping, list, or scalar).
        prefix: Key path of ``data`` inside the document; empty at the root.

    Returns:
        List of key-path strings in traversal order.  Empty containers and a
        bare scalar ``data`` contribute nothing.
    """
    seq = []
    if isinstance(data, dict):
        for key, value in data.items():
            key = str(key)
            path = f"{prefix}.{key}" if prefix else key
            if isinstance(value, (dict, list)):
                seq.extend(yaml_to_keypath_sequence(value, path))
            else:
                seq.append(path)
    elif isinstance(data, list):
        # The marker does not depend on the item, so build it once.
        list_prefix = f"{prefix}[]" if prefix else "[]"
        for item in data:
            if isinstance(item, (dict, list)):
                seq.extend(yaml_to_keypath_sequence(item, list_prefix))
            else:
                seq.append(list_prefix)
    return seq
|
|
|
|
|
|
def yaml_file_to_sequence(filepath):
    """Load a YAML file and convert it to a key-path sequence.

    Args:
        filepath: Path of the YAML file to read.

    Returns:
        The key paths produced by ``yaml_to_keypath_sequence`` for the
        parsed document, or ``[]`` when the file parses to ``None``
        (for example an empty file).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not a single valid YAML document.
    """
    # Binary mode hands encoding detection to the YAML loader (UTF-8 unless
    # a BOM says otherwise) instead of relying on the locale's default
    # text encoding.
    with open(filepath, "rb") as f:
        data = yaml.safe_load(f)
    if data is None:
        return []
    return yaml_to_keypath_sequence(data)
|
|
|
|
|
|
def is_vault_file(filepath):
    """Check if a file is an Ansible vault file (encrypted).

    Only the first 100 bytes are inspected.  The file counts as a vault file
    when that head contains ``$ANSIBLE_VAULT`` or starts with ``!vault |``.

    The head is read as raw bytes so the check does not depend on the
    locale's text encoding and cannot fail on undecodable bytes elsewhere in
    the file.

    Args:
        filepath: Path of the file to sniff.

    Returns:
        True if a vault marker is found; False otherwise, including when the
        file cannot be opened or read.
    """
    try:
        with open(filepath, "rb") as f:
            head = f.read(100)
    except (OSError, ValueError):
        # Best-effort sniffing: an unreadable file (or an invalid path such
        # as one with an embedded NUL) is simply "not a vault file".
        return False
    return b'$ANSIBLE_VAULT' in head or head.startswith(b'!vault |')
|
|
|
|
|
|
def collect_all_sequences(root_dir=".", include_vault=False, patterns=("*.yml",)):
    """Collect key-path sequences from all YAML files under a directory.

    Files are visited in sorted path order.  Any file with a path component
    named ``node_modules``, ``.venv``, ``__pycache__`` or ``.git`` is
    ignored.  Unless ``include_vault`` is true, files whose name contains
    ``vault`` or whose content looks like an Ansible vault are skipped too.
    Files that fail to load are reported on stdout and skipped; files that
    yield an empty sequence are omitted.

    Args:
        root_dir: Directory to search recursively.
        include_vault: If true, do not filter out vault files.
        patterns: Glob pattern, or iterable of glob patterns, matched
            recursively below ``root_dir``.  The default keeps the original
            ``*.yml``-only behaviour; pass ``("*.yml", "*.yaml")`` to pick
            up both extensions.

    Returns:
        list of (filepath, sequence) tuples.
    """
    if isinstance(patterns, str):
        patterns = (patterns,)
    skip_dirs = {'node_modules', '.venv', '__pycache__', '.git'}
    root = Path(root_dir)
    # A set removes paths matched by more than one pattern before sorting.
    candidates = sorted({p for pattern in patterns for p in root.rglob(pattern)})

    results = []
    for path in candidates:
        if not skip_dirs.isdisjoint(path.parts):
            continue
        # The name test comes first so the file is only opened for sniffing
        # when the name alone does not already mark it as a vault file.
        if not include_vault and ('vault' in path.name or is_vault_file(path)):
            continue
        try:
            seq = yaml_file_to_sequence(path)
        except Exception as e:
            # Deliberately broad: one unreadable or malformed file must not
            # abort the whole collection run.
            print(f" SKIP {path}: {e}")
            continue
        if seq:
            results.append((path, seq))
    return results
|
|
|
|
|
|
def sequences_to_crx(result_list):
    """Feed the collected sequences to CRX inference.

    Args:
        result_list: Iterable of ``(filepath, sequence)`` pairs; only the
            sequences are used, the file paths are dropped.

    Returns:
        ``'ε'`` when there are no sequences, otherwise whatever
        ``CRX().infer`` returns for the list of sequences.
    """
    from .crx import CRX

    key_path_runs = []
    for _path, run in result_list:
        key_path_runs.append(run)

    if key_path_runs:
        inferrer = CRX()
        return inferrer.infer(key_path_runs)
    return 'ε'
|