""" iLocal — context-based inference (Bex 2007). Per Bex et al. 2007: "Inferring XML Schema Definitions from XML Data" Extracts (context, sequence) pairs from YAML trees, where context is the YAML key (container key). Adapted for YAML: - Context = YAML key whose value is a list (e.g. tasks, steps) - Sequence = item keys within that list (e.g. apt, template, service) Uses container keys directly instead of file paths (design decision: no path overhead). """ import yaml def extract_contexts_from_yaml(data, context_prefix=None): """ Extrahiert (context, sequence)-Paare aus geparstem YAML. Args: data: Geparste YAML-Daten (dict oder list) context_prefix: Interner Prefix für verschachtelte Kontexte Returns: dict: {context_key: [sequence1, sequence2, ...]} """ contexts = {} def walk(node, prefix=None): if isinstance(node, dict): for key, value in node.items(): full_key = f"{prefix}.{key}" if prefix else str(key) if isinstance(value, list) and len(value) > 0: seq = [] for item in value: if isinstance(item, dict): item_key = next( (k for k in item if k != 'name' and not k.startswith('_')), None ) if item_key: seq.append(item_key) else: named = item.get('name', str(item)) seq.append(f"named:{named[:20]}") else: seq.append(str(item)) if full_key not in contexts: contexts[full_key] = [] contexts[full_key].append(seq) for item in value: walk(item, full_key) elif isinstance(value, dict): walk(value, full_key) elif isinstance(value, list): for item in value: walk(item, full_key) elif isinstance(node, list): for item in node: walk(item, prefix) walk(data) return contexts def extract_contexts_from_yaml_string(yaml_string): """ Extrahiert Kontext-Sequenzen aus einem YAML-String. Args: yaml_string: YAML-String Returns: dict: {context_key: [sequence1, sequence2, ...]} """ try: data = yaml.safe_load(yaml_string) except yaml.YAMLError: return {} if data is None: return {} return extract_contexts_from_yaml(data) def extract_contexts_from_file(filepath): """ Extrahiert Kontext-Sequenzen aus einer YAML-Datei. Args: filepath: Pfad zur YAML-Datei Returns: dict: {context_key: [sequence1, sequence2, ...]} """ with open(filepath) as f: return extract_contexts_from_yaml_string(f.read()) def reduce_contexts(context_groups): """ reduce — Generalisierung nach Bex 2007 (Algorithmus reduce). Identifiziert äquivalente Kontext-Modelle und fasst sie zusammen: - Wenn zwei Kontexte die gleiche Sequenz-Struktur haben, werden sie zu einem generalisierten Kontext zusammengefasst Args: context_groups: dict of {context_key: [sequences]} Returns: dict: {generalized_context: [sequences]} (reduziert) """ if not context_groups: return {} signature_map = {} for ctx, seqs in context_groups.items(): # Signatur = sortierte Menge der (Länge, erstes/letztes Element) sig_parts = [] for s in seqs: first = s[0] if s else "∅" last = s[-1] if s else "∅" sig_parts.append((len(s), first, last)) signature = tuple(sorted(set(sig_parts))) if signature not in signature_map: signature_map[signature] = [] signature_map[signature].append(ctx) # Gruppen mit gleicher Signatur → merge result = {} for sig, ctx_list in signature_map.items(): merged_ctx = "|".join(sorted(ctx_list)) merged_seqs = [] for ctx in ctx_list: merged_seqs.extend(context_groups[ctx]) result[merged_ctx] = merged_seqs return result def iLocal(yaml_documents): """ iLocal — Kontext-Inferenz nach Bex 2007. Args: yaml_documents: Liste von YAML-Strings oder Dateipfaden Returns: dict: {generalized_context: [sequences]} """ all_contexts = {} for doc in yaml_documents: if '\n' in doc or '\r' in doc: contexts = extract_contexts_from_yaml_string(doc) else: contexts = extract_contexts_from_file(doc) for ctx, seqs in contexts.items(): if ctx not in all_contexts: all_contexts[ctx] = [] all_contexts[ctx].extend(seqs) return reduce_contexts(all_contexts)