move format-specific adapters to examples/, purge format-specific MCP tools

This commit is contained in:
tobjend 2026-07-01 10:36:14 +02:00
parent 25d844d1f9
commit d7477344a6
3 changed files with 2 additions and 84 deletions

View file

@ -1,20 +1,14 @@
"""Grammar Inference Engine — MCP server.
"""Dervish — MCP server.
Provides tools to infer regular expression grammars from example sequences.
Run as: python -m bex.mcp_server
"""
import json
import sys
from pathlib import Path
from typing import Any
from mcp.server.fastmcp import FastMCP
from .crx import CRX
from .idregex import idregex
from .ensemble import infer_ensemble, _matches
from .yaml_to_seq import yaml_file_to_sequence, sequences_to_crx
mcp = FastMCP("grammar-inference", log_level="ERROR")
@ -54,8 +48,7 @@ def infer_best_grammar(
N: int = 3,
) -> str:
"""Infer a compact grammar from example sequences. Use this when you
need to generate structured content (Ansible roles, CI configs, Helm
values, YAML configs, etc.) and have existing examples to learn from.
have examples of sequential data and want to learn the pattern.
The grammar compresses N examples into ~100 chars far fewer tokens
than passing all examples. Pass the existing sequences, get back a
@ -92,81 +85,6 @@ def infer_best_grammar(
return "\n".join(lines)
@mcp.tool()
def infer_yaml_grammar(
    yaml_dir: str,
    pattern: str = "**/*.yml",
    method: str = "crx",
) -> str:
    """Learn one grammar describing the structure shared by a set of YAML files.

    Every matching file is converted to a sequence with
    ``yaml_file_to_sequence`` (key paths in DFS traversal order), and the
    collected sequences are handed to the selected inference algorithm.

    Args:
        yaml_dir: Root directory to search for YAML files.
        pattern: Glob pattern, matched recursively below ``yaml_dir``
            (default: **/*.yml).
        method: ``'crx'`` selects CRX; any other value selects idregex
            (called with ``kmax=2, N=3``).

    Returns:
        The inferred regular expression grammar, the placeholder
        ``"ε (no sequences found)"`` when no file produced a non-empty
        sequence, or an empty string when idregex returns a falsy result.
    """
    collected = []
    for candidate in sorted(Path(yaml_dir).rglob(pattern)):
        if not candidate.is_file():
            continue
        try:
            key_paths = yaml_file_to_sequence(candidate)
            if key_paths:
                collected.append(key_paths)
        except Exception:
            # Best-effort scan: a file that cannot be converted is skipped
            # so that one bad file does not abort the whole inference.
            continue

    if not collected:
        return "ε (no sequences found)"

    if method != "crx":
        return idregex(collected, kmax=2, N=3) or ""
    return CRX().infer(collected)
@mcp.tool()
def infer_ansible_role_grammar(roles_dir: str = ".") -> str:
    """Infer grammars from Ansible role task module sequences.

    Role sequences and their grouping into categories come from
    ``role_grammar.collect_all_role_sequences``; a grammar is learned with
    ``role_grammar.learn_grammar`` for every category that holds more than
    one role. Categories are reported in sorted order.

    Args:
        roles_dir: Path to the Ansible roles directory.

    Returns:
        A formatted report with per-category grammars and role listings,
        ``"No roles found."`` when nothing was collected, or
        ``"role_grammar module not available"`` when the optional
        ``role_grammar`` module cannot be imported.
    """
    # Imported lazily so the server still starts when the optional
    # role_grammar module is absent.
    try:
        from .role_grammar import collect_all_role_sequences, learn_grammar
    except ImportError:
        return "role_grammar module not available"

    all_roles, by_category = collect_all_role_sequences(roles_dir)
    if not all_roles:
        return "No roles found."

    lines = [f"Found {len(all_roles)} roles in {len(by_category)} categories\n"]
    for cat in sorted(by_category):
        items = by_category[cat]
        lines.append(f"── {cat} ({len(items)} roles) ──")
        # A single example gives nothing to generalise over, so a grammar
        # is only learned when the category has at least two roles.
        if len(items) > 1:
            seqs = [s for _, s in items]
            lines.append(f" Grammar: {learn_grammar(seqs)}")
        lines.append(f" Roles: {', '.join(n for n, _ in items)}")
        lines.append("")
    return "\n".join(lines)
def main() -> None:
    """Entry point: run the module-level MCP server via ``mcp.run()``."""
    mcp.run()