"""Dervish — MCP server. Provides tools to infer regular expression grammars from example sequences. Run as: python -m bex.mcp_server """ from mcp.server.fastmcp import FastMCP from .crx import CRX from .idregex import idregex from .ensemble import infer_ensemble, _matches mcp = FastMCP("grammar-inference", log_level="ERROR") @mcp.tool() def infer_grammar( sequences: list[list[str]], method: str = "crx", kmax: int = 2, N: int = 3, ) -> str: """Infer a grammar (regular expression) from example sequences. Args: sequences: List of sequences, each a list of symbols (strings). method: Algorithm to use — 'crx' (fast, deterministic) or 'idregex' (probabilistic, handles noise better). kmax: Maximum k for k-ORE inference (iDRegEx only). N: Number of EM iterations (iDRegEx only). Returns: A regular expression string describing the inferred grammar. """ if method == "crx": return CRX().infer(sequences) elif method == "idregex": result = idregex(sequences, kmax=kmax, N=N) return result or "∅" else: raise ValueError(f"Unknown method: {method}. Use 'crx' or 'idregex'.") @mcp.tool() def infer_best_grammar( sequences: list[list[str]], prefer: str = "", kmax: int = 2, N: int = 3, ) -> str: """Infer a compact grammar from example sequences. Use this when you have examples of sequential data and want to learn the pattern. The grammar compresses N examples into ~100 chars — far fewer tokens than passing all examples. Pass the existing sequences, get back a pattern you can follow to generate new instances. Args: sequences: List of sequences, each a list of strings (symbols in the order they appear). Example: [["file","copy","command"], ["file","template","command"]]. prefer: Optional — 'crx' for full coverage (accepts all examples), 'idregex' for minimal core (only what every example shares). Default: runs both and picks best by MDL score. kmax: Maximum k for iDRegEx k-ORE inference. N: Number of EM iterations for iDRegEx. Returns: A formatted string with the best grammar, scores, and explanation. Grammar notation: a.b = a then b, (a+b) = a or b, r? = optional, r+ = one or more, r+? = zero or more. """ pref = prefer if prefer else None result = infer_ensemble(sequences, kmax=kmax, N=N, prefer=pref) if result['best'] is None: return f"No grammar found. {result['why']}" lines = [f"Best: {result['best']['algorithm']} (MDL {result['best']['mdl_score']})", f"Grammar: {result['best']['grammar']}", ""] if len(result['all']) > 1: for r in result['all']: m = sum(1 for s in sequences if _matches(r['grammar'], s)) lines.append(f" {r['algorithm']:10s} MDL={r['mdl_score']:>8.2f} match={m}/{len(sequences)}") lines.append("") lines.append(f"Why: {result['why']}") return "\n".join(lines) def main(): mcp.run() if __name__ == "__main__": main()