grammar-inference-engine/bex/mcp_server.py

64 lines
2.2 KiB
Python

"""Dervish — MCP server.
Provides tools to infer regular expression grammars from example sequences.
Run as: python -m bex.mcp_server
"""
from mcp.server.fastmcp import FastMCP
from .ensemble import infer_ensemble, _matches
# Server instance that the @mcp.tool() function below registers on and that
# main() runs. Logging is limited to ERROR — presumably so log output does not
# interfere with the protocol stream; TODO confirm.
mcp = FastMCP("grammar-inference", log_level="ERROR")
@mcp.tool()
def infer_best_grammar(
    sequences: list[list[str]],
    prefer: str = "",
    kmax: int = 2,
    N: int = 3,
) -> str:
    """Infer a compact grammar from example sequences.

    Use this when you have examples of sequential data and want to learn
    the pattern. The grammar compresses N examples into ~100 chars — far
    fewer tokens than passing all examples. Pass the existing sequences,
    get back a pattern you can follow to generate new instances.

    Args:
        sequences: List of sequences, each a list of strings (symbols in
            the order they appear). Example: [["file","copy","command"],
            ["file","template","command"]].
        prefer: Optional — 'crx' for full coverage (accepts all examples),
            'idregex' for minimal core (only what every example shares).
            Default: runs both and picks best by MDL score.
        kmax: Maximum k for iDRegEx k-ORE inference.
        N: Number of EM iterations for iDRegEx.

    Returns:
        A formatted string with the best grammar, scores, and explanation.
        Grammar notation: a.b = a then b, (a+b) = a or b, r? = optional,
        r+ = one or more, r+? = zero or more.
    """
    # NOTE(review): the docstring above is likely what FastMCP publishes as the
    # tool description, so keep it written for the calling client — confirm.

    # An empty (falsy) `prefer` is forwarded to the ensemble as None.
    outcome = infer_ensemble(sequences, kmax=kmax, N=N, prefer=prefer or None)

    winner = outcome['best']
    if winner is None:
        return f"No grammar found. {outcome['why']}"

    report = [
        f"Best: {winner['algorithm']} (MDL {winner['mdl_score']})",
        f"Grammar: {winner['grammar']}",
        "",
    ]

    # The per-algorithm comparison table is only emitted when there is more
    # than one candidate to compare.
    candidates = outcome['all']
    if len(candidates) > 1:
        total = len(sequences)
        for cand in candidates:
            # Count how many of the input sequences this candidate accepts.
            hits = len([seq for seq in sequences if _matches(cand['grammar'], seq)])
            algo, score = cand['algorithm'], cand['mdl_score']
            report.append(f" {algo:10s} MDL={score:>8.2f} match={hits}/{total}")
        report.append("")

    report.append(f"Why: {outcome['why']}")
    return "\n".join(report)
def main():
    """Run the module-level MCP server by calling ``mcp.run()``."""
    mcp.run()
# Start the server only when this file is executed as a script
# (e.g. `python -m bex.mcp_server`), not when it is imported.
if __name__ == "__main__":
    main()