Spaces:

Agents-MCP-Hackathon
/

drift-detector

Sleeping

App Files Files Community

Sars6 committed on Jun 9

Commit

dc9d63b

1 Parent(s): 9fa019c

Idk if this works. The LLM added its own stuff.

Browse files

Files changed (2) hide show

ourllm.py +7 -0
server.py +152 -72

ourllm.py ADDED Viewed

	@@ -0,0 +1,7 @@

+async def genratequestionnaire(model, capabilities):
+    """Generate the diagnostic questionnaire for a model (placeholder).
+
+    Declared as a coroutine because server.py awaits this call; awaiting the
+    ``None`` returned by a plain function raises ``TypeError``.
+
+    Args:
+        model: Name of the model the questionnaire is generated for.
+        capabilities: Description of the model's capabilities.
+
+    Returns:
+        None. Questionnaire generation is not implemented yet.
+    """
+    return None
+async def gradeanswers(old_answers, new_answers):
+    """Grade new answers against the baseline answers (placeholder).
+
+    Declared as a coroutine because server.py awaits this call; awaiting the
+    ``None`` returned by a plain function raises ``TypeError``.
+
+    Args:
+        old_answers: Answers stored in the baseline.
+        new_answers: Answers collected in the current run.
+
+    Returns:
+        None. Grading is not implemented yet.
+    """
+    return None

server.py CHANGED Viewed

@@ -1,92 +1,172 @@
-# server.py
 import asyncio
 from mcp.server import Server
 from mcp.server.stdio import stdio_server
-import mcp.types as types
-# Define diagnostic prompts statically for now
-PROMPTS = {
-    "drift-diagnostics": types.Prompt(
-        name="drift-diagnostics",
-        description="Run a diagnostic questionnaire to test LLM consistency.",
-        arguments=[],
-    )
-}
-# Setup server
-app = Server("mcp-drift-server", version="0.1.0")
-@app.list_prompts()
-async def list_prompts() -> list[types.Prompt]:
-    return list(PROMPTS.values())
-@app.get_prompt()
-async def get_prompt(name: str, arguments: dict[str, str] | None = None) -> types.GetPromptResult:
-    if name not in PROMPTS:
-        raise ValueError(f"Prompt not found: {name}")
-    # Static message for MVP – replace with dynamic question set later
-    return types.GetPromptResult(
-        messages=[
-            types.PromptMessage(
-                role="user",
-                content=types.TextContent(
-                    type="text",
-                    text="Answer the following: What's the capital of France?"
-                )
-            ),
-            types.PromptMessage(
-                role="user",
-                content=types.TextContent(
-                    type="text",
-                    text="Explain why the sky is blue."
-                )
-            ),
-        ]
-    )
-from mcp.server import Server
-import mcp.types as types
-# Assuming 'app' is your MCP Server instance
-async def sample(app: Server, messages: list[types.SamplingMessage]):
-    result = await app.request_context.session.create_message(
-        messages=messages,
-        max_tokens=300,
-        temperature=0.7
-    )
-    return result
 @app.list_tools()
 async def list_tools() -> list[types.Tool]:
     return [
         types.Tool(
-            name="init_diagnostics",
-            description="Run diagnostic questionnaire on the connected LLM.",
-            inputSchema={"model_name": "Name of the LLM model"},
-        )
     ]
-@app.call_tool()
-async def call_tool(name: str, arguments: dict[str, str] | None = None) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-    """
-    Initializes diagnostics by running the questionnaire on the connected LLM.
-    """
-    # You could fetch dynamic questions here if needed
-    questions = [
-        types.SamplingMessage(role="user", content=types.TextContent(type="text", text="What is the capital of France?")),
-        types.SamplingMessage(role="user", content=types.TextContent(type="text", text="Why is the sky blue?")),
-    ]
-    response = await sample(app, questions)
-    # Return the assistant’s message(s) back to the caller
-    return [types.TextContent(type="text", text=str(response.content))]
-# Main entrypoint
 async def main():
     async with stdio_server() as streams:
         await app.run(streams[0], streams[1], app.create_initialization_options())

 import asyncio
+import json
+import os
+from typing import Any
+import mcp.types as types
+from mcp import CreateMessageResult
 from mcp.server import Server
 from mcp.server.stdio import stdio_server
+from ourllm import genratequestionnaire, gradeanswers
+# Directory (relative to the working directory) that holds models.json and
+# the per-model baseline / latest-result JSON files.
+DATA_DIR = "data"
+# Created at import time so later writes into DATA_DIR find the directory.
+os.makedirs(DATA_DIR, exist_ok=True)
+# MCP server instance; the tool decorators below register handlers on it.
+app = Server("mcp-drift-server")
+# In-memory registry: model name -> details dict.
+# NOTE(review): starts empty on every import and nothing visible here reads
+# models.json back, so saved entries do not appear to survive a restart — confirm.
+registered_models = {}
+def get_all_models():
+    """Return the names of every registered model as a new list."""
+    return [*registered_models]
+def search_models(query: str):
+    """Return the registered model names that contain ``query``, ignoring case."""
+    matches = []
+    for name in registered_models:
+        if query.lower() in name.lower():
+            matches.append(name)
+    return matches
+def get_model_details(model_name: str):
+    """Look up the stored details for ``model_name``; ``None`` when it is not registered."""
+    details = registered_models.get(model_name)
+    return details
+def save_model(model_name: str, model_details: dict):
+    """Register or update a model and persist the registry to ``models.json``.
+
+    The updated registry is serialized before anything is modified, so details
+    that cannot be encoded as JSON raise without truncating the file on disk
+    or leaving a bad entry in the in-memory registry.
+
+    Args:
+        model_name: Registry key for the model.
+        model_details: JSON-serializable details stored under that key.
+
+    Raises:
+        TypeError: If ``model_details`` contains values JSON cannot encode.
+    """
+    # Build the would-be registry; an existing key keeps its position.
+    updated = {**registered_models, model_name: model_details}
+    payload = json.dumps(updated, indent=2)
+    with open(os.path.join(DATA_DIR, "models.json"), "w", encoding="utf-8") as f:
+        f.write(payload)
+    # Commit to memory only after the file was written successfully.
+    registered_models[model_name] = model_details
 @app.list_tools()
 async def list_tools() -> list[types.Tool]:
+    """Describe the tools this server exposes, with their JSON input schemas."""
+    # Both tools take the same "model" argument; each schema gets its own copy.
+    model_property = {
+        "type": "string",
+        "description": "The name of the model to run diagnostics on",
+    }
+    baseline_schema = {
+        "type": "object",
+        "properties": {
+            "model": dict(model_property),
+            "model_capabilities": {
+                "type": "string",
+                "description": "Full description of the model's capabilities, including any special features",
+            },
+        },
+        "required": ["model", "model_capabilities"],
+    }
+    drift_schema = {
+        "type": "object",
+        "properties": {"model": dict(model_property)},
+        "required": ["model"],
+    }
     return [
         types.Tool(
+            name="run_initial_diagnostics",
+            description="Generate and store baseline diagnostics for a connected LLM.",
+            inputSchema=baseline_schema,
+        ),
+        types.Tool(
+            name="check_drift",
+            description="Re-run diagnostics and compare to baseline for drift scoring.",
+            inputSchema=drift_schema,
+        ),
     ]
+# === Sampling Wrapper ===
+async def sample(messages: list[types.SamplingMessage], max_tokens=300) -> CreateMessageResult:
+    """Ask the connected client to complete ``messages`` and return its reply.
+
+    The request is sent through the session of the request currently being
+    handled by ``app``, with a fixed temperature of 0.7.
+    """
+    session = app.request_context.session
+    reply = await session.create_message(
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=0.7,
+    )
+    return reply
+# === Baseline File Paths ===
+def get_baseline_path(model_name):
+    """Return the path of the baseline JSON file for ``model_name`` in DATA_DIR.
+
+    The model name arrives in tool-call arguments, so every character other
+    than letters, digits, ``-``, ``_`` and ``.`` is replaced with ``_``. This
+    keeps path separators out of the file name, so the result cannot point
+    outside DATA_DIR. Names made only of those characters map to the same
+    path as before.
+    """
+    safe_name = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in model_name)
+    return os.path.join(DATA_DIR, f"{safe_name}_baseline.json")
+def get_response_path(model_name):
+    """Return the path of the latest-result JSON file for ``model_name`` in DATA_DIR.
+
+    The model name arrives in tool-call arguments, so every character other
+    than letters, digits, ``-``, ``_`` and ``.`` is replaced with ``_``. This
+    keeps path separators out of the file name, so the result cannot point
+    outside DATA_DIR. Names made only of those characters map to the same
+    path as before.
+    """
+    safe_name = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in model_name)
+    return os.path.join(DATA_DIR, f"{safe_name}_latest.json")
+# === Core Logic ===
+async def run_initial_diagnostics(arguments: dict[str, Any]) -> list[types.TextContent]:
+    """Generate a questionnaire, collect the client's answers, and store them as the baseline.
+
+    Args:
+        arguments: Tool arguments; must contain ``"model"`` and
+            ``"model_capabilities"``.
+
+    Returns:
+        A single text item confirming that the baseline was stored.
+
+    Raises:
+        ValueError: If a required argument is missing or no questionnaire
+            was produced.
+    """
+    if not arguments or "model" not in arguments:
+        raise ValueError("Model details is required")
+    if "model_capabilities" not in arguments:
+        raise ValueError("Model capabilities are required")
+    model = arguments["model"]
+    # 1. Ask the server's internal LLM to generate a questionnaire
+    questions = await genratequestionnaire(model, arguments["model_capabilities"])  # Server-side trusted LLM
+    if not questions:
+        raise ValueError("No questionnaire was generated for model: " + model)
+    # 2. Put each question to the target LLM (i.e., the client) separately.
+    #    sample() returns one CreateMessageResult per request, not a list of
+    #    messages, so one request per question keeps answers aligned.
+    answers = []
+    for question in questions:
+        reply = await sample([question])
+        # Assumes a text reply, as the original code did.
+        answers.append(reply.content.text)
+    # 3. Save Q/A pair
+    with open(get_baseline_path(model), "w", encoding="utf-8") as f:
+        json.dump({
+            "questions": [m.content.text for m in questions],
+            "answers": answers,
+        }, f, indent=2)
+    return [types.TextContent(type="text", text="Baseline stored for model: " + model)]
+async def check_drift(arguments: dict[str, str]) -> list[types.TextContent]:
+    """Re-ask the baseline questions and score how far the answers have drifted.
+
+    Args:
+        arguments: Tool arguments; must contain ``"model"``.
+
+    Returns:
+        Text items with the drift score and an alert line, or a single item
+        saying that no baseline exists for the model.
+
+    Raises:
+        ValueError: If ``"model"`` is missing from ``arguments``.
+    """
+    if not arguments or "model" not in arguments:
+        raise ValueError("Model details is required")
+    model = arguments["model"]
+    baseline_path = get_baseline_path(model)
+    if not os.path.exists(baseline_path):
+        return [types.TextContent(type="text", text="No baseline exists for model: " + model)]
+    # Keep the file open only while parsing it.
+    with open(baseline_path, encoding="utf-8") as f:
+        data = json.load(f)
+    old_answers = data["answers"]
+    # 1. Ask the model again, one request per question. sample() returns one
+    #    CreateMessageResult per request, not a list of messages.
+    new_answers = []
+    for text in data["questions"]:
+        question = types.SamplingMessage(role="user", content=types.TextContent(type="text", text=text))
+        reply = await sample([question])
+        # Assumes a text reply, as the original code did.
+        new_answers.append(reply.content.text)
+    # 2. Grade the new answers against the baseline
+    grading_response = await gradeanswers(old_answers, new_answers)
+    # NOTE(review): assumes the grader returns a sequence whose first item
+    # carries the score as text — confirm against ourllm.gradeanswers.
+    drift_score = grading_response[0].content.text.strip()
+    # 3. Save the response
+    with open(get_response_path(model), "w", encoding="utf-8") as f:
+        json.dump({
+            "new_answers": new_answers,
+            "drift_score": drift_score
+        }, f, indent=2)
+    # 4. Optionally alert if high drift; a non-numeric grade must not crash
+    #    the tool after the results were already saved.
+    try:
+        drifted = float(drift_score) > 50
+    except ValueError:
+        alert = "⚠️ Drift score is not numeric; unable to evaluate drift."
+    else:
+        alert = "🚨 Significant drift detected!" if drifted else "✅ Drift within acceptable limits."
+    return [
+        types.TextContent(type="text", text=f"Drift score for {model}: {drift_score}"),
+        types.TextContent(type="text", text=alert)
+    ]
+@app.call_tool()
+async def call_tool(name: str, arguments: dict[str, Any] | None = None):
+    """Route a tool invocation to the coroutine that implements it.
+
+    Raises:
+        ValueError: If ``name`` is not one of the known tools.
+    """
+    handlers = {
+        "run_initial_diagnostics": run_initial_diagnostics,
+        "check_drift": check_drift,
+    }
+    handler = handlers.get(name)
+    if handler is None:
+        raise ValueError(f"Unknown tool: {name}")
+    return await handler(arguments)
+# === Entrypoint ===
 async def main():
+    """Serve ``app`` over the stdio transport."""
+    async with stdio_server() as streams:
+        reader, writer = streams[0], streams[1]
+        init_options = app.create_initialization_options()
+        await app.run(reader, writer, init_options)