ourllm.py changed + Working on the README.md
Changed files:
- .idea/webResources.xml +14 -0
- agent.py +1 -1
- drift_detector.sqlite3 +0 -0
- ourllm.py +56 -15
- requirements.txt +2 -1
- server.py +1 -2
.idea/webResources.xml
ADDED
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="WebResourcesPaths">
+    <contentEntries>
+      <entry url="file://$PROJECT_DIR$">
+        <entryData>
+          <resourceRoots>
+            <path value="file://$PROJECT_DIR$/useful-stuff" />
+          </resourceRoots>
+        </entryData>
+      </entry>
+    </contentEntries>
+  </component>
+</project>
agent.py
CHANGED
@@ -6,7 +6,7 @@ fast = FastAgent("Drift Test Agent")
 
 @fast.agent(
     name="diagnostics",
-    instruction="Run diagnostics using the MCP server tool.",
+    instruction="Your name is 'diagnostics'. Run diagnostics using the MCP server tool.",
     servers=["drift-server"]
 )
 async def main():
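For context, the changed `instruction` string is what fast-agent hands the model at startup, so the commit is pinning the agent's self-identification in addition to its task. A minimal sketch of how such an agent is typically driven, assuming fast-agent-mcp's documented `FastAgent` import and `fast.run()` pattern (the message text is illustrative):

```python
import asyncio
from mcp_agent.core.fastagent import FastAgent

fast = FastAgent("Drift Test Agent")

@fast.agent(
    name="diagnostics",
    instruction="Your name is 'diagnostics'. Run diagnostics using the MCP server tool.",
    servers=["drift-server"],
)
async def main():
    # fast.run() connects the configured MCP servers; a message to the
    # agent can then trigger the drift-server diagnostics tool.
    async with fast.run() as agent:
        await agent("Run diagnostics on the current model.")

if __name__ == "__main__":
    asyncio.run(main())
```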
drift_detector.sqlite3
CHANGED
Binary files a/drift_detector.sqlite3 and b/drift_detector.sqlite3 differ
ourllm.py
CHANGED
@@ -1,17 +1,37 @@
 import difflib
 from typing import List
 import mcp.types as types
+from langchain.chat_models import init_chat_model
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
+
+llm = init_chat_model("llama-3.1-8b-instant", model_provider='groq')
+
 
 def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMessage]:
     """
     Generate a baseline questionnaire for the given model.
     Returns a list of SamplingMessage instances (role="user") with diagnostic questions.
     """
-
-
-
-
-
+    global llm
+    questions = []
+    previously_generated = ""
+
+    for i in range(0, 5):
+        response = llm.invoke("Generate a questionnaire for a model with the following capabilities:\n"
+                              "Model Name: " + model + "\n"
+                              "Capabilities Overview:\n" + capabilities + "\n"
+                              "Please provide one more question that cover the model's capabilities and typical use-cases.\n"
+                              "Previously generated questions:\n" + previously_generated +
+                              "\nQuestion " + str(i + 1) + ":")
+        new_question = str(response.content)
+        questions.append(new_question)
+        # Update previously_generated to include the new question
+        if previously_generated:
+            previously_generated += "\n"
+        previously_generated += f"Question {i+1}: {new_question}"
+
     return [
         types.SamplingMessage(
             role="user",
@@ -23,20 +43,41 @@ def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMe
 
 def gradeanswers(old_answers: List[str], new_answers: List[str]) -> List[types.SamplingMessage]:
     """
-
+    Use the LLM to compare the old and new answers to compute a drift score.
     Returns a list with a single SamplingMessage (role="assistant") whose content.text is the drift percentage.
     """
-
-
+    global llm
+
+    if not old_answers or not new_answers:
         drift_pct = 0.0
     else:
-        #
-
-
-
-
-
+        # Prepare a prompt with old and new answers for the LLM to analyze
+        prompt = "You're tasked with detecting semantic drift between two sets of model responses.\n\n"
+        prompt += "Original responses:\n"
+        for i, ans in enumerate(old_answers):
+            prompt += f"Response {i+1}: {ans}\n\n"
+
+        prompt += "New responses:\n"
+        for i, ans in enumerate(new_answers):
+            prompt += f"Response {i+1}: {ans}\n\n"
+
+        prompt += "Analyze the semantic differences between the original and new responses. "
+        prompt += "Provide a drift percentage score (0-100%) that represents how much the meaning, "
+        prompt += "intent, or capabilities have changed between the two sets of responses. "
+        prompt += "Only return the numerical percentage value without any explanation or additional text."
+
+        # Get the drift assessment from the LLM
+        response = llm.invoke(prompt)
+        drift_text = str(response.content).strip()
+
+        # Extract just the numerical value if there's extra text
+        import re
+        drift_match = re.search(r'(\d+\.?\d*)', drift_text)
+        if drift_match:
+            drift_pct = float(drift_match.group(1))
+        else:
+            # Fallback if no number found
+            drift_pct = 0.0
 
     drift_text = f"{drift_pct}"
     return [
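Taken together, the rewrite turns both helpers into LLM round trips: one Groq call per question in `genratequestionnaire` (five in total, each prompt carrying the previously generated questions to discourage duplicates), and a single grading call plus a regex fallback in `gradeanswers`. A minimal caller sketch under stated assumptions — the model name, capability string, and answer lists are illustrative, the exact packing of questions into the returned messages is truncated in the diff, and reading the drift value from `graded[0].content.text` follows the docstring:

```python
from ourllm import genratequestionnaire, gradeanswers

# Baseline run: generates the diagnostic questionnaire via the Groq model.
baseline = genratequestionnaire(
    model="llama-3.1-8b-instant",
    capabilities="General-purpose chat model for Q&A and summarization.",
)
print(f"{len(baseline)} baseline message(s) generated")

# Later run: grade stored baseline answers against fresh answers
# to the same questions.
old_answers = ["Paris is the capital of France."]
new_answers = ["The capital of France is Paris."]
graded = gradeanswers(old_answers, new_answers)
# Per the docstring, the single assistant message's content.text
# holds the drift percentage, e.g. "5.0".
print("drift %:", graded[0].content.text)
```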
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ asyncio
 typing
 sqlalchemy
 psycopg2-binary
-fast-agent-mcp
+fast-agent-mcp
+langchain[groq]
server.py
CHANGED
@@ -1,5 +1,4 @@
 import asyncio
-import json
 import os
 from typing import Any, List, Dict
 
@@ -37,7 +36,7 @@ async def list_tools() -> List[types.Tool]:
             "type": "object",
             "properties": {
                 "model": {"type": "string", "description": "The name of the model to run diagnostics on"},
-                "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities"}
+                "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities, along with the system prompt."}
             },
             "required": ["model", "model_capabilities"]
         },