ourllm.py changed + Working on the README.md
- .idea/webResources.xml +14 -0
- agent.py +1 -1
- drift_detector.sqlite3 +0 -0
- ourllm.py +56 -15
- requirements.txt +2 -1
- server.py +1 -2
.idea/webResources.xml
ADDED
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="WebResourcesPaths">
+    <contentEntries>
+      <entry url="file://$PROJECT_DIR$">
+        <entryData>
+          <resourceRoots>
+            <path value="file://$PROJECT_DIR$/useful-stuff" />
+          </resourceRoots>
+        </entryData>
+      </entry>
+    </contentEntries>
+  </component>
+</project>
agent.py
CHANGED
@@ -6,7 +6,7 @@ fast = FastAgent("Drift Test Agent")
 
 @fast.agent(
     name="diagnostics",
-    instruction="Run diagnostics using the MCP server tool.",
+    instruction="Your name is 'diagnostics'. Run diagnostics using the MCP server tool.",
     servers=["drift-server"]
 )
 async def main():
drift_detector.sqlite3
CHANGED
Binary files a/drift_detector.sqlite3 and b/drift_detector.sqlite3 differ
ourllm.py
CHANGED
@@ -1,17 +1,37 @@
 import difflib
 from typing import List
 import mcp.types as types
+from langchain.chat_models import init_chat_model
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
+
+llm = init_chat_model("llama-3.1-8b-instant",model_provider='groq')
+
 
 def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMessage]:
     """
     Generate a baseline questionnaire for the given model.
     Returns a list of SamplingMessage instances (role="user") with diagnostic questions.
     """
-
-
-
-
-
+    global llm
+    questions = []
+    previously_generated = ""
+
+    for i in range(0,5):
+        response = llm.invoke("Generate a questionnaire for a model with the following capabilities:\n"
+                              "Model Name: " + model + "\n"
+                              "Capabilities Overview:\n" + capabilities + "\n"
+                              "Please provide one more question that cover the model's capabilities and typical use-cases.\n"
+                              "Previously generated questions:\n" + previously_generated +
+                              "\nQuestion " + str(i+1) + ":")
+        new_question = str(response.content)
+        questions.append(new_question)
+        # Update previously_generated to include the new question
+        if previously_generated:
+            previously_generated += "\n"
+        previously_generated += f"Question {i+1}: {new_question}"
+
     return [
         types.SamplingMessage(
             role="user",
@@ -23,20 +43,41 @@ def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMessage]:
 
 def gradeanswers(old_answers: List[str], new_answers: List[str]) -> List[types.SamplingMessage]:
     """
-
+    Use the LLM to compare the old and new answers to compute a drift score.
     Returns a list with a single SamplingMessage (role="assistant") whose content.text is the drift percentage.
     """
-
-
+    global llm
+
+    if not old_answers or not new_answers:
         drift_pct = 0.0
     else:
-#
-
-
-
-
-
-
+        # Prepare a prompt with old and new answers for the LLM to analyze
+        prompt = "You're tasked with detecting semantic drift between two sets of model responses.\n\n"
+        prompt += "Original responses:\n"
+        for i, ans in enumerate(old_answers):
+            prompt += f"Response {i+1}: {ans}\n\n"
+
+        prompt += "New responses:\n"
+        for i, ans in enumerate(new_answers):
+            prompt += f"Response {i+1}: {ans}\n\n"
+
+        prompt += "Analyze the semantic differences between the original and new responses. "
+        prompt += "Provide a drift percentage score (0-100%) that represents how much the meaning, "
+        prompt += "intent, or capabilities have changed between the two sets of responses. "
+        prompt += "Only return the numerical percentage value without any explanation or additional text."
+
+        # Get the drift assessment from the LLM
+        response = llm.invoke(prompt)
+        drift_text = str(response.content).strip()
+
+        # Extract just the numerical value if there's extra text
+        import re
+        drift_match = re.search(r'(\d+\.?\d*)', drift_text)
+        if drift_match:
+            drift_pct = float(drift_match.group(1))
+        else:
+            # Fallback if no number found
+            drift_pct = 0.0
 
     drift_text = f"{drift_pct}"
     return [
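
For a quick local sanity check of the new LLM-backed flow in ourllm.py, a minimal driver along the lines of the sketch below could exercise both helpers. This is not part of the commit: the model name, capability text, and answer strings are illustrative, and it assumes a valid Groq API key is available in .env so init_chat_model can authenticate.

# Sketch only (not in this commit): exercise the new helpers in ourllm.py.
from ourllm import genratequestionnaire, gradeanswers

# Generate a baseline questionnaire for an illustrative model description.
questions = genratequestionnaire(
    model="llama-3.1-8b-instant",
    capabilities="General-purpose chat model used for diagnostics.",  # made-up capability text
)
for msg in questions:
    print(msg.role, msg.content)

# Compare two made-up answer sets; content.text of the returned message holds the drift percentage.
old_answers = ["Paris is the capital of France."]
new_answers = ["The capital of France is Paris."]
result = gradeanswers(old_answers, new_answers)
print("drift %:", result[0].content.text)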
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ asyncio
 typing
 sqlalchemy
 psycopg2-binary
-fast-agent-mcp
+fast-agent-mcp
+langchain[groq]
server.py
CHANGED
@@ -1,5 +1,4 @@
 import asyncio
-import json
 import os
 from typing import Any, List, Dict
 
@@ -37,7 +36,7 @@ async def list_tools() -> List[types.Tool]:
                 "type": "object",
                 "properties": {
                     "model": {"type": "string", "description": "The name of the model to run diagnostics on"},
-                    "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities"}
+                    "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities, along with the system prompt."}
                 },
                 "required": ["model", "model_capabilities"]
             },