ourllm.py changed + Working on the README.md
Changed files:
- .idea/webResources.xml +14 -0
- agent.py +1 -1
- drift_detector.sqlite3 +0 -0
- ourllm.py +56 -15
- requirements.txt +2 -1
- server.py +1 -2
.idea/webResources.xml
ADDED
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="WebResourcesPaths">
+    <contentEntries>
+      <entry url="file://$PROJECT_DIR$">
+        <entryData>
+          <resourceRoots>
+            <path value="file://$PROJECT_DIR$/useful-stuff" />
+          </resourceRoots>
+        </entryData>
+      </entry>
+    </contentEntries>
+  </component>
+</project>
agent.py
CHANGED
@@ -6,7 +6,7 @@ fast = FastAgent("Drift Test Agent")
 
 @fast.agent(
     name="diagnostics",
-    instruction="Run diagnostics using the MCP server tool.",
+    instruction="Your name is 'diagnostics'. Run diagnostics using the MCP server tool.",
     servers=["drift-server"]
 )
 async def main():
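For context, the changed `instruction` string is what fast-agent hands the model at startup, so the commit is pinning the agent's self-identification in addition to its task. A minimal sketch of how such an agent is typically driven, assuming fast-agent-mcp's documented `FastAgent` import and `fast.run()` pattern (the message text is illustrative):

```python
import asyncio
from mcp_agent.core.fastagent import FastAgent

fast = FastAgent("Drift Test Agent")

@fast.agent(
    name="diagnostics",
    instruction="Your name is 'diagnostics'. Run diagnostics using the MCP server tool.",
    servers=["drift-server"],
)
async def main():
    # fast.run() connects the configured MCP servers; a message to the
    # agent can then trigger the drift-server diagnostics tool.
    async with fast.run() as agent:
        await agent("Run diagnostics on the current model.")

if __name__ == "__main__":
    asyncio.run(main())
```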
drift_detector.sqlite3
CHANGED
Binary files a/drift_detector.sqlite3 and b/drift_detector.sqlite3 differ
ourllm.py
CHANGED
@@ -1,17 +1,37 @@
 import difflib
 from typing import List
 import mcp.types as types
+from langchain.chat_models import init_chat_model
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
+
+llm = init_chat_model("llama-3.1-8b-instant", model_provider='groq')
+
 
 def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMessage]:
     """
     Generate a baseline questionnaire for the given model.
     Returns a list of SamplingMessage instances (role="user") with diagnostic questions.
     """
-
-
-
-
-
+    global llm
+    questions = []
+    previously_generated = ""
+
+    for i in range(0, 5):
+        response = llm.invoke("Generate a questionnaire for a model with the following capabilities:\n"
+                              "Model Name: " + model + "\n"
+                              "Capabilities Overview:\n" + capabilities + "\n"
+                              "Please provide one more question that cover the model's capabilities and typical use-cases.\n"
+                              "Previously generated questions:\n" + previously_generated +
+                              "\nQuestion " + str(i + 1) + ":")
+        new_question = str(response.content)
+        questions.append(new_question)
+        # Update previously_generated to include the new question
+        if previously_generated:
+            previously_generated += "\n"
+        previously_generated += f"Question {i+1}: {new_question}"
+
     return [
         types.SamplingMessage(
             role="user",
@@ -23,20 +43,41 @@ def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMe
 
 def gradeanswers(old_answers: List[str], new_answers: List[str]) -> List[types.SamplingMessage]:
     """
-
+    Use the LLM to compare the old and new answers to compute a drift score.
     Returns a list with a single SamplingMessage (role="assistant") whose content.text is the drift percentage.
     """
-
-
+    global llm
+
+    if not old_answers or not new_answers:
         drift_pct = 0.0
     else:
-        #
-
-
-
-
-
+        # Prepare a prompt with old and new answers for the LLM to analyze
+        prompt = "You're tasked with detecting semantic drift between two sets of model responses.\n\n"
+        prompt += "Original responses:\n"
+        for i, ans in enumerate(old_answers):
+            prompt += f"Response {i+1}: {ans}\n\n"
+
+        prompt += "New responses:\n"
+        for i, ans in enumerate(new_answers):
+            prompt += f"Response {i+1}: {ans}\n\n"
+
+        prompt += "Analyze the semantic differences between the original and new responses. "
+        prompt += "Provide a drift percentage score (0-100%) that represents how much the meaning, "
+        prompt += "intent, or capabilities have changed between the two sets of responses. "
+        prompt += "Only return the numerical percentage value without any explanation or additional text."
+
+        # Get the drift assessment from the LLM
+        response = llm.invoke(prompt)
+        drift_text = str(response.content).strip()
+
+        # Extract just the numerical value if there's extra text
+        import re
+        drift_match = re.search(r'(\d+\.?\d*)', drift_text)
+        if drift_match:
+            drift_pct = float(drift_match.group(1))
+        else:
+            # Fallback if no number found
+            drift_pct = 0.0
 
     drift_text = f"{drift_pct}"
     return [
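Taken together, the rewrite turns both helpers into LLM round trips: one Groq call per question in `genratequestionnaire` (five in total, each prompt carrying the previously generated questions to discourage duplicates), and a single grading call plus a regex fallback in `gradeanswers`. A minimal caller sketch under stated assumptions — the model name, capability string, and answer lists are illustrative, the exact packing of questions into the returned messages is truncated in the diff, and reading the drift value from `graded[0].content.text` follows the docstring:

```python
from ourllm import genratequestionnaire, gradeanswers

# Baseline run: generates the diagnostic questionnaire via the Groq model.
baseline = genratequestionnaire(
    model="llama-3.1-8b-instant",
    capabilities="General-purpose chat model for Q&A and summarization.",
)
print(f"{len(baseline)} baseline message(s) generated")

# Later run: grade stored baseline answers against fresh answers
# to the same questions.
old_answers = ["Paris is the capital of France."]
new_answers = ["The capital of France is Paris."]
graded = gradeanswers(old_answers, new_answers)
# Per the docstring, the single assistant message's content.text
# holds the drift percentage, e.g. "5.0".
print("drift %:", graded[0].content.text)
```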
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ asyncio
 typing
 sqlalchemy
 psycopg2-binary
-fast-agent-mcp
+fast-agent-mcp
+langchain[groq]
server.py
CHANGED
@@ -1,5 +1,4 @@
 import asyncio
-import json
 import os
 from typing import Any, List, Dict
 
@@ -37,7 +36,7 @@ async def list_tools() -> List[types.Tool]:
             "type": "object",
             "properties": {
                 "model": {"type": "string", "description": "The name of the model to run diagnostics on"},
-                "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities"}
+                "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities, along with the system prompt."}
             },
             "required": ["model", "model_capabilities"]
         },