Sars6 committed on
Commit
771473e
·
1 Parent(s): a467728

ourllm.py changed + Working on the README.md

Browse files
Files changed (6) hide show
  1. .idea/webResources.xml +14 -0
  2. agent.py +1 -1
  3. drift_detector.sqlite3 +0 -0
  4. ourllm.py +56 -15
  5. requirements.txt +2 -1
  6. server.py +1 -2
.idea/webResources.xml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="WebResourcesPaths">
4
+ <contentEntries>
5
+ <entry url="file://$PROJECT_DIR$">
6
+ <entryData>
7
+ <resourceRoots>
8
+ <path value="file://$PROJECT_DIR$/useful-stuff" />
9
+ </resourceRoots>
10
+ </entryData>
11
+ </entry>
12
+ </contentEntries>
13
+ </component>
14
+ </project>
agent.py CHANGED
@@ -6,7 +6,7 @@ fast = FastAgent("Drift Test Agent")
6
 
7
  @fast.agent(
8
  name="diagnostics",
9
- instruction="Run diagnostics using the MCP server tool.",
10
  servers=["drift-server"]
11
  )
12
  async def main():
 
6
 
7
  @fast.agent(
8
  name="diagnostics",
9
+ instruction="Your name is 'diagnostics'. Run diagnostics using the MCP server tool.",
10
  servers=["drift-server"]
11
  )
12
  async def main():
drift_detector.sqlite3 CHANGED
Binary files a/drift_detector.sqlite3 and b/drift_detector.sqlite3 differ
 
ourllm.py CHANGED
@@ -1,17 +1,37 @@
1
  import difflib
2
  from typing import List
3
  import mcp.types as types
 
 
 
 
 
 
 
4
 
5
  def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMessage]:
6
  """
7
  Generate a baseline questionnaire for the given model.
8
  Returns a list of SamplingMessage instances (role="user") with diagnostic questions.
9
  """
10
- questions = [
11
- f"Model Name: {model}\nPlease confirm your model name.",
12
- f"Capabilities Overview:\n{capabilities}\nPlease summarize your key capabilities.",
13
- "Describe a typical use-case scenario that demonstrates these capabilities.",
14
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  return [
16
  types.SamplingMessage(
17
  role="user",
@@ -23,20 +43,41 @@ def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMe
23
 
24
  def gradeanswers(old_answers: List[str], new_answers: List[str]) -> List[types.SamplingMessage]:
25
  """
26
- Compare the old and new answers to compute a drift score.
27
  Returns a list with a single SamplingMessage (role="assistant") whose content.text is the drift percentage.
28
  """
29
- total = len(old_answers)
30
- if total == 0:
 
31
  drift_pct = 0.0
32
  else:
33
- # Count how many answers are sufficiently similar
34
- similar_count = 0
35
- for old, new in zip(old_answers, new_answers):
36
- ratio = difflib.SequenceMatcher(None, old, new).ratio()
37
- if ratio >= 0.8:
38
- similar_count += 1
39
- drift_pct = round((1 - (similar_count / total)) * 100, 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  drift_text = f"{drift_pct}"
42
  return [
 
1
  import difflib
2
  from typing import List
3
  import mcp.types as types
4
+ from langchain.chat_models import init_chat_model
5
+ from dotenv import load_dotenv
6
+ # Load environment variables from .env file
7
+ load_dotenv()
8
+
9
+ llm = init_chat_model("llama-3.1-8b-instant",model_provider='groq')
10
+
11
 
12
  def genratequestionnaire(model: str, capabilities: str) -> List[types.SamplingMessage]:
13
  """
14
  Generate a baseline questionnaire for the given model.
15
  Returns a list of SamplingMessage instances (role="user") with diagnostic questions.
16
  """
17
+ global llm
18
+ questions = []
19
+ previously_generated = ""
20
+
21
+ for i in range(0,5):
22
+ response = llm.invoke("Generate a questionnaire for a model with the following capabilities:\n"
23
+ "Model Name: " + model + "\n"
24
+ "Capabilities Overview:\n" + capabilities + "\n"
25
+ "Please provide one more question that cover the model's capabilities and typical use-cases.\n"
26
+ "Previously generated questions:\n" + previously_generated +
27
+ "\nQuestion " + str(i+1) + ":")
28
+ new_question = str(response.content)
29
+ questions.append(new_question)
30
+ # Update previously_generated to include the new question
31
+ if previously_generated:
32
+ previously_generated += "\n"
33
+ previously_generated += f"Question {i+1}: {new_question}"
34
+
35
  return [
36
  types.SamplingMessage(
37
  role="user",
 
43
 
44
  def gradeanswers(old_answers: List[str], new_answers: List[str]) -> List[types.SamplingMessage]:
45
  """
46
+ Use the LLM to compare the old and new answers to compute a drift score.
47
  Returns a list with a single SamplingMessage (role="assistant") whose content.text is the drift percentage.
48
  """
49
+ global llm
50
+
51
+ if not old_answers or not new_answers:
52
  drift_pct = 0.0
53
  else:
54
+ # Prepare a prompt with old and new answers for the LLM to analyze
55
+ prompt = "You're tasked with detecting semantic drift between two sets of model responses.\n\n"
56
+ prompt += "Original responses:\n"
57
+ for i, ans in enumerate(old_answers):
58
+ prompt += f"Response {i+1}: {ans}\n\n"
59
+
60
+ prompt += "New responses:\n"
61
+ for i, ans in enumerate(new_answers):
62
+ prompt += f"Response {i+1}: {ans}\n\n"
63
+
64
+ prompt += "Analyze the semantic differences between the original and new responses. "
65
+ prompt += "Provide a drift percentage score (0-100%) that represents how much the meaning, "
66
+ prompt += "intent, or capabilities have changed between the two sets of responses. "
67
+ prompt += "Only return the numerical percentage value without any explanation or additional text."
68
+
69
+ # Get the drift assessment from the LLM
70
+ response = llm.invoke(prompt)
71
+ drift_text = str(response.content).strip()
72
+
73
+ # Extract just the numerical value if there's extra text
74
+ import re
75
+ drift_match = re.search(r'(\d+\.?\d*)', drift_text)
76
+ if drift_match:
77
+ drift_pct = float(drift_match.group(1))
78
+ else:
79
+ # Fallback if no number found
80
+ drift_pct = 0.0
81
 
82
  drift_text = f"{drift_pct}"
83
  return [
requirements.txt CHANGED
@@ -5,4 +5,5 @@ asyncio
5
  typing
6
  sqlalchemy
7
  psycopg2-binary
8
- fast-agent-mcp
 
 
5
  typing
6
  sqlalchemy
7
  psycopg2-binary
8
+ fast-agent-mcp
9
+ langchain[groq]
server.py CHANGED
@@ -1,5 +1,4 @@
1
  import asyncio
2
- import json
3
  import os
4
  from typing import Any, List, Dict
5
 
@@ -37,7 +36,7 @@ async def list_tools() -> List[types.Tool]:
37
  "type": "object",
38
  "properties": {
39
  "model": {"type": "string", "description": "The name of the model to run diagnostics on"},
40
- "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities"}
41
  },
42
  "required": ["model", "model_capabilities"]
43
  },
 
1
  import asyncio
 
2
  import os
3
  from typing import Any, List, Dict
4
 
 
36
  "type": "object",
37
  "properties": {
38
  "model": {"type": "string", "description": "The name of the model to run diagnostics on"},
39
+ "model_capabilities": {"type": "string", "description": "Full description of the model's capabilities, along with the system prompt."}
40
  },
41
  "required": ["model", "model_capabilities"]
42
  },