Update llmeval.py
llmeval.py  +17 -15
@@ -8,10 +8,8 @@ client = Groq(api_key=AK)
 de=DatabaseEngine()


-
-
-
-SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT=f'''
+def PROMPT_UPDATER(agenttype):
+    SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT=f'''
 Task:
 Evaluate the biological quality of a Prompt, Context, and Response from an {agenttype} Agent on a 0–10 continuous scale.

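This hunk is the heart of the change: the four system prompts move from module level into a new PROMPT_UPDATER(agenttype) function. Because each template is an f-string, {agenttype} is interpolated the moment the assignment executes, so at module level a global agenttype would have to exist at import time; inside the function, the parameter supplies it on every call. A minimal sketch of that mechanism (the function name and prompt text are illustrative, not from this commit):

def make_prompt(agenttype):
    # The f-string is evaluated each time the function runs,
    # binding {agenttype} to the argument passed in.
    return f"Evaluate the {agenttype} Agent on a 0-10 continuous scale."

print(make_prompt("observation agent"))
# Evaluate the observation agent Agent on a 0-10 continuous scale.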
@@ -52,7 +50,7 @@ No extra commentary, no markdown, no explanations before or after.
 Think step by step
 '''

-SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT=f'''
+    SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT=f'''
 Task:
 Evaluate how well the {agenttype} Response addresses the specific Prompt by leveraging the provided Context on a 0–10 continuous scale.

@@ -93,7 +91,7 @@ Think step by step
 '''


-SYSTEM_PROMPT_FOR_TRIAD_COHERENCE=f'''
+    SYSTEM_PROMPT_FOR_TRIAD_COHERENCE=f'''
 Task:
 Evaluate the logical and semantic coherence of the Prompt, Context, and Response of {agenttype} as a unified set on a 0–10 continuous scale.

@@ -131,7 +129,7 @@ Think step by step
 '''


-SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY=f'''
+    SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY=f'''
 Task:
 Evaluate how focused, detailed, and context-aware the {agenttype} Response is with respect to the Prompt and Context on a 0–10 continuous scale.

@@ -170,6 +168,9 @@ Think step by step

 '''

+    return SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,SYSTEM_PROMPT_FOR_TRIAD_COHERENCE
+
+
 class LLM_as_Evaluator():

     def __init__(self):
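PROMPT_UPDATER returns all four templates as a tuple in the order (bio context alignment, contextual relevance alignment, response specificity, triad coherence). Note that response specificity comes before triad coherence even though coherence is defined first in the function body, so callers must unpack in the returned order, as the call site below does. A usage sketch against the committed function (the short local names are mine):

bio, relevance, specificity, coherence = PROMPT_UPDATER("observation agent")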
@@ -198,6 +199,7 @@ class LLM_as_Evaluator():

         data_to_evaluate=de.GetData(promptversion)

+        SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,SYSTEM_PROMPT_FOR_TRIAD_COHERENCE = PROMPT_UPDATER("observation agent")

         evaluation_responses=[]

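The evaluator now rebuilds its prompts per run, but the agent type is hard-coded to "observation agent" at the call site. Since the factory already takes the type as its only parameter, grading another agent would only need a different argument; a hypothetical example (the string is illustrative):

prompts = PROMPT_UPDATER("reasoning agent")
bio_prompt = prompts[0]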
@@ -263,7 +265,7 @@ class LLM_as_Evaluator():
         evaluation_responses.append({"unit_coherence":evaluation_response})


-
+        data={


             "promptversion":promptversion,
@@ -271,17 +273,17 @@
             "contextual_relevance_alignment":"",
             "unit_coherence":"",
             "response_specificity":""
-
+        }

-
+        for resp in evaluation_responses:

-
-
-
-
+            data["biological_context_alignment"]=resp["biological_context_alignment"]
+            data["contextual_relevance_alignment"]=resp["contextual_relevance_alignment"]
+            data["unit_coherence"]=resp["unit_coherence"]
+            data["response_specificity"]=resp["response_specificity"]


-
+        de.Update(data=data)

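A caveat on the new merge loop: each element appended to evaluation_responses carries a single metric key (the visible append adds {"unit_coherence": evaluation_response}), so indexing all four keys on every element will raise KeyError for the three keys that element lacks. A tolerant sketch, assuming the one-key-per-entry shape the append calls suggest:

for resp in evaluation_responses:
    # Fold in whichever single metric this entry carries;
    # dict.update never raises on keys the entry lacks.
    data.update(resp)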