Update llmeval.py
llmeval.py  +74 -13

@@ -194,32 +194,93 @@ class LLM_as_Evaluator():
 
     def Observation_LLM_Evaluator(self,promptversion):
 
-        metrics=["biological_context_alignment","contextual_relevance_alignment","
+        metrics=["biological_context_alignment","contextual_relevance_alignment","response_specificity","unit_coherence"]
 
         data_to_evaluate=de.GetData(promptversion)
-        import time
 
-        for metric in metrics:
-
-
-            {"role":"user","content":f"""
-            Prompt :{data_to_evaluate["prompt"]}
-            Context :{data_to_evaluate["context"]}
-            Agent's Response : {data_to_evaluate["response"]}
-            """}
-            ]
-        evaluation_response=self.___engine_core(messages=messages)
+
+        evaluation_responses=[]
+
+        for metric in metrics:
+
+            # route each metric to its dedicated evaluation system prompt
+            match metric:
+                case "biological_context_alignment":
+                    messages=[
+                        {"role":"system","content":SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"biological_context_alignment":evaluation_response})
+
+                case "contextual_relevance_alignment":
+                    messages=[
+                        {"role":"system","content":SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"contextual_relevance_alignment":evaluation_response})
+
+                case "response_specificity":
+                    messages=[
+                        {"role":"system","content":SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"response_specificity":evaluation_response})
+
+                case "unit_coherence":
+                    messages=[
+                        {"role":"system","content":SYSTEM_PROMPT_FOR_TRIAD_COHERENCE},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"unit_coherence":evaluation_response})
+
         data={
             "promptversion":promptversion,
             "biological_context_alignment":"",
             "contextual_relevance_alignment":"",
             "unit_coherence":"",
             "response_specificity":""
         }
+
+        # each entry in evaluation_responses carries a single metric key,
+        # so merge it into data instead of indexing all four keys per entry
+        for resp in evaluation_responses:
+            data.update(resp)
+
         de.Update(data=data)
 
 
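The four case arms are identical except for the system prompt each one passes, so the dispatch could collapse into a lookup table. Below is a minimal sketch of that refactor, not part of the commit: it assumes the same SYSTEM_* constants and ___engine_core call shown in the diff, and the method name evaluate_metrics is hypothetical.

    # Hypothetical refactor sketch; assumes the SYSTEM_* constants from the module.
    METRIC_SYSTEM_PROMPTS = {
        "biological_context_alignment": SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,
        "contextual_relevance_alignment": SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,
        "response_specificity": SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,
        "unit_coherence": SYSTEM_PROMPT_FOR_TRIAD_COHERENCE,
    }

    class LLM_as_Evaluator():
        ...
        def evaluate_metrics(self, data_to_evaluate):
            # one engine call per metric; result keys match what de.Update expects
            user_content = f"""
            Prompt :{data_to_evaluate["prompt"]}
            Context :{data_to_evaluate["context"]}
            Agent's Response : {data_to_evaluate["response"]}
            """
            results = {}
            for metric, system_prompt in METRIC_SYSTEM_PROMPTS.items():
                messages = [
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_content},
                ]
                results[metric] = self.___engine_core(messages=messages)
            return results

With this shape, the aggregation at the end of Observation_LLM_Evaluator reduces to data.update(self.evaluate_metrics(data_to_evaluate)), and adding a fifth metric means adding one dictionary entry rather than a new case arm.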