Junaidb committed
Commit a08f08b · verified · 1 Parent(s): 85b3ca7

Update llmeval.py

Files changed (1): llmeval.py +74 -13

llmeval.py CHANGED
@@ -194,32 +194,93 @@ class LLM_as_Evaluator():
 
     def Observation_LLM_Evaluator(self,promptversion):
 
-        metrics=["biological_context_alignment","contextual_relevance_alignment","coherence","response_specificity"]
+        metrics=["biological_context_alignment","contextual_relevance_alignment","response_specificity","unit_coherence"]
 
         data_to_evaluate=de.GetData(promptversion)
-        import time
 
-        for metric in metrics:
-
-            messages=[
-                {"role":"system","content":SYSTEM_FOR_BIO_CONTEXT_EVAL_FOR_OBSERVATION},
-                {"role":"user","content":f"""
-                Prompt :{data_to_evaluate["prompt"]}
-                Context :{data_to_evaluate["context"]}
-                Agent's Response : {data_to_evaluate["response"]}
-                """}
-            ]
-            evaluation_response=self.___engine_core(messages=messages)
+        evaluation_responses=[]
+
+        for metric in metrics:
+
+            match metric:
+                case "biological_context_alignment":
+
+                    messages=[
+                        {"role":"system","content":SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"bio_context_alignment":evaluation_response})
+
+                case "contextual_relevance_alignment":
+
+                    messages=[
+                        {"role":"system","content":SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"contextual_relevance_alignment":evaluation_response})
+
+                case "response_specificity":
+
+                    messages=[
+                        {"role":"system","content":SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"response_specificity":evaluation_response})
+
+                case "unit_coherence":
+
+                    messages=[
+                        {"role":"system","content":SYSTEM_PROMPT_FOR_TRIAD_COHERENCE},
+                        {"role":"user","content":f"""
+                        Prompt :{data_to_evaluate["prompt"]}
+                        Context :{data_to_evaluate["context"]}
+                        Agent's Response : {data_to_evaluate["response"]}
+                        """}
+                    ]
+                    evaluation_response=self.___engine_core(messages=messages)
+                    evaluation_responses.append({"unit_coherence":evaluation_response})
 
         data={
            "promptversion":promptversion,
            "biological_context_alignment":"",
            "contextual_relevance_alignment":"",
            "unit_coherence":"",
            "response_specificity":""
         }
+
+        for resp in evaluation_responses:
+            data["biological_context_alignment"]=resp["biological_context_alignment"]
+            data["contextual_relevance_alignment"]=resp["contextual_relevance_alignment"]
+            data["unit_coherence"]=resp["unit_coherence"]
+            data["response_specificity"]=resp["response_specificity"]
 
         de.Update(data=data)
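
Review note: as committed, the final merge loop will raise KeyError at runtime. Each dict appended to evaluation_responses holds exactly one key, but the loop reads all four metric keys from every entry; the first branch also appends under "bio_context_alignment" while the loop looks up "biological_context_alignment". The four match-case branches otherwise differ only in their system prompt. Below is a minimal table-driven sketch of the same flow, assuming the same module-level prompt constants and the `de` data-access helper that llmeval.py already uses; it is illustrative, not the committed code.

# Sketch only: maps each metric to its system prompt instead of repeating
# four near-identical match-case branches. Assumes the constants
# SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT etc. and `de` exist as in the diff.
METRIC_PROMPTS = {
    "biological_context_alignment": SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,
    "contextual_relevance_alignment": SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,
    "response_specificity": SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,
    "unit_coherence": SYSTEM_PROMPT_FOR_TRIAD_COHERENCE,
}

# Inside class LLM_as_Evaluator:
def Observation_LLM_Evaluator(self, promptversion):
    data_to_evaluate = de.GetData(promptversion)

    # All four metrics share the same user message, so build it once.
    user_content = f"""
    Prompt :{data_to_evaluate["prompt"]}
    Context :{data_to_evaluate["context"]}
    Agent's Response : {data_to_evaluate["response"]}
    """

    data = {"promptversion": promptversion}
    for metric, system_prompt in METRIC_PROMPTS.items():
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ]
        # Store each score under the exact metric name de.Update expects,
        # so no post-hoc merge loop (and no key mismatch) is needed.
        data[metric] = self.___engine_core(messages=messages)

    de.Update(data=data)

Keying data by the metric name itself keeps the output shape de.Update already expects, and adding a fifth metric becomes a one-line change to METRIC_PROMPTS.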