Junaidb committed (verified)
Commit c9b0834 · 1 Parent(s): 4f65796

Update llmeval.py

Files changed (1)
  1. llmeval.py +37 -72
llmeval.py CHANGED
@@ -180,16 +180,17 @@ class LLM_as_Evaluator():
     def ___engine_core(self,messages):
 
         completion = client.chat.completions.create(
-            model="deepseek-r1-distill-llama-70b",
+            model="llama-3.1-8b-instant",
             messages=messages,
             temperature=0.0,
-            max_completion_tokens=6000,
+            max_completion_tokens=5000,
             #top_p=1,
             stream=False,
             stop=None,
             )
         actual_message=completion.choices[0].message.content
-        return re.sub(r"<think>.*?</think>", "", actual_message, flags=re.DOTALL).strip()
+        #return re.sub(r"<think>.*?</think>", "", actual_message, flags=re.DOTALL).strip()
+        return actual_message
 
 
 
@@ -199,80 +200,44 @@ class LLM_as_Evaluator():
 
         data_to_evaluate=de.GetData(promptversion)
 
-        SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,SYSTEM_PROMPT_FOR_TRIAD_COHERENCE = PROMPT_UPDATER("observation agent")
+        (
+            SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,
+            SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,
+            SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,
+            SYSTEM_PROMPT_FOR_TRIAD_COHERENCE
+        ) = PROMPT_UPDATER("observation agent")
+
+        prompt_map = {
+            "biological_context_alignment": SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,
+            "contextual_relevance_alignment": SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,
+            "response_specificity": SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,
+            "unit_coherence": SYSTEM_PROMPT_FOR_TRIAD_COHERENCE
+        }
 
         evaluation_responses=[]
-
         for metric in metrics:
-
-            match metric:
-                case "biological_context_alignment":
-
-                    messages =[
-                        {"role":"system","content":SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT},
-                        {"role":"user","content":f"""
-                        Prompt :{data_to_evaluate["prompt"]}
-                        Context :{data_to_evaluate["context"]}
-                        Agent's Response : {data_to_evaluate["response"]}
-                        """}
-                    ]
-                    evaluation_response=self.___engine_core(messages=messages)
-                    evaluation_responses.append({"biological_context_alignment":evaluation_response})
-
-                case "contextual_relevance_alignment":
-
-                    messages =[
-                        {"role":"system","content":SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT},
-                        {"role":"user","content":f"""
-                        Prompt :{data_to_evaluate["prompt"]}
-                        Context :{data_to_evaluate["context"]}
-                        Agent's Response : {data_to_evaluate["response"]}
-                        """}
-                    ]
-                    evaluation_response=self.___engine_core(messages=messages)
-                    evaluation_responses.append({"contextual_relevance_alignment":evaluation_response})
-
-                case "response_specificity":
-
-                    messages =[
-                        {"role":"system","content":SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY},
-                        {"role":"user","content":f"""
-                        Prompt :{data_to_evaluate["prompt"]}
-                        Context :{data_to_evaluate["context"]}
-                        Agent's Response : {data_to_evaluate["response"]}
-                        """}
-                    ]
-                    evaluation_response=self.___engine_core(messages=messages)
-                    evaluation_responses.append({"response_specificity":evaluation_response})
-
-                case "unit_coherence":
-
-                    messages =[
-                        {"role":"system","content":SYSTEM_PROMPT_FOR_TRIAD_COHERENCE},
-                        {"role":"user","content":f"""
-                        Prompt :{data_to_evaluate["prompt"]}
-                        Context :{data_to_evaluate["context"]}
-                        Agent's Response : {data_to_evaluate["response"]}
-                        """}
-                    ]
-                    evaluation_response=self.___engine_core(messages=messages)
-                    evaluation_responses.append({"unit_coherence":evaluation_response})
-
+            system_prompt = prompt_map[metric]
+
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": f"""
+                Prompt: {data_to_evaluate["prompt"]}
+                Context: {data_to_evaluate["context"]}
+                Agent's Response: {data_to_evaluate["response"]}
+                """}
+            ]
+
+            evaluation_response = self.___engine_core(messages=messages)
+            evaluation_responses.append({metric: evaluation_response})
+
+
 
         data={
-
-
-            "promptversion":promptversion,
-            "biological_context_alignment":"",
-            "contextual_relevance_alignment":"",
-            "unit_coherence":"",
-            "response_specificity":""
+            "promptversion":promptversion,
+            "biological_context_alignment":"",
+            "contextual_relevance_alignment":"",
+            "unit_coherence":"",
+            "response_specificity":""
         }
 
         for resp in evaluation_responses:
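
The diff does not show how `client` is constructed. Both model names involved ("deepseek-r1-distill-llama-70b" before, "llama-3.1-8b-instant" after) are served through Groq's OpenAI-compatible chat-completions API, so the setup below is only a plausible sketch of the call that ___engine_core now makes; the Groq client, the environment-variable key handling, and the placeholder message are assumptions, not part of this commit.

from groq import Groq  # assumption: an OpenAI-compatible SDK; the commit only shows the create() call

client = Groq()  # assumption: reads GROQ_API_KEY from the environment

completion = client.chat.completions.create(
    model="llama-3.1-8b-instant",      # model chosen in this commit
    messages=[{"role": "user", "content": "ping"}],  # placeholder message
    temperature=0.0,
    max_completion_tokens=5000,        # new token cap chosen in this commit
    stream=False,
    stop=None,
)
print(completion.choices[0].message.content)

Because llama-3.1-8b-instant does not emit <think>...</think> reasoning blocks the way the DeepSeek-R1 distill does, the commit comments out the re.sub stripping step and returns the message content as-is.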
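
To run the refactored selection logic in isolation, here is a minimal sketch of the new loop: the four match/case branches collapse into a single dictionary lookup, so every metric shares one message template and one call into the engine. PROMPT_UPDATER, de.GetData, and the model client are stubbed with placeholders here, since they live elsewhere in llmeval.py and are not part of this diff.

# Stand-ins for the prompts returned by PROMPT_UPDATER("observation agent").
SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT = "Placeholder: grade biological context alignment."
SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT = "Placeholder: grade contextual relevance."
SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY = "Placeholder: grade response specificity."
SYSTEM_PROMPT_FOR_TRIAD_COHERENCE = "Placeholder: grade unit coherence."

# One system prompt per metric; this replaces the old match/case blocks.
prompt_map = {
    "biological_context_alignment": SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,
    "contextual_relevance_alignment": SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,
    "response_specificity": SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,
    "unit_coherence": SYSTEM_PROMPT_FOR_TRIAD_COHERENCE,
}

# Stand-in for de.GetData(promptversion).
data_to_evaluate = {"prompt": "...", "context": "...", "response": "..."}

def engine_core(messages):
    # Stub for ___engine_core: the real method sends the messages to the model
    # and returns completion.choices[0].message.content.
    return f"(stub evaluation for: {messages[0]['content'][:40]})"

evaluation_responses = []
# llmeval.py iterates a caller-supplied `metrics` list and looks each metric up;
# here we iterate the map directly so the sketch is self-contained.
for metric, system_prompt in prompt_map.items():
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"""
        Prompt: {data_to_evaluate['prompt']}
        Context: {data_to_evaluate['context']}
        Agent's Response: {data_to_evaluate['response']}
        """},
    ]
    evaluation_responses.append({metric: engine_core(messages)})

print(evaluation_responses)

One behavioural difference worth noting: the old match/case silently ignored an unknown metric, while prompt_map[metric] raises a KeyError, so a misspelled metric name now fails fast instead of being skipped.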