Junaidb committed
Commit 295be2b · verified · 1 Parent(s): 0c9af45

Update llmeval.py

Files changed (1)
  1. llmeval.py +17 -15
llmeval.py CHANGED
@@ -8,10 +8,8 @@ client = Groq(api_key=AK)
de=DatabaseEngine()


-
-
-
-SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT=f'''
+def PROMPT_UPDATER(agenttype):
+    SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT=f'''
Task:
Evaluate the biological quality of a Prompt, Context, and Response from an {agenttype} Agent on a 0–10 continuous scale.

@@ -52,7 +50,7 @@ No extra commentary, no markdown, no explanations before or after.
Think step by step
'''

-SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT=f'''
+    SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT=f'''
Task:
Evaluate how well the {agenttype} Response addresses the specific Prompt by leveraging the provided Context on a 0–10 continuous scale.

@@ -93,7 +91,7 @@ Think step by step
'''


-SYSTEM_PROMPT_FOR_TRIAD_COHERENCE=f'''
+    SYSTEM_PROMPT_FOR_TRIAD_COHERENCE=f'''
Task:
Evaluate the logical and semantic coherence of the Prompt, Context, and Response of {agenttype} as a unified set on a 0–10 continuous scale.

@@ -131,7 +129,7 @@ Think step by step
'''


-SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY=f'''
+    SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY=f'''
Task:
Evaluate how focused, detailed, and context-aware the {agenttype} Response is with respect to the Prompt and Context on a 0–10 continuous scale.

@@ -170,6 +168,9 @@ Think step by step

'''

+    return SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,SYSTEM_PROMPT_FOR_TRIAD_COHERENCE
+
+
class LLM_as_Evaluator():

    def __init__(self):
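The four prompt templates are f-strings that interpolate {agenttype}, so building them at module level would require a global agenttype to exist at import time; the commit instead wraps them in a PROMPT_UPDATER(agenttype) factory and builds them on demand. A minimal sketch of that pattern, with the rubric bodies abbreviated to "..." (the full text lives in llmeval.py):

def PROMPT_UPDATER(agenttype):
    # Build the four system prompts for the given agent type; the f-strings
    # interpolate {agenttype} only when this function is called.
    SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT = f'''
Task:
Evaluate the biological quality of a Prompt, Context, and Response from an {agenttype} Agent on a 0–10 continuous scale.
...
Think step by step
'''
    SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT = f'''
Task:
Evaluate how well the {agenttype} Response addresses the specific Prompt by leveraging the provided Context on a 0–10 continuous scale.
...
'''
    SYSTEM_PROMPT_FOR_TRIAD_COHERENCE = f'''
Task:
Evaluate the logical and semantic coherence of the Prompt, Context, and Response of {agenttype} as a unified set on a 0–10 continuous scale.
...
'''
    SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY = f'''
Task:
Evaluate how focused, detailed, and context-aware the {agenttype} Response is with respect to the Prompt and Context on a 0–10 continuous scale.
...
'''
    # Return order: bio alignment, contextual relevance, response specificity, triad coherence.
    return (SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,
            SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,
            SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,
            SYSTEM_PROMPT_FOR_TRIAD_COHERENCE)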
@@ -198,6 +199,7 @@ class LLM_as_Evaluator():

data_to_evaluate=de.GetData(promptversion)

+SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,SYSTEM_PROMPT_FOR_TRIAD_COHERENCE = PROMPT_UPDATER("observation agent")

evaluation_responses=[]

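Inside the evaluator the prompts are now requested per run, with the agent type hard-coded to "observation agent". A sketch of that call site; the unpacking order has to mirror the factory's return order (bio alignment, contextual relevance, response specificity, triad coherence):

# Unpack in the same order PROMPT_UPDATER returns the prompts.
(SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,
 SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,
 SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,
 SYSTEM_PROMPT_FOR_TRIAD_COHERENCE) = PROMPT_UPDATER("observation agent")

Returning a dict keyed by metric name would make the call site order-independent; the tuple above matches what the commit ships.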
 
 
@@ -263,7 +265,7 @@ class LLM_as_Evaluator():
evaluation_responses.append({"unit_coherence":evaluation_response})


-data={
+data={


"promptversion":promptversion,
@@ -271,17 +273,17 @@
"contextual_relevance_alignment":"",
"unit_coherence":"",
"response_specificity":""
-}
+}

-for resp in evaluation_responses:
+for resp in evaluation_responses:

-data["biological_context_alignment"]=resp["biological_context_alignment"]
-data["contextual_relevance_alignment"]=resp["contextual_relevance_alignment"]
-data["unit_coherence"]=resp["unit_coherence"]
-data["response_specificity"]=resp["response_specificity"]
+data["biological_context_alignment"]=resp["biological_context_alignment"]
+data["contextual_relevance_alignment"]=resp["contextual_relevance_alignment"]
+data["unit_coherence"]=resp["unit_coherence"]
+data["response_specificity"]=resp["response_specificity"]


-de.Update(data=data)
+de.Update(data=data)

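The final hunks rebuild the data record and the loop that copies each metric out of evaluation_responses before handing the record to de.Update. The one append visible above stores a single-key dict ({"unit_coherence": evaluation_response}), so indexing all four metric keys on every resp only works if every entry actually carries all four scores. A hedged sketch of a merge that tolerates one-key entries; merge_scores is a hypothetical helper, not part of the commit:

def merge_scores(promptversion, evaluation_responses):
    # Hypothetical helper (not in the commit): fold the per-metric dicts
    # appended during evaluation into one record shaped for de.Update().
    data = {
        "promptversion": promptversion,
        "biological_context_alignment": "",
        "contextual_relevance_alignment": "",
        "unit_coherence": "",
        "response_specificity": "",
    }
    for resp in evaluation_responses:
        for key, value in resp.items():
            if key in data:
                data[key] = value  # keep whichever metrics this entry provides
    return data

Called as de.Update(data=merge_scores(promptversion, evaluation_responses)) in place of the in-line loop.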