updated golden set code
app.py CHANGED
@@ -108,24 +108,24 @@ def evaluate_retrieved_docs(question: str, retrieved_docs: list):
 def get_document_by_name(doc_name: str) -> str:
     """Retrieve the raw HTML content of a document by its name from the `data/` folder."""

-    #
+    # Get the absolute path of the `data/` folder
     script_dir = os.path.dirname(os.path.abspath(__file__))
     data_path = os.path.join(script_dir, "data")

-    #
+    # Replace `.pdf` with `.html`
     html_doc_name = doc_name.replace(".pdf", ".html")
     full_path = os.path.join(data_path, html_doc_name)

-    #
+    # Check if the file exists
     if not os.path.exists(full_path):
         print(f"⚠️ File not found: {full_path}")
         return "No file found"

     try:
-        #
+        # Open and read the file content
         with open(full_path, "r", encoding="utf-8") as file:
             content = file.read()
-        return content  #
+        return content  # Return the raw HTML content

     except Exception as e:
         print(f"❌ Error reading file {full_path}: {str(e)}")
@@ -225,23 +225,42 @@ def compare_text_similarity(text1, text2):

 def evaluate_against_golden_set(question, model_answer):
     """Compare model-generated answers against the golden dataset."""
-
+
+    # Locate the Golden Dataset
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    data_path = os.path.join(script_dir, "data")
+    full_path = os.path.join(data_path, "testingset.json")
+
+    # Check if file exists
+    if not os.path.exists(full_path):
+        print(f"❌ Error: Golden dataset not found at {full_path}")
+        return None
+
+    # Load JSON Data
+    with open(full_path, "r", encoding="utf-8") as f:
         golden_data = json.load(f)

-    # Find
+    # Find Matching Question in the Golden Dataset
+    expected_answer = None
     for entry in golden_data:
-        if entry
-            expected_answer = entry
+        if entry.get("question", "").strip() == question.strip():
+            expected_answer = entry.get("expected_answer", "").strip()
             break
-
-
+
+    if not expected_answer:
+        print(f"⚠️ Question not found in the Golden Data Set: {question}")
+        return None
+
+    # Compare Model Answer vs Expected Answer
+    try:
+        similarity_score = compare_text_similarity(model_answer, expected_answer)
+        print(f"📊 [Evaluation] Model vs. Expected Score: {similarity_score:.2f}")
+        return similarity_score
+
+    except Exception as e:
+        print(f"❌ Error in similarity evaluation: {e}")
         return None

-    # Evaluate similarity (simple text match, or use embedding similarity)
-    similarity_score = compare_text_similarity(model_answer, expected_answer)
-
-    print(f"📊 [Evaluation] Model vs. Expected Score: {similarity_score:.2f}")
-    return similarity_score

 # **Post-Processing Node: Formats response using `ot_formatted_prompt`**
 def post_processing_node(state) -> dict:
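compare_text_similarity itself sits outside this hunk (only its signature appears in the hunk header), and the removed comment mentioned "simple text match, or use embedding similarity". A stand-in built on the standard library's difflib, assuming a lexical match is enough; the actual app.py implementation may differ:

from difflib import SequenceMatcher

def compare_text_similarity(text1: str, text2: str) -> float:
    """Rough lexical similarity in [0, 1]. Stand-in only, not the app.py version."""
    if not text1 or not text2:
        return 0.0
    return SequenceMatcher(None, text1.strip().lower(), text2.strip().lower()).ratio()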
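Putting it together, a hedged driver for the evaluation path. The inputs below are placeholders; in app.py the model answer would come from the retrieval and generation steps, and the question has to match a golden entry exactly (both sides are compared after .strip()). The function returns None when the dataset file is missing, the question has no golden entry, or the comparison raises.

# Placeholder inputs for illustration only.
question = "What is the notice period for terminating the agreement?"
model_answer = "The contract can be ended with 30 days of written notice."

score = evaluate_against_golden_set(question, model_answer)
if score is None:
    print("No golden-set score available for this question.")
else:
    print(f"Golden-set similarity: {score:.2f}")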