shivXy committed
Commit 13c47e2 · 1 Parent(s): 61688c1

updated golden set code

Files changed (1)
  1. app.py +35 -16
app.py CHANGED
@@ -108,24 +108,24 @@ def evaluate_retrieved_docs(question: str, retrieved_docs: list):
 def get_document_by_name(doc_name: str) -> str:
     """Retrieve the raw HTML content of a document by its name from the `data/` folder."""
 
-    # ✅ Get the absolute path of the `data/` folder
+    # Get the absolute path of the `data/` folder
     script_dir = os.path.dirname(os.path.abspath(__file__))
     data_path = os.path.join(script_dir, "data")
 
-    # ✅ Replace `.pdf` with `.html`
+    # Replace `.pdf` with `.html`
    html_doc_name = doc_name.replace(".pdf", ".html")
     full_path = os.path.join(data_path, html_doc_name)
 
-    # ✅ Check if the file exists
+    # Check if the file exists
     if not os.path.exists(full_path):
         print(f"⚠️ File not found: {full_path}")
         return "No file found"
 
     try:
-        # ✅ Open and read the file content
+        # Open and read the file content
         with open(full_path, "r", encoding="utf-8") as file:
             content = file.read()
-        return content  # ✅ Return the raw HTML content
+        return content  # Return the raw HTML content
 
     except Exception as e:
         print(f"❌ Error reading file {full_path}: {str(e)}")
@@ -225,23 +225,42 @@ def compare_text_similarity(text1, text2):
 
 def evaluate_against_golden_set(question, model_answer):
     """Compare model-generated answers against the golden dataset."""
-    with open("testingset.json", "r", encoding="utf-8") as f:
+
+    # Locate the Golden Dataset
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    data_path = os.path.join(script_dir, "data")
+    full_path = os.path.join(data_path, "testingset.json")
+
+    # Check if file exists
+    if not os.path.exists(full_path):
+        print(f"❌ Error: Golden dataset not found at {full_path}")
+        return None
+
+    # Load JSON Data
+    with open(full_path, "r", encoding="utf-8") as f:
         golden_data = json.load(f)
 
-    # Find the corresponding question in the dataset
+    # Find Matching Question in the Golden Dataset
+    expected_answer = None
     for entry in golden_data:
-        if entry["question"].strip() == question.strip():
-            expected_answer = entry["expected_answer"]
+        if entry.get("question", "").strip() == question.strip():
+            expected_answer = entry.get("expected_answer", "").strip()
             break
-    else:
-        print("⚠️ Question not found in the Golden Data Set.")
-        return None
 
-    # Evaluate similarity (simple text match, or use embedding similarity)
-    similarity_score = compare_text_similarity(model_answer, expected_answer)
-
-    print(f"📊 [Evaluation] Model vs. Expected Score: {similarity_score:.2f}")
-    return similarity_score
+    if not expected_answer:
+        print(f"⚠️ Question not found in the Golden Data Set: {question}")
+        return None
+
+    # Compare Model Answer vs Expected Answer
+    try:
+        similarity_score = compare_text_similarity(model_answer, expected_answer)
+        print(f"📊 [Evaluation] Model vs. Expected Score: {similarity_score:.2f}")
+        return similarity_score
+
+    except Exception as e:
+        print(f"❌ Error in similarity evaluation: {e}")
+        return None
 
 
 # **Post-Processing Node: Formats response using `ot_formatted_prompt`**
 def post_processing_node(state) -> dict:
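From the keys the new lookup reads (`question`, `expected_answer`), `data/testingset.json` is presumably a JSON array of objects shaped like the sketch below; the question and answer text here is invented for illustration:

    # Hypothetical golden-set entry; the question/answer text is invented.
    # data/testingset.json would hold a JSON array of such objects.
    golden_entry = {
        "question": "What file formats does the pipeline ingest?",
        "expected_answer": "It ingests PDF documents converted to HTML.",
    }

    # Returns a float score, or None if the file or the question is missing.
    score = evaluate_against_golden_set(
        golden_entry["question"],          # must match the stored question after .strip()
        "It ingests PDFs rendered as HTML.",
    )

Because matching uses an exact `.strip()` comparison, any rewording of the question yields None rather than a score.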
 
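`compare_text_similarity` itself is outside this diff; the removed comment ("simple text match, or use embedding similarity") and the `:.2f` formatting only pin down that it returns a float. A minimal lexical stand-in under those assumptions, not the repo's actual implementation:

    import difflib

    def compare_text_similarity(text1: str, text2: str) -> float:
        # Stand-in sketch only: rough 0-1 lexical overlap via difflib,
        # not necessarily the implementation app.py actually uses.
        return difflib.SequenceMatcher(None, text1.lower(), text2.lower()).ratio()

An embedding-based variant (cosine similarity over sentence embeddings) would drop in behind the same signature.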