updated golden set code
app.py CHANGED
@@ -108,24 +108,24 @@ def evaluate_retrieved_docs(question: str, retrieved_docs: list):
 def get_document_by_name(doc_name: str) -> str:
     """Retrieve the raw HTML content of a document by its name from the `data/` folder."""

-    #
+    # Get the absolute path of the `data/` folder
     script_dir = os.path.dirname(os.path.abspath(__file__))
     data_path = os.path.join(script_dir, "data")

-    #
+    # Replace `.pdf` with `.html`
     html_doc_name = doc_name.replace(".pdf", ".html")
     full_path = os.path.join(data_path, html_doc_name)

-    #
+    # Check if the file exists
     if not os.path.exists(full_path):
         print(f"⚠️ File not found: {full_path}")
         return "No file found"

     try:
-        #
+        # Open and read the file content
         with open(full_path, "r", encoding="utf-8") as file:
             content = file.read()
-        return content  #
+        return content  # Return the raw HTML content

     except Exception as e:
         print(f"❌ Error reading file {full_path}: {str(e)}")
@@ -225,23 +225,42 @@ def compare_text_similarity(text1, text2):

 def evaluate_against_golden_set(question, model_answer):
     """Compare model-generated answers against the golden dataset."""
-
+
+    # Locate the Golden Dataset
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    data_path = os.path.join(script_dir, "data")
+    full_path = os.path.join(data_path, "testingset.json")
+
+    # Check if file exists
+    if not os.path.exists(full_path):
+        print(f"❌ Error: Golden dataset not found at {full_path}")
+        return None
+
+    # Load JSON Data
+    with open(full_path, "r", encoding="utf-8") as f:
         golden_data = json.load(f)

-    # Find
+    # Find Matching Question in the Golden Dataset
+    expected_answer = None
     for entry in golden_data:
-        if entry
-            expected_answer = entry
+        if entry.get("question", "").strip() == question.strip():
+            expected_answer = entry.get("expected_answer", "").strip()
             break
-
-
+
+    if not expected_answer:
+        print(f"⚠️ Question not found in the Golden Data Set: {question}")
+        return None
+
+    # Compare Model Answer vs Expected Answer
+    try:
+        similarity_score = compare_text_similarity(model_answer, expected_answer)
+        print(f"📊 [Evaluation] Model vs. Expected Score: {similarity_score:.2f}")
+        return similarity_score
+
+    except Exception as e:
+        print(f"❌ Error in similarity evaluation: {e}")
         return None

-    # Evaluate similarity (simple text match, or use embedding similarity)
-    similarity_score = compare_text_similarity(model_answer, expected_answer)
-
-    print(f"📊 [Evaluation] Model vs. Expected Score: {similarity_score:.2f}")
-    return similarity_score

 # **Post-Processing Node: Formats response using `ot_formatted_prompt`**
 def post_processing_node(state) -> dict:
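compare_text_similarity itself sits outside this hunk (only its signature appears in the hunk header), and the removed comment mentioned "simple text match, or use embedding similarity". A stand-in built on the standard library's difflib, assuming a lexical match is enough; the actual app.py implementation may differ:

from difflib import SequenceMatcher

def compare_text_similarity(text1: str, text2: str) -> float:
    """Rough lexical similarity in [0, 1]. Stand-in only, not the app.py version."""
    if not text1 or not text2:
        return 0.0
    return SequenceMatcher(None, text1.strip().lower(), text2.strip().lower()).ratio()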
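Putting it together, a hedged driver for the evaluation path. The inputs below are placeholders; in app.py the model answer would come from the retrieval and generation steps, and the question has to match a golden entry exactly (both sides are compared after .strip()). The function returns None when the dataset file is missing, the question has no golden entry, or the comparison raises.

# Placeholder inputs for illustration only.
question = "What is the notice period for terminating the agreement?"
model_answer = "The contract can be ended with 30 days of written notice."

score = evaluate_against_golden_set(question, model_answer)
if score is None:
    print("No golden-set score available for this question.")
else:
    print(f"Golden-set similarity: {score:.2f}")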