Spaces:

mohbay
/

searchcsv2

Running

App Files Community

mohbay committed on Jul 5

Commit

f46813a

verified ·

1 Parent(s): b1bede2

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -7

app.py CHANGED Viewed

@@ -83,12 +83,12 @@ def predict(text):
     if len(query_words) <= 2:
         # Short queries: prioritize exact word matches
         weight = 0.6
-    elif len(query_words) <= 8:
         # Medium queries: balanced
-        weight = 0.5
     else:
         # Long queries: prioritize semantic similarity
-        weight = 0.3
     # Collect top1 with better scoring
     combined1 = []
@@ -154,10 +154,47 @@ def predict(text):
             "combined_score": combined_score
         })
-    # Get top results - consider more candidates then filter
-    top1 = sorted(combined1, key=lambda x: x["combined_score"], reverse=True)[:5]
-    top2 = sorted(combined2, key=lambda x: x["combined_score"], reverse=True)[:5]
-    top3 = sorted(combined3, key=lambda x: x["combined_score"], reverse=True)[:5]
     results = {

     if len(query_words) <= 2:
         # Short queries: prioritize exact word matches
         weight = 0.6
+    elif len(query_words) <= 5:
         # Medium queries: balanced
+        weight = 0.4
     else:
         # Long queries: prioritize semantic similarity
+        weight = 0.25
     # Collect top1 with better scoring
     combined1 = []
             "combined_score": combined_score
         })
+    # Get top results with mixed ranking strategy
+    def get_mixed_top_results(combined_results):
+        # Sort by combined score and get top 3
+        by_combined = sorted(combined_results, key=lambda x: x["combined_score"], reverse=True)
+        top_3_combined = by_combined[:3]
+        # Get the questions from top 3 to avoid duplicates
+        top_3_questions = {item["question"] for item in top_3_combined}
+        # Sort by word overlap score and find first one not in top 3
+        by_word = sorted(combined_results, key=lambda x: x["word_overlap_score"], reverse=True)
+        word_pick = None
+        for item in by_word:
+            if item["question"] not in top_3_questions:
+                word_pick = item
+                break
+        # Sort by semantic score and find first one not in top 3 or word pick
+        by_semantic = sorted(combined_results, key=lambda x: x["cosine_score"], reverse=True)
+        semantic_pick = None
+        excluded_questions = top_3_questions.copy()
+        if word_pick:
+            excluded_questions.add(word_pick["question"])
+        for item in by_semantic:
+            if item["question"] not in excluded_questions:
+                semantic_pick = item
+                break
+        # Combine results
+        final_results = top_3_combined.copy()
+        if word_pick:
+            final_results.append(word_pick)
+        if semantic_pick:
+            final_results.append(semantic_pick)
+        return final_results
+    top1 = get_mixed_top_results(combined1)
+    top2 = get_mixed_top_results(combined2)
+    top3 = get_mixed_top_results(combined3)
     results = {