mohbay committed on
Commit
f46813a
·
verified ·
1 Parent(s): b1bede2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -7
app.py CHANGED
@@ -83,12 +83,12 @@ def predict(text):
83
  if len(query_words) <= 2:
84
  # Short queries: prioritize exact word matches
85
  weight = 0.6
86
- elif len(query_words) <= 8:
87
  # Medium queries: balanced
88
- weight = 0.5
89
  else:
90
  # Long queries: prioritize semantic similarity
91
- weight = 0.3
92
 
93
  # Collect top1 with better scoring
94
  combined1 = []
@@ -154,10 +154,47 @@ def predict(text):
154
  "combined_score": combined_score
155
  })
156
 
157
- # Get top results - consider more candidates then filter
158
- top1 = sorted(combined1, key=lambda x: x["combined_score"], reverse=True)[:5]
159
- top2 = sorted(combined2, key=lambda x: x["combined_score"], reverse=True)[:5]
160
- top3 = sorted(combined3, key=lambda x: x["combined_score"], reverse=True)[:5]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  results = {
163
 
 
83
  if len(query_words) <= 2:
84
  # Short queries: prioritize exact word matches
85
  weight = 0.6
86
+ elif len(query_words) <= 5:
87
  # Medium queries: balanced
88
+ weight = 0.4
89
  else:
90
  # Long queries: prioritize semantic similarity
91
+ weight = 0.25
92
 
93
  # Collect top1 with better scoring
94
  combined1 = []
 
154
  "combined_score": combined_score
155
  })
156
 
157
+ # Get top results with mixed ranking strategy
158
def get_mixed_top_results(combined_results, *, top_n=3):
    """Return the best candidates under a mixed ranking strategy.

    The result starts with the ``top_n`` entries ranked by
    ``"combined_score"`` (highest first). It is then extended with, in
    order:

    1. the entry with the highest ``"word_overlap_score"`` whose
       ``"question"`` is not already selected, and
    2. the entry with the highest ``"cosine_score"`` whose ``"question"``
       is not already selected (including the word-overlap pick).

    Either extra pick is omitted when no eligible entry remains, so the
    result holds at most ``top_n + 2`` entries.

    Args:
        combined_results: List of dicts, each providing the keys
            ``"question"``, ``"combined_score"``, ``"word_overlap_score"``
            and ``"cosine_score"``. The list is not modified.
        top_n: How many entries to take by combined score. Defaults to 3.

    Returns:
        A new list of the selected entries (the same dict objects that
        were passed in).

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    ranked = sorted(
        combined_results, key=lambda item: item["combined_score"], reverse=True
    )
    final_results = ranked[:top_n]

    # Questions already chosen; later picks must bring in a new question.
    excluded_questions = {item["question"] for item in final_results}

    # One extra pick per individual signal, word overlap first.
    # max() returns the first maximal element, which matches taking the
    # head of a stable descending sort, without sorting the whole list.
    for score_key in ("word_overlap_score", "cosine_score"):
        pick = max(
            (
                item
                for item in combined_results
                if item["question"] not in excluded_questions
            ),
            key=lambda item: item[score_key],
            default=None,
        )
        if pick is not None:
            final_results.append(pick)
            excluded_questions.add(pick["question"])

    return final_results
194
+
195
+ top1 = get_mixed_top_results(combined1)
196
+ top2 = get_mixed_top_results(combined2)
197
+ top3 = get_mixed_top_results(combined3)
198
 
199
  results = {
200