Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -83,12 +83,12 @@ def predict(text):
|
|
83 |
if len(query_words) <= 2:
|
84 |
# Short queries: prioritize exact word matches
|
85 |
weight = 0.6
|
86 |
-
elif len(query_words) <=
|
87 |
# Medium queries: balanced
|
88 |
-
weight = 0.
|
89 |
else:
|
90 |
# Long queries: prioritize semantic similarity
|
91 |
-
weight = 0.
|
92 |
|
93 |
# Collect top1 with better scoring
|
94 |
combined1 = []
|
@@ -154,10 +154,47 @@ def predict(text):
|
|
154 |
"combined_score": combined_score
|
155 |
})
|
156 |
|
157 |
-
# Get top results
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
results = {
|
163 |
|
|
|
83 |
if len(query_words) <= 2:
|
84 |
# Short queries: prioritize exact word matches
|
85 |
weight = 0.6
|
86 |
+
elif len(query_words) <= 5:
|
87 |
# Medium queries: balanced
|
88 |
+
weight = 0.4
|
89 |
else:
|
90 |
# Long queries: prioritize semantic similarity
|
91 |
+
weight = 0.25
|
92 |
|
93 |
# Collect top1 with better scoring
|
94 |
combined1 = []
|
|
|
154 |
"combined_score": combined_score
|
155 |
})
|
156 |
|
157 |
+
# Get top results with mixed ranking strategy
|
158 |
+
def get_mixed_top_results(combined_results):
    """Return the top 3 results by combined score plus up to two extra picks.

    The extra picks keep strong single-signal matches visible even when
    their combined score does not reach the top 3:

    1. the best remaining item by ``word_overlap_score``;
    2. the best remaining item by ``cosine_score``.

    "Remaining" means the item's ``question`` is not already among the
    selected results, so no question is added twice by the extra picks.

    Args:
        combined_results: iterable of dicts, each providing the keys
            ``"question"``, ``"combined_score"``, ``"word_overlap_score"``
            and ``"cosine_score"``. The input is not modified.

    Returns:
        A new list: up to 3 items ordered by descending combined score,
        followed by the word-overlap pick and then the semantic pick, when
        such items exist. Fewer than 5 items are returned when the input
        does not contain enough distinct questions.
    """

    def _best_not_excluded(score_key, excluded):
        # max() returns the first maximal element it encounters, which is
        # the same item a stable descending sort would place first, so ties
        # resolve in input order without sorting the whole list.
        return max(
            (item for item in combined_results if item["question"] not in excluded),
            key=lambda item: item[score_key],
            default=None,
        )

    # Materialize once so a one-shot iterable can be scanned several times.
    combined_results = list(combined_results)

    # Top 3 by combined score form the backbone of the result list.
    by_combined = sorted(
        combined_results, key=lambda x: x["combined_score"], reverse=True
    )
    top_3_combined = by_combined[:3]

    # Questions already selected; grows as the extra picks are added.
    excluded_questions = {item["question"] for item in top_3_combined}

    # Best word-overlap match that is not already in the top 3.
    word_pick = _best_not_excluded("word_overlap_score", excluded_questions)
    if word_pick is not None:
        excluded_questions.add(word_pick["question"])

    # Best semantic match that is neither in the top 3 nor the word pick.
    semantic_pick = _best_not_excluded("cosine_score", excluded_questions)

    # Combine results: top 3 first, then the word pick, then the semantic pick.
    final_results = list(top_3_combined)
    if word_pick is not None:
        final_results.append(word_pick)
    if semantic_pick is not None:
        final_results.append(semantic_pick)

    return final_results
|
194 |
+
|
195 |
+
top1 = get_mixed_top_results(combined1)
|
196 |
+
top2 = get_mixed_top_results(combined2)
|
197 |
+
top3 = get_mixed_top_results(combined3)
|
198 |
|
199 |
results = {
|
200 |
|