Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -121,6 +121,12 @@ df2 = pd.read_csv("cleaned2.csv")
|
|
121 |
embeddings = torch.load("embeddings1.pt")
|
122 |
embeddings2 = torch.load("embeddings2.pt")
|
123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
def predict(text):
|
125 |
if not text or text.strip() == "":
|
126 |
return "No query provided"
|
@@ -131,32 +137,37 @@ def predict(text):
|
|
131 |
sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
|
132 |
sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
|
133 |
|
134 |
-
# Get top 3 indices
|
135 |
-
top3_idx1 = sim_scores1.topk(3)
|
136 |
-
top3_idx2 = sim_scores2.topk(3)
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
-
# Prepare results
|
139 |
results = {
|
140 |
-
"top1": [
|
141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
}
|
143 |
|
144 |
-
for idx in top3_idx1:
|
145 |
-
results["top1"].append({
|
146 |
-
"question": df.iloc[idx]["question"],
|
147 |
-
"link": df.iloc[idx]["link"],
|
148 |
-
"score": float(sim_scores1[idx])
|
149 |
-
})
|
150 |
-
|
151 |
-
for idx in top3_idx2:
|
152 |
-
results["top2"].append({
|
153 |
-
"question": df2.iloc[idx]["question"],
|
154 |
-
"link": df2.iloc[idx]["link"],
|
155 |
-
"score": float(sim_scores2[idx])
|
156 |
-
})
|
157 |
-
|
158 |
return results
|
159 |
-
|
160 |
|
161 |
# Match the EXACT structure of your working translation app
|
162 |
title = "Search CSV"
|
|
|
121 |
embeddings = torch.load("embeddings1.pt")
|
122 |
embeddings2 = torch.load("embeddings2.pt")
|
123 |
|
124 |
+
# Pre-extract DataFrame columns to avoid repeated iloc calls
|
125 |
+
df_questions = df["question"].values
|
126 |
+
df_links = df["link"].values
|
127 |
+
df2_questions = df2["question"].values
|
128 |
+
df2_links = df2["link"].values
|
129 |
+
|
130 |
def predict(text):
|
131 |
if not text or text.strip() == "":
|
132 |
return "No query provided"
|
|
|
137 |
sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
|
138 |
sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
|
139 |
|
140 |
+
# Get top 3 values and indices in one call
|
141 |
+
top3_scores1, top3_idx1 = sim_scores1.topk(3)
|
142 |
+
top3_scores2, top3_idx2 = sim_scores2.topk(3)
|
143 |
+
|
144 |
+
# Convert to CPU once
|
145 |
+
top3_idx1_cpu = top3_idx1.cpu().numpy()
|
146 |
+
top3_idx2_cpu = top3_idx2.cpu().numpy()
|
147 |
+
top3_scores1_cpu = top3_scores1.cpu().numpy()
|
148 |
+
top3_scores2_cpu = top3_scores2.cpu().numpy()
|
149 |
|
150 |
+
# Prepare results using pre-extracted arrays
|
151 |
results = {
|
152 |
+
"top1": [
|
153 |
+
{
|
154 |
+
"question": df_questions[idx],
|
155 |
+
"link": df_links[idx],
|
156 |
+
"score": float(score)
|
157 |
+
}
|
158 |
+
for idx, score in zip(top3_idx1_cpu, top3_scores1_cpu)
|
159 |
+
],
|
160 |
+
"top2": [
|
161 |
+
{
|
162 |
+
"question": df2_questions[idx],
|
163 |
+
"link": df2_links[idx],
|
164 |
+
"score": float(score)
|
165 |
+
}
|
166 |
+
for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
|
167 |
+
]
|
168 |
}
|
169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
return results
|
|
|
171 |
|
172 |
# Match the EXACT structure of your working translation app
|
173 |
title = "Search CSV"
|