Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -120,13 +120,14 @@ df = pd.read_csv("cleaned1.csv")
|
|
120 |
df2 = pd.read_csv("cleaned2.csv")
|
121 |
embeddings = torch.load("embeddings1.pt")
|
122 |
embeddings2 = torch.load("embeddings2.pt")
|
123 |
-
|
124 |
# Pre-extract DataFrame columns to avoid repeated iloc calls
|
125 |
df_questions = df["question"].values
|
126 |
df_links = df["link"].values
|
127 |
df2_questions = df2["question"].values
|
128 |
df2_links = df2["link"].values
|
129 |
-
|
|
|
130 |
def predict(text):
|
131 |
if not text or text.strip() == "":
|
132 |
return "No query provided"
|
@@ -136,17 +137,20 @@ def predict(text):
|
|
136 |
# Compute similarity scores
|
137 |
sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
|
138 |
sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
|
|
|
139 |
|
140 |
# Get top 3 values and indices in one call
|
141 |
top3_scores1, top3_idx1 = sim_scores1.topk(3)
|
142 |
top3_scores2, top3_idx2 = sim_scores2.topk(3)
|
143 |
-
|
144 |
# Convert to CPU once
|
145 |
top3_idx1_cpu = top3_idx1.cpu().numpy()
|
146 |
top3_idx2_cpu = top3_idx2.cpu().numpy()
|
|
|
|
|
147 |
top3_scores1_cpu = top3_scores1.cpu().numpy()
|
148 |
top3_scores2_cpu = top3_scores2.cpu().numpy()
|
149 |
-
|
150 |
# Prepare results using pre-extracted arrays
|
151 |
results = {
|
152 |
"top1": [
|
@@ -165,6 +169,14 @@ def predict(text):
|
|
165 |
}
|
166 |
for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
|
167 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
}
|
169 |
|
170 |
return results
|
|
|
120 |
df2 = pd.read_csv("cleaned2.csv")
|
121 |
embeddings = torch.load("embeddings1.pt")
|
122 |
embeddings2 = torch.load("embeddings2.pt")
|
123 |
+
embeddings3 = torch.load("embeddings3.pt")
|
124 |
# Pre-extract DataFrame columns to avoid repeated iloc calls
|
125 |
df_questions = df["question"].values
|
126 |
df_links = df["link"].values
|
127 |
df2_questions = df2["question"].values
|
128 |
df2_links = df2["link"].values
|
129 |
+
df3_questions = df3["question"].values
|
130 |
+
df3_links = df3["link"].values
|
131 |
def predict(text):
|
132 |
if not text or text.strip() == "":
|
133 |
return "No query provided"
|
|
|
137 |
# Compute similarity scores
|
138 |
sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
|
139 |
sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
|
140 |
+
sim_scores3 = util.pytorch_cos_sim(query_embedding, embeddings3)[0]
|
141 |
|
142 |
# Get top 3 values and indices in one call
|
143 |
top3_scores1, top3_idx1 = sim_scores1.topk(3)
|
144 |
top3_scores2, top3_idx2 = sim_scores2.topk(3)
|
145 |
+
top3_scores3, top3_idx3 = sim_scores3.topk(3)
|
146 |
# Convert to CPU once
|
147 |
top3_idx1_cpu = top3_idx1.cpu().numpy()
|
148 |
top3_idx2_cpu = top3_idx2.cpu().numpy()
|
149 |
+
top3_idx3_cpu = top3_idx3.cpu().numpy()
|
150 |
+
|
151 |
top3_scores1_cpu = top3_scores1.cpu().numpy()
|
152 |
top3_scores2_cpu = top3_scores2.cpu().numpy()
|
153 |
+
top3_scores3_cpu = top3_scores3.cpu().numpy()
|
154 |
# Prepare results using pre-extracted arrays
|
155 |
results = {
|
156 |
"top1": [
|
|
|
169 |
}
|
170 |
for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
|
171 |
]
|
172 |
+
"top3": [
|
173 |
+
{
|
174 |
+
"question": df3_questions[idx],
|
175 |
+
"link": df3_links[idx],
|
176 |
+
"score": float(score)
|
177 |
+
}
|
178 |
+
for idx, score in zip(top3_idx3_cpu, top3_scores3_cpu)
|
179 |
+
]
|
180 |
}
|
181 |
|
182 |
return results
|