mohbay committed on
Commit
30cf47b
·
verified ·
1 Parent(s): 1677a11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -21
app.py CHANGED
@@ -121,6 +121,12 @@ df2 = pd.read_csv("cleaned2.csv")
121
  embeddings = torch.load("embeddings1.pt")
122
  embeddings2 = torch.load("embeddings2.pt")
123
 
 
 
 
 
 
 
124
  def predict(text):
125
  if not text or text.strip() == "":
126
  return "No query provided"
@@ -131,32 +137,37 @@ def predict(text):
131
  sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
132
  sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
133
 
134
- # Get top 3 indices
135
- top3_idx1 = sim_scores1.topk(3).indices.cpu().numpy()
136
- top3_idx2 = sim_scores2.topk(3).indices.cpu().numpy()
 
 
 
 
 
 
137
 
138
- # Prepare results
139
  results = {
140
- "top1": [],
141
- "top2": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  }
143
 
144
- for idx in top3_idx1:
145
- results["top1"].append({
146
- "question": df.iloc[idx]["question"],
147
- "link": df.iloc[idx]["link"],
148
- "score": float(sim_scores1[idx])
149
- })
150
-
151
- for idx in top3_idx2:
152
- results["top2"].append({
153
- "question": df2.iloc[idx]["question"],
154
- "link": df2.iloc[idx]["link"],
155
- "score": float(sim_scores2[idx])
156
- })
157
-
158
  return results
159
-
160
 
161
  # Match the EXACT structure of your working translation app
162
  title = "Search CSV"
 
121
  embeddings = torch.load("embeddings1.pt")
122
  embeddings2 = torch.load("embeddings2.pt")
123
 
124
+ # Pre-extract DataFrame columns to avoid repeated iloc calls
125
+ df_questions = df["question"].values
126
+ df_links = df["link"].values
127
+ df2_questions = df2["question"].values
128
+ df2_links = df2["link"].values
129
+
130
  def predict(text):
131
  if not text or text.strip() == "":
132
  return "No query provided"
 
137
  sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
138
  sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
139
 
140
+ # Get top 3 values and indices in one call
141
+ top3_scores1, top3_idx1 = sim_scores1.topk(3)
142
+ top3_scores2, top3_idx2 = sim_scores2.topk(3)
143
+
144
+ # Convert to CPU once
145
+ top3_idx1_cpu = top3_idx1.cpu().numpy()
146
+ top3_idx2_cpu = top3_idx2.cpu().numpy()
147
+ top3_scores1_cpu = top3_scores1.cpu().numpy()
148
+ top3_scores2_cpu = top3_scores2.cpu().numpy()
149
 
150
+ # Prepare results using pre-extracted arrays
151
  results = {
152
+ "top1": [
153
+ {
154
+ "question": df_questions[idx],
155
+ "link": df_links[idx],
156
+ "score": float(score)
157
+ }
158
+ for idx, score in zip(top3_idx1_cpu, top3_scores1_cpu)
159
+ ],
160
+ "top2": [
161
+ {
162
+ "question": df2_questions[idx],
163
+ "link": df2_links[idx],
164
+ "score": float(score)
165
+ }
166
+ for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
167
+ ]
168
  }
169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  return results
 
171
 
172
  # Match the EXACT structure of your working translation app
173
  title = "Search CSV"