mohbay committed on
Commit
2e553d1
·
verified ·
1 Parent(s): fbd82e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -120,13 +120,14 @@ df = pd.read_csv("cleaned1.csv")
120
  df2 = pd.read_csv("cleaned2.csv")
121
  embeddings = torch.load("embeddings1.pt")
122
  embeddings2 = torch.load("embeddings2.pt")
123
-
124
  # Pre-extract DataFrame columns to avoid repeated iloc calls
125
  df_questions = df["question"].values
126
  df_links = df["link"].values
127
  df2_questions = df2["question"].values
128
  df2_links = df2["link"].values
129
-
 
130
  def predict(text):
131
  if not text or text.strip() == "":
132
  return "No query provided"
@@ -136,17 +137,20 @@ def predict(text):
136
  # Compute similarity scores
137
  sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
138
  sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
 
139
 
140
  # Get top 3 values and indices in one call
141
  top3_scores1, top3_idx1 = sim_scores1.topk(3)
142
  top3_scores2, top3_idx2 = sim_scores2.topk(3)
143
-
144
  # Convert to CPU once
145
  top3_idx1_cpu = top3_idx1.cpu().numpy()
146
  top3_idx2_cpu = top3_idx2.cpu().numpy()
 
 
147
  top3_scores1_cpu = top3_scores1.cpu().numpy()
148
  top3_scores2_cpu = top3_scores2.cpu().numpy()
149
-
150
  # Prepare results using pre-extracted arrays
151
  results = {
152
  "top1": [
@@ -165,6 +169,14 @@ def predict(text):
165
  }
166
  for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
167
  ]
 
 
 
 
 
 
 
 
168
  }
169
 
170
  return results
 
120
  df2 = pd.read_csv("cleaned2.csv")
121
  embeddings = torch.load("embeddings1.pt")
122
  embeddings2 = torch.load("embeddings2.pt")
123
+ embeddings3 = torch.load("embeddings3.pt")
124
  # Pre-extract DataFrame columns to avoid repeated iloc calls
125
  df_questions = df["question"].values
126
  df_links = df["link"].values
127
  df2_questions = df2["question"].values
128
  df2_links = df2["link"].values
129
+ df3_questions = df3["question"].values
130
+ df3_links = df3["link"].values
131
  def predict(text):
132
  if not text or text.strip() == "":
133
  return "No query provided"
 
137
  # Compute similarity scores
138
  sim_scores1 = util.pytorch_cos_sim(query_embedding, embeddings)[0]
139
  sim_scores2 = util.pytorch_cos_sim(query_embedding, embeddings2)[0]
140
+ sim_scores3 = util.pytorch_cos_sim(query_embedding, embeddings3)[0]
141
 
142
  # Get top 3 values and indices in one call
143
  top3_scores1, top3_idx1 = sim_scores1.topk(3)
144
  top3_scores2, top3_idx2 = sim_scores2.topk(3)
145
+ top3_scores3, top3_idx3 = sim_scores3.topk(3)
146
  # Convert to CPU once
147
  top3_idx1_cpu = top3_idx1.cpu().numpy()
148
  top3_idx2_cpu = top3_idx2.cpu().numpy()
149
+ top3_idx3_cpu = top3_idx3.cpu().numpy()
150
+
151
  top3_scores1_cpu = top3_scores1.cpu().numpy()
152
  top3_scores2_cpu = top3_scores2.cpu().numpy()
153
+ top3_scores3_cpu = top3_scores3.cpu().numpy()
154
  # Prepare results using pre-extracted arrays
155
  results = {
156
  "top1": [
 
169
  }
170
  for idx, score in zip(top3_idx2_cpu, top3_scores2_cpu)
171
  ]
172
+ "top3": [
173
+ {
174
+ "question": df3_questions[idx],
175
+ "link": df3_links[idx],
176
+ "score": float(score)
177
+ }
178
+ for idx, score in zip(top3_idx3_cpu, top3_scores3_cpu)
179
+ ]
180
  }
181
 
182
  return results