asaf1602 committed on
Commit
f1153db
·
verified ·
1 Parent(s): 27f04d2

Updated app with pre-built assets

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/faiss.index filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -76,7 +76,7 @@ def _ensure_index():
76
 
77
  def recommend(query_text: str, top_k: int = 3) -> pd.DataFrame:
78
  _ensure_index()
79
- _ensure_models() # Make sure the embedder is ready
80
  q_vec = _EMBED_MODEL.encode([query_text], normalize_embeddings=True).astype("float32")
81
  scores, idxs = _INDEX.search(q_vec, top_k)
82
  out = _DATA_DF.iloc[idxs[0]].copy()
@@ -168,7 +168,7 @@ def _copies_neighbor(s: str, neighbors_df: pd.DataFrame) -> bool:
168
  if _jaccard(s_toks, _tokens(t_low)) >= 0.7:
169
  return True
170
  try:
171
- _ensure_models() # Make sure the embedder is ready
172
  s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
173
  for _, row in neighbors_df.head(3).iterrows():
174
  t = str(row.get("tagline", "")).strip()
@@ -193,14 +193,14 @@ def _clean_slogan(text: str, max_words: int = 8) -> str:
193
  def _score_candidates(query: str, cands: List[str], neighbors_df: pd.DataFrame) -> List[tuple]:
194
  if not cands:
195
  return []
196
- _ensure_models() # Make sure the cross-encoder is ready
197
  ce_scores = np.asarray(_RERANKER.predict([(query, s) for s in cands]), dtype=np.float32) / 5.0
198
  q_toks = _tokens(query)
199
  results = []
200
 
201
  neighbor_vecs = []
202
  if neighbors_df is not None and not neighbors_df.empty:
203
- _ensure_models() # Make sure the embedder is ready
204
  for _, row in neighbors_df.head(3).iterrows():
205
  t = str(row.get("tagline","")).strip()
206
  if t:
@@ -220,7 +220,7 @@ def _score_candidates(query: str, cands: List[str], neighbors_df: pd.DataFrame)
220
  n_pen = 0.0
221
  if neighbor_vecs:
222
  try:
223
- _ensure_models() # Make sure the embedder is ready
224
  s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
225
  sim_max = max(float(np.dot(s_vec, nv)) for nv in neighbor_vecs) if neighbor_vecs else 0.0
226
  n_pen = sim_max
 
76
 
77
  def recommend(query_text: str, top_k: int = 3) -> pd.DataFrame:
78
  _ensure_index()
79
+ _ensure_models()
80
  q_vec = _EMBED_MODEL.encode([query_text], normalize_embeddings=True).astype("float32")
81
  scores, idxs = _INDEX.search(q_vec, top_k)
82
  out = _DATA_DF.iloc[idxs[0]].copy()
 
168
  if _jaccard(s_toks, _tokens(t_low)) >= 0.7:
169
  return True
170
  try:
171
+ _ensure_models()
172
  s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
173
  for _, row in neighbors_df.head(3).iterrows():
174
  t = str(row.get("tagline", "")).strip()
 
193
  def _score_candidates(query: str, cands: List[str], neighbors_df: pd.DataFrame) -> List[tuple]:
194
  if not cands:
195
  return []
196
+ _ensure_models()
197
  ce_scores = np.asarray(_RERANKER.predict([(query, s) for s in cands]), dtype=np.float32) / 5.0
198
  q_toks = _tokens(query)
199
  results = []
200
 
201
  neighbor_vecs = []
202
  if neighbors_df is not None and not neighbors_df.empty:
203
+ _ensure_models()
204
  for _, row in neighbors_df.head(3).iterrows():
205
  t = str(row.get("tagline","")).strip()
206
  if t:
 
220
  n_pen = 0.0
221
  if neighbor_vecs:
222
  try:
223
+ _ensure_models()
224
  s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
225
  sim_max = max(float(np.dot(s_vec, nv)) for nv in neighbor_vecs) if neighbor_vecs else 0.0
226
  n_pen = sim_max
assets/data.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77fbc4451a8048cbe3c037b337fa90e6d52fb5dc0d815844803de185f2843ce5
3
+ size 5942482
assets/embeddings.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4cfcdb193612010ec8507ef90766eb95be0845e45a5640a46602e4b745da7d
3
+ size 106346624
assets/faiss.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4cd2c7bf43c63dd857c96af91a599b19f1e964aebe123eee743dabbaceb3b78
3
+ size 106346541
assets/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"row_count": 34618, "dim": 768, "model": "sentence-transformers/all-mpnet-base-v2"}