Spaces:
Sleeping
Sleeping
Updated app with pre-built assets
Browse files
- .gitattributes +1 -0
- app.py +5 -5
- assets/data.parquet +3 -0
- assets/embeddings.npy +3 -0
- assets/faiss.index +3 -0
- assets/meta.json +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
assets/faiss.index filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -76,7 +76,7 @@ def _ensure_index():
|
|
76 |
|
77 |
def recommend(query_text: str, top_k: int = 3) -> pd.DataFrame:
|
78 |
_ensure_index()
|
79 |
-
_ensure_models()
|
80 |
q_vec = _EMBED_MODEL.encode([query_text], normalize_embeddings=True).astype("float32")
|
81 |
scores, idxs = _INDEX.search(q_vec, top_k)
|
82 |
out = _DATA_DF.iloc[idxs[0]].copy()
|
@@ -168,7 +168,7 @@ def _copies_neighbor(s: str, neighbors_df: pd.DataFrame) -> bool:
|
|
168 |
if _jaccard(s_toks, _tokens(t_low)) >= 0.7:
|
169 |
return True
|
170 |
try:
|
171 |
-
_ensure_models()
|
172 |
s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
|
173 |
for _, row in neighbors_df.head(3).iterrows():
|
174 |
t = str(row.get("tagline", "")).strip()
|
@@ -193,14 +193,14 @@ def _clean_slogan(text: str, max_words: int = 8) -> str:
|
|
193 |
def _score_candidates(query: str, cands: List[str], neighbors_df: pd.DataFrame) -> List[tuple]:
|
194 |
if not cands:
|
195 |
return []
|
196 |
-
_ensure_models()
|
197 |
ce_scores = np.asarray(_RERANKER.predict([(query, s) for s in cands]), dtype=np.float32) / 5.0
|
198 |
q_toks = _tokens(query)
|
199 |
results = []
|
200 |
|
201 |
neighbor_vecs = []
|
202 |
if neighbors_df is not None and not neighbors_df.empty:
|
203 |
-
_ensure_models()
|
204 |
for _, row in neighbors_df.head(3).iterrows():
|
205 |
t = str(row.get("tagline","")).strip()
|
206 |
if t:
|
@@ -220,7 +220,7 @@ def _score_candidates(query: str, cands: List[str], neighbors_df: pd.DataFrame)
|
|
220 |
n_pen = 0.0
|
221 |
if neighbor_vecs:
|
222 |
try:
|
223 |
-
_ensure_models()
|
224 |
s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
|
225 |
sim_max = max(float(np.dot(s_vec, nv)) for nv in neighbor_vecs) if neighbor_vecs else 0.0
|
226 |
n_pen = sim_max
|
|
|
76 |
|
77 |
def recommend(query_text: str, top_k: int = 3) -> pd.DataFrame:
|
78 |
_ensure_index()
|
79 |
+
_ensure_models()
|
80 |
q_vec = _EMBED_MODEL.encode([query_text], normalize_embeddings=True).astype("float32")
|
81 |
scores, idxs = _INDEX.search(q_vec, top_k)
|
82 |
out = _DATA_DF.iloc[idxs[0]].copy()
|
|
|
168 |
if _jaccard(s_toks, _tokens(t_low)) >= 0.7:
|
169 |
return True
|
170 |
try:
|
171 |
+
_ensure_models()
|
172 |
s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
|
173 |
for _, row in neighbors_df.head(3).iterrows():
|
174 |
t = str(row.get("tagline", "")).strip()
|
|
|
193 |
def _score_candidates(query: str, cands: List[str], neighbors_df: pd.DataFrame) -> List[tuple]:
|
194 |
if not cands:
|
195 |
return []
|
196 |
+
_ensure_models()
|
197 |
ce_scores = np.asarray(_RERANKER.predict([(query, s) for s in cands]), dtype=np.float32) / 5.0
|
198 |
q_toks = _tokens(query)
|
199 |
results = []
|
200 |
|
201 |
neighbor_vecs = []
|
202 |
if neighbors_df is not None and not neighbors_df.empty:
|
203 |
+
_ensure_models()
|
204 |
for _, row in neighbors_df.head(3).iterrows():
|
205 |
t = str(row.get("tagline","")).strip()
|
206 |
if t:
|
|
|
220 |
n_pen = 0.0
|
221 |
if neighbor_vecs:
|
222 |
try:
|
223 |
+
_ensure_models()
|
224 |
s_vec = _EMBED_MODEL.encode([s])[0]; s_vec = s_vec / np.linalg.norm(s_vec)
|
225 |
sim_max = max(float(np.dot(s_vec, nv)) for nv in neighbor_vecs) if neighbor_vecs else 0.0
|
226 |
n_pen = sim_max
|
assets/data.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77fbc4451a8048cbe3c037b337fa90e6d52fb5dc0d815844803de185f2843ce5
|
3 |
+
size 5942482
|
assets/embeddings.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f4cfcdb193612010ec8507ef90766eb95be0845e45a5640a46602e4b745da7d
|
3 |
+
size 106346624
|
assets/faiss.index
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4cd2c7bf43c63dd857c96af91a599b19f1e964aebe123eee743dabbaceb3b78
|
3 |
+
size 106346541
|
assets/meta.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"row_count": 34618, "dim": 768, "model": "sentence-transformers/all-mpnet-base-v2"}
|