Spaces:

philtoms
/

minilm-alice-base-rsft-v1

Sleeping

App Files Files Community

philtoms committed on Jul 11

Commit

a8c1cc6

verified ·

1 Parent(s): 4a81ba0

Create App.py

Browse files

Files changed (1) hide show

app.py +65 -0

app.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import gradio as gr
+import time
+from sentence_transformers import SentenceTransformer, util
+import os
+import json
+# Determine model path based on environment
+if "HF_SPACE_ID" in os.environ:
+    # Running on Hugging Face Spaces
+    # Assumes the model is in a repository with the same name as the space
+    space_name = os.environ["HF_SPACE_ID"].split("/")[-1]
+    model_path = f"{os.environ['HF_USER_NAME']}/{space_name}"
+    print(f"Running on HF Spaces. Using model: {model_path}")
+else:
+    # Running locally
+    model_path = "../models/minilm-alice-base-rsft-v1/final"
+    print(f"Running locally. Using model: {model_path}")
+# Load the model
+model = SentenceTransformer(model_path)
+# Load the dataset
+# Adjust the data path for local vs. HF environment
+data_path = "data/alice_pairs.jsonl" if "HF_SPACE_ID" in os.environ else "../data/alice_pairs.jsonl"
+dataset = []
+with open(data_path, "r") as f:
+    for line in f:
+        dataset.append(json.loads(line))
+corpus = [item["passage"] for item in dataset]
+corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
+def find_similar(prompt, top_k):
+    start_time = time.time()
+    prompt_embedding = model.encode(prompt, convert_to_tensor=True)
+    cos_scores = util.cos_sim(prompt_embedding, corpus_embeddings)[0]
+    top_results = cos_scores.topk(k=int(top_k))
+    end_time = time.time()
+    results = []
+    for score, idx in zip(top_results[0], top_results[1]):
+        results.append((corpus[idx], score.item()))
+    return results, f"{(end_time - start_time) * 1000:.2f} ms"
+iface = gr.Interface(
+    fn=find_similar,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
+        gr.Slider(1, 20, value=5, step=1, label="Top K")
+    ],
+    outputs=[
+        gr.Dataframe(headers=["Response", "Score"]),
+        gr.Textbox(label="Time Taken")
+    ],
+    title="RSFT Alice embeddings",
+    description="Enter a prompt and get the most similar sentences from the corpus."
+)
+if __name__ == "__main__":
+    iface.launch()