space_9

Sleeping

App Files Files Community

Frenchizer committed on Jan 26

Commit

3f40561

verified ·

1 Parent(s): 86d39de

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -7

app.py CHANGED Viewed

@@ -1,9 +1,22 @@
 import gradio as gr
-from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
-# Load the model and precompute label embeddings
-context_model = SentenceTransformer("all-MiniLM-L6-v2")
 labels = [
     "aerospace", "anatomy", "anthropology", "art",
     "automotive", "blockchain", "biology", "chemistry",
@@ -18,10 +31,27 @@ labels = [
     "robotics", "slang", "social media", "speech", "sports",
     "sustained", "technical", "theater", "tourism", "travel"
 ]
-label_embeddings = context_model.encode(labels)
 def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
-    input_embedding = context_model.encode([input_text])
     similarities = cosine_similarity(input_embedding, label_embeddings)[0]
     for label, score in zip(labels, similarities):
@@ -33,7 +63,6 @@ def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold
     return [label for label, score in sorted_labels] if sorted_labels else ["general"]
 # Translation client
-from gradio_client import Client
 translation_client = Client("Frenchizer/space_3")
 def translate_text(input_text):
@@ -54,4 +83,4 @@ interface = gr.Interface(
     description="Translate text from English to French with context detection."
 )
-interface.launch()

+import functools
 import gradio as gr
+import numpy as np
+import torch
+from gradio_client import Client
 from sklearn.metrics.pairwise import cosine_similarity
+from transformers import AutoTokenizer, AutoModel
+# Cache the model and tokenizer
+# Gradio exposes no `gr.cache` decorator; functools.lru_cache memoizes this
+# zero-argument loader so repeated calls reuse the same objects.
+@functools.lru_cache(maxsize=None)
+def load_model_and_tokenizer():
+    """Load the tokenizer and encoder model used for context detection.
+
+    Returns:
+        A ``(tokenizer, model)`` tuple obtained from
+        ``AutoTokenizer.from_pretrained`` and ``AutoModel.from_pretrained``
+        for the ``model_name`` checkpoint below.
+    """
+    model_name = "Frenchizer/all-MiniLM-L6-v2"  # Replace with your Space and model path
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+    return tokenizer, model
+# Load the model and tokenizer
+tokenizer, model = load_model_and_tokenizer()
+# Precompute label embeddings
 labels = [
     "aerospace", "anatomy", "anthropology", "art",
     "automotive", "blockchain", "biology", "chemistry",
     "robotics", "slang", "social media", "speech", "sports",
     "sustained", "technical", "theater", "tourism", "travel"
 ]
+# Gradio exposes no `gr.cache` decorator; functools.lru_cache memoizes this
+# zero-argument function so the label embeddings are computed only once.
+@functools.lru_cache(maxsize=None)
+def precompute_label_embeddings():
+    """Encode every entry of the module-level ``labels`` list.
+
+    Returns:
+        A NumPy array with one mean-pooled embedding row per label, in the
+        same order as ``labels``.
+    """
+    def encode_text(texts):
+        inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
+        with torch.no_grad():
+            outputs = model(**inputs)
+        token_embeddings = outputs.last_hidden_state
+        # Labels are batch-encoded with padding, so weight each token by the
+        # attention mask; a plain mean would let pad tokens dilute the
+        # embeddings of the shorter labels.
+        mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
+        summed = (token_embeddings * mask).sum(dim=1)
+        counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
+        return (summed / counts).numpy()
+    return encode_text(labels)
+label_embeddings = precompute_label_embeddings()
+# Function to detect context
 def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
+    def encode_text(texts):
+        inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
+        with torch.no_grad():
+            outputs = model(**inputs)
+        return outputs.last_hidden_state.mean(dim=1).numpy()  # Use mean pooling for embeddings
+    input_embedding = encode_text([input_text])
     similarities = cosine_similarity(input_embedding, label_embeddings)[0]
     for label, score in zip(labels, similarities):
     return [label for label, score in sorted_labels] if sorted_labels else ["general"]
 # Translation client
 translation_client = Client("Frenchizer/space_3")
 def translate_text(input_text):
     description="Translate text from English to French with context detection."
 )
+interface.launch()