space_9

Sleeping

App Files Files Community

Frenchizer committed on Jan 26

Commit

bf1807a

verified ·

1 Parent(s): 6b27907

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -42

app.py CHANGED Viewed

@@ -1,8 +1,6 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModel
-from sklearn.metrics.pairwise import cosine_similarity
 import torch
-import numpy as np
 from gradio_client import Client
 from functools import lru_cache
@@ -17,49 +15,21 @@ def load_model_and_tokenizer():
 # Load the model and tokenizer
 tokenizer, model = load_model_and_tokenizer()
-# Precompute label embeddings
-labels = [
-    "aerospace", "anatomy", "anthropology", "art",
-    "automotive", "blockchain", "biology", "chemistry",
-    "cryptocurrency", "data science", "design", "e-commerce",
-    "education", "engineering", "entertainment", "environment",
-    "fashion", "finance", "food commerce", "general",
-    "gaming", "healthcare", "history", "html",
-    "information technology", "IT", "keywords", "legal",
-    "literature", "machine learning", "marketing", "medicine",
-    "music", "personal development", "philosophy", "physics",
-    "politics", "poetry", "programming", "real estate", "retail",
-    "robotics", "slang", "social media", "speech", "sports",
-    "sustained", "technical", "theater", "tourism", "travel"
-]
-@lru_cache(maxsize=1)
-def precompute_label_embeddings():
-    inputs = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
-    with torch.no_grad():
-        outputs = model(**inputs)
-    return outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embeddings
-label_embeddings = precompute_label_embeddings()
-# Function to detect context (optimized)
-def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
-    # Encode the input text
     inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
-    input_embedding = outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embedding
-    # Compute cosine similarities (optimized)
-    similarities = cosine_similarity(input_embedding, label_embeddings)[0]
-    # Find top-N labels based on thresholds
-    top_indices = np.argsort(similarities)[-max_results:][::-1]
-    top_labels = [labels[i] for i in top_indices if similarities[i] >= fallback_threshold]
-    # Return high-confidence labels if any, otherwise fallback labels
-    high_conf_labels = [label for label in top_labels if similarities[labels.index(label)] >= high_confidence_threshold]
-    return high_conf_labels if high_conf_labels else top_labels[:max_results]
 # Translation client
 translation_client = Client("Frenchizer/space_3")

 import gradio as gr
 from transformers import AutoTokenizer, AutoModel
 import torch
 from gradio_client import Client
 from functools import lru_cache
 # Load the model and tokenizer
 tokenizer, model = load_model_and_tokenizer()
+# Function to detect context (simplified)
+def detect_context(input_text):
+    # Tokenize the input text
     inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
+    # Run the model
     with torch.no_grad():
         outputs = model(**inputs)
+    # Get the embedding (mean pooling)
+    input_embedding = outputs.last_hidden_state.mean(dim=1).numpy()
+    # For now, return a placeholder context
+    # You can replace this with a more sophisticated logic if needed
+    return ["general"]
 # Translation client
 translation_client = Client("Frenchizer/space_3")