# space_9 / app.py
# Frenchizer's picture
# Update app.py
# 4662c92 verified
# raw
# history blame
# 3.92 kB
import gradio as gr
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import torch
import numpy as np
from gradio_client import Client
from functools import lru_cache
# Memoised loader: lru_cache(maxsize=1) keeps the single (tokenizer, model) pair
# so repeated calls do not reload from disk.
@lru_cache(maxsize=1)
def load_model_and_tokenizer():
    """Load the tokenizer and encoder from the local model directory.

    Returns:
        A ``(tokenizer, model)`` tuple created with ``from_pretrained``.
    """
    checkpoint = "./all-MiniLM-L6-v2"  # Replace with your Space and model path
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModel.from_pretrained(checkpoint),
    )
# Load the model and tokenizer once at import time; the loader is wrapped in
# lru_cache, so any later call returns these same objects.
tokenizer, model = load_model_and_tokenizer()
# Candidate context labels. Each label is embedded with the same model as the
# user input, and detect_context ranks the input against every entry here.
# The order matters only in that it must match the rows of the label
# embedding matrix, which is built from this list.
labels = [
"aerospace", "anatomy", "anthropology", "art",
"automotive", "blockchain", "biology", "chemistry",
"cryptocurrency", "data science", "design", "e-commerce",
"education", "engineering", "entertainment", "environment",
"fashion", "finance", "food commerce", "general",
"gaming", "healthcare", "history", "html",
"information technology", "IT", "keywords", "legal",
"literature", "machine learning", "marketing", "medicine",
"music", "personal development", "philosophy", "physics",
"politics", "poetry", "programming", "real estate", "retail",
"robotics", "slang", "social media", "speech", "sports",
"sustained", "technical", "theater", "tourism", "travel"
]
@lru_cache(maxsize=1)
def precompute_label_embeddings():
    """Embed every entry of ``labels`` and return the result as a NumPy array.

    The labels are tokenized as one padded batch, run through ``model`` with
    gradients disabled, and mean-pooled over the token dimension. The result
    is cached, so the model is only run once per process.

    Returns:
        A NumPy array with one row per label, in the same order as ``labels``.
    """
    inputs = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    token_states = outputs.last_hidden_state
    # The batch is padded to the longest label, so shorter labels carry pad
    # tokens. A plain mean over dim=1 would average those pad positions into
    # the embedding; weighting by the attention mask keeps only real tokens.
    # (Assumes the tokenizer returns "attention_mask", which is the default
    # for Hugging Face BERT-style tokenizers.)
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_states.dtype)
    summed = (token_states * mask).sum(dim=1)
    # clamp guards against division by zero for a (theoretical) all-pad row.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return (summed / token_counts).numpy()
# Built once at import time; detect_context compares each input embedding
# against the rows of this array (one row per entry in `labels`).
label_embeddings = precompute_label_embeddings()
# Turn a vector of raw scores into a probability distribution.
def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The maximum is subtracted before exponentiating so large scores do not
    overflow; this shift does not change the result.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum()
    return weights / total
# Rank the candidate labels by how similar they are to the input text.
def detect_context(input_text, top_n=3):
    """Return the ``top_n`` labels most similar to ``input_text``.

    The text is embedded with the shared tokenizer/model (mean-pooled over
    tokens), compared to the precomputed label embeddings by cosine
    similarity, and the similarities are passed through ``softmax``.

    Args:
        input_text: The text to classify.
        top_n: How many of the highest-scoring labels to return.

    Returns:
        A list of ``(label, probability)`` tuples, highest probability first.
    """
    encoded = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
    query_embedding = hidden_states.mean(dim=1).numpy()  # mean pooling over tokens

    # Row 0 holds the similarities for the single input sentence.
    scores = softmax(cosine_similarity(query_embedding, label_embeddings)[0])

    ranked = sorted(zip(labels, scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
# Translation client: a gradio_client connection to the "Frenchizer/space_7"
# Space, created once at import time and reused by translate_text.
translation_client = Client("Frenchizer/space_7")
def translate_text(input_text):
    """Send ``input_text`` to the remote translation Space and return its reply."""
    translated = translation_client.predict(input_text)
    return translated
def process_request(input_text):
    """Translate ``input_text`` and detect its most likely contexts.

    The detected contexts are also printed to stdout.

    Returns:
        A ``(translation, context_results)`` tuple, where ``context_results``
        is whatever ``detect_context`` returns for the same input.
    """
    translated = translate_text(input_text)  # remote translation first
    detected = detect_context(input_text)    # then local context ranking
    print("Detected Contexts (Top 3):", detected)
    return translated, detected
# Gradio callback: turns the pipeline result into one display string.
def gradio_interface(input_text):
    """Run the translate-and-detect pipeline and format the result as text.

    Returns:
        A string with the translation followed by one bullet line per
        detected context, each showing its confidence to four decimals.
    """
    translation, contexts = process_request(input_text)
    header = f"Translation: {translation}\n\nDetected Contexts (Top 3):\n"
    bullet_lines = "".join(
        f"- {context} (confidence: {score:.4f})\n" for context, score in contexts
    )
    return (header + bullet_lines).strip()
# Create the Gradio interface: a single text box in, a single text box out,
# with gradio_interface as the callback that produces the output string.
interface = gr.Interface(
fn=gradio_interface,
inputs="text",
outputs="text",
title="Frenchizer",
description="Translate text from English to French with context detection."
)
# Start the web server. This runs at import time (there is no __main__ guard),
# so importing this module launches the app.
interface.launch()