Frenchizer committed on
Commit
3f40561
·
verified ·
1 Parent(s): 86d39de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -7
app.py CHANGED
@@ -1,9 +1,22 @@
1
  import gradio as gr
2
- from sentence_transformers import SentenceTransformer
3
  from sklearn.metrics.pairwise import cosine_similarity
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- # Load the model and precompute label embeddings
6
- context_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
7
  labels = [
8
  "aerospace", "anatomy", "anthropology", "art",
9
  "automotive", "blockchain", "biology", "chemistry",
@@ -18,10 +31,27 @@ labels = [
18
  "robotics", "slang", "social media", "speech", "sports",
19
  "sustained", "technical", "theater", "tourism", "travel"
20
  ]
21
- label_embeddings = context_model.encode(labels)
22
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
24
- input_embedding = context_model.encode([input_text])
 
 
 
 
 
 
25
  similarities = cosine_similarity(input_embedding, label_embeddings)[0]
26
 
27
  for label, score in zip(labels, similarities):
@@ -33,7 +63,6 @@ def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold
33
  return [label for label, score in sorted_labels] if sorted_labels else ["general"]
34
 
35
  # Translation client
36
- from gradio_client import Client
37
  translation_client = Client("Frenchizer/space_3")
38
 
39
  def translate_text(input_text):
@@ -54,4 +83,4 @@ interface = gr.Interface(
54
  description="Translate text from English to French with context detection."
55
  )
56
 
57
- interface.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModel
3
  from sklearn.metrics.pairwise import cosine_similarity
4
+ import torch
5
+ import numpy as np
6
+ from gradio_client import Client
7
+
8
# Load the model and tokenizer.
# The loader is invoked once at module import, so the returned objects are
# shared through module-level names and no caching decorator is required.
def load_model_and_tokenizer():
    """Load the tokenizer and encoder model used for context detection.

    Both objects are fetched with ``from_pretrained`` from the same model
    identifier.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    model_name = "Frenchizer/all-MiniLM-L6-v2"  # Replace with your Space and model path
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    return tokenizer, model
15
 
16
+ # Load the model and tokenizer
17
+ tokenizer, model = load_model_and_tokenizer()
18
+
19
+ # Precompute label embeddings
20
  labels = [
21
  "aerospace", "anatomy", "anthropology", "art",
22
  "automotive", "blockchain", "biology", "chemistry",
 
31
  "robotics", "slang", "social media", "speech", "sports",
32
  "sustained", "technical", "theater", "tourism", "travel"
33
  ]
 
34
 
35
def precompute_label_embeddings():
    """Encode every entry of ``labels`` into one embedding row.

    The labels are tokenized together as a padded batch, run through the
    model without gradient tracking, and mean-pooled over the token axis.

    Returns:
        A NumPy array with one row per label, in the order of ``labels``.
    """

    def encode_text(texts):
        inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)
        token_states = outputs.last_hidden_state
        # Padding positions must not contribute to the mean: with a plain
        # mean over dim=1, a short label's embedding would depend on how
        # much padding the longest label in the batch forces onto it.
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_states.dtype)
        summed = (token_states * mask).sum(dim=1)
        token_counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
        return (summed / token_counts).numpy()

    return encode_text(labels)
43
+
44
+ label_embeddings = precompute_label_embeddings()
45
+
46
+ # Function to detect context
47
  def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
48
+ def encode_text(texts):
49
+ inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
50
+ with torch.no_grad():
51
+ outputs = model(**inputs)
52
+ return outputs.last_hidden_state.mean(dim=1).numpy() # Use mean pooling for embeddings
53
+
54
+ input_embedding = encode_text([input_text])
55
  similarities = cosine_similarity(input_embedding, label_embeddings)[0]
56
 
57
  for label, score in zip(labels, similarities):
 
63
  return [label for label, score in sorted_labels] if sorted_labels else ["general"]
64
 
65
  # Translation client
 
66
  translation_client = Client("Frenchizer/space_3")
67
 
68
  def translate_text(input_text):
 
83
  description="Translate text from English to French with context detection."
84
  )
85
 
86
+ interface.launch()