Frenchizer committed on
Commit
86d39de
·
verified ·
1 Parent(s): 8cf36e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -73
app.py CHANGED
@@ -1,73 +1,57 @@
1
- import gradio as gr
2
- import numpy as np
3
- from sentence_transformers import SentenceTransformer
4
- from sklearn.metrics.pairwise import cosine_similarity
5
-
6
- # Load the new model and tokenizer
7
- context_model = SentenceTransformer("all-MiniLM-L6-v2")
8
-
9
- # Define your labels
10
- labels = [
11
- "aerospace", "anatomy", "anthropology", "art",
12
- "automotive", "blockchain", "biology", "chemistry",
13
- "cryptocurrency", "data science", "design", "e-commerce",
14
- "education", "engineering", "entertainment", "environment",
15
- "fashion", "finance", "food commerce", "general",
16
- "gaming", "healthcare", "history", "html",
17
- "information technology", "IT", "keywords", "legal",
18
- "literature", "machine learning", "marketing", "medicine",
19
- "music", "personal development", "philosophy", "physics",
20
- "politics", "poetry", "programming", "real estate", "retail",
21
- "robotics", "slang", "social media", "speech", "sports",
22
- "sustained", "technical", "theater", "tourism", "travel"
23
- ]
24
-
25
- # Pre-compute label embeddings
26
- label_embeddings = context_model.encode(labels)
27
-
28
- def detect_context(input_text, top_n=3, score_threshold=0.05):
29
- # Encode input text
30
- input_embedding = context_model.encode([input_text])
31
-
32
- # Compute cosine similarity with labels
33
- similarities = cosine_similarity(input_embedding, label_embeddings)[0]
34
-
35
- # Pair labels with scores
36
- label_scores = [(label, score) for label, score in zip(labels, similarities)]
37
-
38
- # Sort by score and filter by threshold
39
- sorted_labels = sorted(label_scores, key=lambda x: x[1], reverse=True)
40
- filtered_labels = [label for label, score in sorted_labels if score > score_threshold]
41
-
42
- # Return top N contexts
43
- return filtered_labels[:top_n] if filtered_labels else ["general"]
44
-
45
- # Translation client for space_3
46
- from gradio_client import Client
47
- translation_client = Client("Frenchizer/space_3") # Replace with your Space name
48
-
49
- def translate_text(input_text):
50
- # Call the translation model
51
- result = translation_client.predict(input_text)
52
- return result
53
-
54
- def process_request(input_text):
55
- # Detect context
56
- context = detect_context(input_text)
57
- print(f"Detected context: {context}")
58
-
59
- # Translate text
60
- translation = translate_text(input_text)
61
- return translation
62
-
63
- # Create a Gradio interface
64
- interface = gr.Interface(
65
- fn=process_request,
66
- inputs="text",
67
- outputs="text",
68
- title="Frenchizer",
69
- description="Translate text from English to French with context detection."
70
- )
71
-
72
- # Launch the Gradio app
73
- interface.launch()
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+
5
+ # Load the model and precompute label embeddings
6
+ context_model = SentenceTransformer("all-MiniLM-L6-v2")
7
+ labels = [
8
+ "aerospace", "anatomy", "anthropology", "art",
9
+ "automotive", "blockchain", "biology", "chemistry",
10
+ "cryptocurrency", "data science", "design", "e-commerce",
11
+ "education", "engineering", "entertainment", "environment",
12
+ "fashion", "finance", "food commerce", "general",
13
+ "gaming", "healthcare", "history", "html",
14
+ "information technology", "IT", "keywords", "legal",
15
+ "literature", "machine learning", "marketing", "medicine",
16
+ "music", "personal development", "philosophy", "physics",
17
+ "politics", "poetry", "programming", "real estate", "retail",
18
+ "robotics", "slang", "social media", "speech", "sports",
19
+ "sustained", "technical", "theater", "tourism", "travel"
20
+ ]
21
+ label_embeddings = context_model.encode(labels)
22
+
23
+ def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
24
+ input_embedding = context_model.encode([input_text])
25
+ similarities = cosine_similarity(input_embedding, label_embeddings)[0]
26
+
27
+ for label, score in zip(labels, similarities):
28
+ if score >= high_confidence_threshold:
29
+ return [label]
30
+
31
+ label_scores = [(label, score) for label, score in zip(labels, similarities) if score >= fallback_threshold]
32
+ sorted_labels = sorted(label_scores, key=lambda x: x[1], reverse=True)[:max_results]
33
+ return [label for label, score in sorted_labels] if sorted_labels else ["general"]
34
+
35
+ # Translation client
36
+ from gradio_client import Client
37
+ translation_client = Client("Frenchizer/space_3")
38
+
39
+ def translate_text(input_text):
40
+ return translation_client.predict(input_text)
41
+
42
+ def process_request(input_text):
43
+ context = detect_context(input_text)
44
+ print(f"Detected context: {context}")
45
+ translation = translate_text(input_text)
46
+ return translation
47
+
48
+ # Gradio interface
49
+ interface = gr.Interface(
50
+ fn=process_request,
51
+ inputs="text",
52
+ outputs="text",
53
+ title="Frenchizer",
54
+ description="Translate text from English to French with context detection."
55
+ )
56
+
57
+ interface.launch()