# Hugging Face Spaces status: Sleeping
import time | |
from typing import Dict, List | |
import gradio as gr | |
import pandas as pd | |
from transformers import pipeline | |
class NERDemo:
    """Gradio demo around a multilingual XLM-RoBERTa NER pipeline.

    Attributes:
        ner_pipeline: Hugging Face ``"ner"`` pipeline created with
            ``aggregation_strategy="simple"``. It is called with the raw input
            text and its result is treated as a list of entity dicts that
            each carry an ``"entity_group"`` key.
        supported_languages: Mapping of language code to display name; the
            display names populate the language dropdown.
    """

    # Example rows offered in the UI: [input text, language display name].
    _EXAMPLES = [
        [
            "Emma Watson starred in Harry Potter and studied at Oxford University while working with United Nations.",
            "English",
        ],
        [
            "Die Deutsche Bank hat ihren Hauptsitz in Frankfurt, während BMW in München produziert.",
            "German",
        ],
        [
            "Enes Fehmi Manan, İzmir'de yaşıyor ve Fibababanka'da çalışıyor.",
            "Turkish",
        ],
        [
            "Le Louvre à Paris expose la Joconde de Leonardo da Vinci depuis le XIXe siècle.",
            "French",
        ],
        [
            "El Real Madrid jugará contra el Barcelona en el Santiago Bernabéu el próximo mes.",
            "Spanish",
        ],
    ]

    def __init__(self):
        """Load the NER pipeline and register the supported languages."""
        self.ner_pipeline = pipeline(
            "ner",
            model="enesmanan/multilingual-xlm-roberta-ner",
            aggregation_strategy="simple",
        )
        self.supported_languages = {
            "en": "English",
            "de": "German",
            "tr": "Turkish",
            "es": "Spanish",
            "fr": "French",
        }

    def process_ner(self, text: str, language: str) -> Dict:
        """Run the NER pipeline on ``text`` and return entities with metadata.

        Args:
            text: Text to analyse. Empty, ``None`` or whitespace-only input
                short-circuits without calling the model.
            language: Language display name selected in the UI. It is not
                consulted here; the same pipeline handles every input.

        Returns:
            A dict that always contains the same four keys:
            ``"text"`` (the input, or ``""`` when it was ``None``),
            ``"entities"`` (the pipeline output, ``[]`` when nothing ran),
            ``"stats"`` (entity group -> occurrence count) and
            ``"processing_time"`` (pipeline wall time in milliseconds,
            rounded to two decimals; ``0.0`` when the model was skipped).
        """
        # Return the full result shape even when there is nothing to analyse:
        # callers index "stats" and "processing_time" unconditionally.
        if not text or not text.strip():
            return {
                "text": text or "",
                "entities": [],
                "stats": {},
                "processing_time": 0.0,
            }

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments.
        started = time.perf_counter()
        entities = self.ner_pipeline(text)
        processing_time = round((time.perf_counter() - started) * 1000, 2)  # ms

        if entities:
            counts = pd.DataFrame(entities)["entity_group"].value_counts()
            # Coerce to built-in types so the stats are always JSON-friendly.
            entity_stats = {str(group): int(n) for group, n in counts.items()}
        else:
            entity_stats = {}

        return {
            "text": text,
            "entities": entities,
            "stats": entity_stats,
            "processing_time": processing_time,
        }

    @staticmethod
    def _build_theme():
        """Return the customised Gradio base theme used by the demo."""
        return gr.themes.Base(
            primary_hue="blue",
            secondary_hue="slate",
            font=gr.themes.GoogleFont("Source Sans Pro"),
            neutral_hue="slate",
        ).set(
            body_text_color="*neutral_950",
            block_background_fill="*neutral_50",
            block_border_width="0px",
            button_primary_background_fill="*primary_500",
            button_primary_background_fill_hover="*primary_600",
            button_primary_text_color="white",
            input_background_fill="white",
            block_radius="lg",
        )

    def create_demo(self) -> "gr.Blocks":
        """Create and configure the Gradio interface.

        Returns:
            The ``gr.Blocks`` app with the input widgets, the statistics
            panel, the highlighted-entity view, the examples and the button
            handlers wired up. The caller is responsible for launching it.
        """
        theme = self._build_theme()

        with gr.Blocks(theme=theme) as demo:
            with gr.Row():
                gr.HTML(
                    """
                    <div style="text-align: center; max-width: 800px; margin: 0 auto; padding: 1rem; font-family: 'Source Sans Pro', -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Arial', sans-serif;">
                    <h1 style="color: #374151; font-size: 2.5rem; font-weight: 600; margin-bottom: 0.5rem;">
                    Multilingual Named Entity Recognition
                    </h1>
                    <p style="color: #6B7280; font-size: 1.1rem; line-height: 1.5; margin-top: 0.5rem;">
                    This demo uses XLM-RoBERTa model fine-tuned for NER tasks in multiple languages.
                    Automatically detects and highlights named entities such as persons, organizations, locations, and more.
                    </p>
                    </div>
                    """
                )

            with gr.Row():
                with gr.Column(scale=3):
                    text_input = gr.Textbox(
                        label="Input Text",
                        placeholder="Enter text in any supported language...",
                        lines=3,
                    )
                    language = gr.Dropdown(
                        choices=list(self.supported_languages.values()),
                        label="Language (Optional)",
                        value="English",
                    )
                    with gr.Row():
                        submit_btn = gr.Button("Analyze", variant="primary")
                        clear_btn = gr.Button("Clear")

                with gr.Column(scale=2):
                    with gr.Group():
                        gr.HTML(
                            """
                            <div style="margin-bottom: 1rem;">
                            <h3 style="color: #374151; font-size: 1.25rem; font-weight: 600; margin-bottom: 0.5rem;">
                            Entity Statistics
                            </h3>
                            </div>
                            """
                        )
                        stats_output = gr.Json(label="Detected Entities")
                        time_output = gr.Markdown(elem_classes="text-sm text-gray-600")

            highlighted_output = gr.HighlightedText(
                label="Detected Entities", show_legend=True
            )

            # Example inputs
            gr.Examples(self._EXAMPLES, inputs=[text_input, language])

            # Event handlers
            def process_and_format(text: str, lang: str) -> tuple:
                """Map a ``process_ner`` result onto the three output widgets."""
                result = self.process_ner(text, lang)
                # The highlighter only needs the text and its entity spans.
                highlighted = {
                    "text": result["text"],
                    "entities": result["entities"],
                }
                time_msg = f"Processing time: {result['processing_time']} ms"
                return (highlighted, result["stats"], time_msg)

            submit_btn.click(
                process_and_format,
                inputs=[text_input, language],
                outputs=[highlighted_output, stats_output, time_output],
            )
            # Clear resets the three output widgets; the input box is untouched.
            clear_btn.click(
                lambda: (None, None, ""),
                outputs=[highlighted_output, stats_output, time_output],
            )

        return demo
if __name__ == "__main__":
    # Script entry point: constructing NERDemo loads the NER pipeline, so the
    # model is only loaded when this file is executed directly.
    ner_demo = NERDemo()
    demo = ner_demo.create_demo()
    # share=True asks Gradio for a public share link in addition to the
    # local server.
    demo.launch(share=True)