"""Gradio demo: Uzbek speech recognition with named-entity highlighting."""

import html
import os

import gradio as gr
from huggingface_hub import login

from pipe import AudioSpeechNERPipeline

# Maps the numeric suffix of a model label to its BIO tag.
LABELS = {
    0: 'O',
    1: 'B-DATE',
    2: 'B-EVENT',
    3: 'B-LOC',
    4: 'B-ORG',
    5: 'B-PER',
    6: 'I-DATE',
    7: 'I-EVENT',
    8: 'I-LOC',
    9: 'I-ORG',
    10: 'I-PER',
}

# Display colour per BIO tag; the B- and I- tags of one type share a colour.
# Insertion order is the order in which the legend lists the tags.
ENTITY_COLORS = {
    'B-PER': 'blue',
    'I-PER': 'blue',
    'B-ORG': 'green',
    'I-ORG': 'green',
    'B-LOC': 'red',
    'I-LOC': 'red',
    'B-DATE': 'purple',
    'I-DATE': 'purple',
    'B-EVENT': 'orange',
    'I-EVENT': 'orange',
}


def process_audio_pipeline(audio):
    """Transcribe an audio file and highlight the named entities in it.

    Args:
        audio: Value handed over by the Gradio audio input (configured with
            ``type="filepath"``); ``None`` when nothing was uploaded.

    Returns:
        A ``(transcription, highlighted_html)`` pair. On failure the first
        element carries an error message and the second is an empty string.
    """
    if audio is None:
        return "Error processing audio: no audio file provided", ""
    try:
        # Constructed inside the ``try`` so that a failure to build the
        # pipeline is reported in the UI like any other processing error.
        pipeline = AudioSpeechNERPipeline()
        transcription, entities = pipeline.process_audio(audio)
        highlighted_text = highlight_entities(transcription, entities)
    except Exception as e:  # UI boundary: surface every failure to the user
        return f"Error processing audio: {e}", ""
    return transcription, highlighted_text


def _entity_spans(entities, text_length):
    """Return ``(start, end, label)`` triples for all non-'O' entities.

    The label id is the integer after the last ``_`` of ``entity['entity']``.
    Offsets are clamped to ``[0, text_length]`` and the result is ordered by
    start position. A non-numeric suffix raises ``ValueError`` and an id
    missing from ``LABELS`` raises ``KeyError``.
    """
    spans = []
    for entity in entities or []:
        label_id = int(entity['entity'].split("_")[-1])
        if label_id == 0:
            # Id 0 is the 'O' (outside) tag: nothing to highlight.
            continue
        start = max(0, min(entity.get('start', 0), text_length))
        end = max(start, min(entity.get('end', 0), text_length))
        spans.append((start, end, LABELS[label_id]))
    spans.sort(key=lambda span: span[0])
    return spans


def _build_legend():
    """Return the HTML legend listing every tag in its display colour."""
    items = ''.join(
        f'<span style="color: {color}; font-weight: bold; '
        f'margin-right: 10px;">{label}</span>'
        for label, color in ENTITY_COLORS.items()
    )
    return '<br><br><strong>Legend:</strong><br>' + items


def highlight_entities(transcription, entities):
    """Render the transcription as HTML with colour-coded entities.

    Args:
        transcription: Raw (unescaped) transcription text.
        entities: Iterable of dicts with an ``'entity'`` label string and
            optional ``'start'``/``'end'`` character offsets into
            ``transcription``.

    Returns:
        HTML-escaped transcription in which every entity is wrapped in a
        coloured ``<span>``, followed by a legend of all tags.

    Raises:
        ValueError: If an entity label does not end in an integer id.
        KeyError: If an entity label id is not present in ``LABELS``.
    """
    pieces = []
    cursor = 0  # index into the raw text up to which output was emitted
    for start, end, label in _entity_spans(entities, len(transcription)):
        if start < cursor or start == end:
            # Skip empty spans and spans overlapping one already emitted;
            # nesting them would produce broken markup.
            continue
        color = ENTITY_COLORS.get(label, 'black')
        # Entity offsets refer to the raw text, so slice first and escape
        # each segment separately; escaping up front would shift offsets.
        pieces.append(html.escape(transcription[cursor:start]))
        pieces.append(
            f'<span style="color: {color}; font-weight: bold;">'
            f'{html.escape(transcription[start:end])}</span>'
        )
        cursor = end
    pieces.append(html.escape(transcription[cursor:]))
    return ''.join(pieces) + _build_legend()


def create_gradio_interface():
    """Build the Gradio interface wired to ``process_audio_pipeline``.

    Returns:
        A ``gr.Interface`` taking an audio file path and producing the plain
        transcription plus an HTML view with highlighted entities.
    """
    iface = gr.Interface(
        fn=process_audio_pipeline,
        inputs=gr.Audio(type="filepath", label="Upload Uzbek Audio"),
        outputs=[
            gr.Textbox(label="Transcription"),
            # HTML output so the coloured entity spans are rendered.
            gr.HTML(label="Named Entities"),
        ],
        title="🎙️ Uzbek Speech Recognition & NER",
        description=(
            "Upload an Uzbek audio file to transcribe and "
            "visualize named entities with color-coded highlighting. "
            "Supports MP3 and WAV formats."
        ),
        css=".gradio-container { background-color: #f0f0f0; }",
    )
    return iface


def main():
    """Create the interface and launch the Gradio app."""
    demo = create_gradio_interface()
    demo.launch()


if __name__ == "__main__":
    # Log in to the Hugging Face Hub only when a token is supplied through
    # the environment, so no credential is stored in the source.
    token = os.getenv('HF_TOKEN')
    if token:
        login(token=token, new_session=False)
    main()