import os import librosa from transformers import pipeline labels = {0: 'O', 1: 'B-DATE', 2: 'B-EVENT', 3: 'B-LOC', 4: 'B-ORG', 5: 'B-PER', 6: 'I-DATE', 7: 'I-EVENT', 8: 'I-LOC', 9: 'I-ORG', 10: 'I-PER'} class AudioSpeechNERPipeline: def __init__(self, stt_model_name='abduaziz/whisper-small-uz', ner_model_name='abduaziz/bert-ner-uz', stt_language='uz'): # Initialize Speech-to-Text pipeline with timestamp support self.stt_pipeline = pipeline( task="automatic-speech-recognition", model=stt_model_name, return_timestamps=True # Enable timestamp support ) # Initialize NER pipeline self.ner_pipeline = pipeline( task="ner", model=ner_model_name ) def chunk_audio(self, audio_path, chunk_duration=30): """ Chunk long audio files into 30-second segments """ # Load audio file audio, sample_rate = librosa.load(audio_path, sr=16000) # Calculate chunk size chunk_samples = chunk_duration * sample_rate # Create chunks chunks = [] for start in range(0, len(audio), chunk_samples): chunk = audio[start:start+chunk_samples] chunks.append({ 'array': chunk, 'sampling_rate': 16000 }) return chunks def transcribe_audio(self, audio_path): """ Handle audio transcription for files longer than 30 seconds """ # Check audio length audio, sample_rate = librosa.load(audio_path, sr=16000) # If audio is longer than 30 seconds, chunk it if len(audio) / sample_rate > 30: audio_chunks = self.chunk_audio(audio_path) transcriptions = [] for chunk in audio_chunks: # Transcribe each chunk chunk_transcription = self.stt_pipeline(chunk) transcriptions.append(chunk_transcription['text']) # Combine transcriptions full_transcription = " ".join(transcriptions) else: # Process audio normally for short files full_transcription = self.stt_pipeline({ 'array': audio, 'sampling_rate': 16000 })['text'] return full_transcription def process_audio(self, audio_path): # Transcribe audio transcription = self.transcribe_audio(audio_path) # Extract named entities entities = self.ner_pipeline(transcription) return { 'filename': os.path.basename(audio_path), 'transcription': transcription, 'entities': entities } def create_ner_html(entities): """ Create HTML representation of named entities """ if not entities: return "No named entities found." html = "
Word | Entity Type |
---|---|
{entity['word']} | " \ f"{new_entity} | " \ f"