import logging
import os
import tempfile
import time

import ffmpeg
import gradio as gr
import torch
from transformers import pipeline

logger = logging.getLogger(__name__)

# Hugging Face model id of the speech recognizer used by the app.
_MODEL_ID = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"


def main():
    """Build the speech-recognition pipeline and launch the Gradio UI.

    The UI takes either an audio file path from the audio component or an
    m3u8 URL. When a URL is given it takes precedence: its audio is
    extracted with ffmpeg and transcribed instead of the audio component's
    value.
    """
    # Pass ``device`` only when CUDA is available, so the CPU case keeps the
    # pipeline's own default device selection.
    pipeline_kwargs = {"device": 0} if torch.cuda.is_available() else {}
    p = pipeline(
        "automatic-speech-recognition",
        model=_MODEL_ID,
        **pipeline_kwargs,
    )

    def remove_quietly(path):
        """Delete *path*, ignoring the case where it is already gone."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except OSError:
            logger.warning("Could not remove temporary file %s", path)

    def extract_audio_from_m3u8(url):
        """Extract the audio at *url* into a temporary ``.aac`` file.

        Returns the path of the created file; the caller is responsible for
        deleting it. Any exception raised while running ffmpeg propagates
        after the temporary file has been cleaned up.
        """
        # A unique file per request, so simultaneous users cannot overwrite
        # each other's audio.
        fd, output_file = tempfile.mkstemp(suffix=".aac")
        os.close(fd)
        try:
            # NOTE(review): ``url`` comes straight from the UI and ffmpeg
            # accepts many protocols and local paths; consider restricting
            # it to http(s) if this app is exposed publicly.
            ffmpeg.input(url).output(output_file).run(overwrite_output=True)
        except Exception:
            remove_quietly(output_file)
            raise
        return output_file

    def transcribe_function(audio, state, m3u8_url):
        """Transcribe the given audio and append the text to the history.

        Args:
            audio: File path from the audio component, or a falsy value.
            state: Accumulated transcription text so far.
            m3u8_url: Optional stream URL; when non-blank it is used instead
                of ``audio``.

        Returns:
            A ``(state, text)`` tuple: the (possibly extended) history and
            the text to show in the output box. On failure the history is
            returned unchanged together with an error message.
        """
        url = (m3u8_url or "").strip()
        temp_audio = None
        if url:
            try:
                temp_audio = extract_audio_from_m3u8(url)
            except Exception as e:
                logger.exception("Audio extraction failed for %r", url)
                return state, f"Hendan villan hendi: {e}"
            audio = temp_audio

        if not audio:
            # Neither a URL nor a recording/upload was provided.
            return state, "Einki ljóð er til talukenning."

        try:
            # NOTE(review): fixed 3 s delay kept from the original; its
            # purpose is not evident from this file - confirm it is needed.
            time.sleep(3)
            text = p(audio, chunk_length_s=50)["text"]
        except Exception:
            logger.exception("Transcription failed for %r", audio)
            return state, "Okkurt riggaði ikki í talukenningini."
        finally:
            # Only files created here are removed; the audio component's
            # own file is left alone.
            if temp_audio is not None:
                remove_quietly(temp_audio)

        state += text + "\n"
        return state, text

    def reset_output(transcription, state):
        """Clear both the output textbox and the accumulated history.

        The arguments are the current values supplied by Gradio and are
        ignored.
        """
        return "", ""

    with gr.Blocks() as demo:
        state_var = gr.State("")

        with gr.Row():
            with gr.Column():
                microphone = gr.Audio(
                    type="filepath",
                    label="Mikrofon ella ljóðfíla",
                )
                m3u8_url = gr.Textbox(
                    label="m3u8-leinki (t.d. frá kvf.fo ella logting.fo)",
                )
            with gr.Column():
                transcription_var = gr.Textbox(
                    type="text",
                    label="Tekstur frá talukennara",
                    interactive=False,
                )

        with gr.Row():
            transcribe_button = gr.Button("Byrja talukenning")
            reset_button = gr.Button("Strika tekst frá talukennara")

        transcribe_button.click(
            transcribe_function,
            [microphone, state_var, m3u8_url],
            [state_var, transcription_var],
        )
        reset_button.click(
            reset_output,
            [transcription_var, state_var],
            [transcription_var, state_var],
        )

    demo.launch()


if __name__ == "__main__":
    main()