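# Gradio app: transcribe Spanish audio with Whisper and highlight named
# entities using a Spanish BERT NER model, from a microphone recording or
# an uploaded audio file (tabbed interface).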
from transformers import pipeline
import gradio as gr
asr = pipeline(task="automatic-speech-recognition",
               model="openai/whisper-medium")
# Force the decoder to transcribe in Spanish
asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
    language="spanish", task="transcribe")
demo = gr.Blocks()
def transcribe_long_form(filepath):
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Split long recordings into 30-second chunks and transcribe them in batches
    output = asr(
        filepath,
        max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]
# Spanish NER model used to highlight entities in the transcription
ner = pipeline("ner",
               model="mrm8488/bert-spanish-cased-finetuned-ner",
               )
def get_ner(input_text):
    if input_text is None:
        gr.Warning("No transcription found, please retry.")
        return {"text": "", "entities": []}
    output = ner(input_text)
    return {"text": input_text, "entities": output}
def main(filepath):
    transcription = transcribe_long_form(filepath)
    # Use a distinct name so the module-level `ner` pipeline is not shadowed
    entities = get_ner(transcription)
    return transcription, entities
mic_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="microphone",
                    type="filepath"),
    outputs=[gr.Textbox(label="Transcription", lines=3),
             gr.HighlightedText(label="Text with entities")],
    title="Transcribe audio from a recording",
    description="Transcription of audio recorded from the microphone.",
    allow_flagging="never")
file_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="upload",
                    type="filepath"),
    outputs=[gr.Textbox(label="Transcription", lines=3),
             gr.HighlightedText(label="Text with entities")],
    title="Transcribe audio from a file",
    description="Transcription from an uploaded audio file.",
    allow_flagging="never",
)
with demo:
    gr.TabbedInterface(
        [mic_transcribe,
         file_transcribe],
        ["Transcribe Microphone",
         "Transcribe Audio File"],
    )
demo.launch()