Spaces:
Build error
Build error
| import gradio as gr | |
| import torch.cuda | |
| import whisper | |
| from whisper.tokenizer import LANGUAGES | |
| from vid_to_wav import extract_audio | |
# Whisper model shared by the request handler; stays None until
# get_interface() loads one.
model = None

# True when a CUDA device is usable; passed to Whisper as the fp16 flag so
# half precision is only requested on GPU.
gpu = torch.cuda.is_available()
def analyze_transcription(text, duration):
    """Summarise the length and speaking rate of a transcription.

    Args:
        text: Transcribed speech; words are counted by splitting on
            whitespace.
        duration: Length of the recording in seconds.

    Returns:
        A human-readable summary containing the duration, the word count
        and, when the duration is positive, the words-per-minute rate with
        a verdict: below 130 wpm is reported as slower than average, above
        150 wpm as faster than average, anything in between as normal.
    """
    word_count = len(text.split())
    sentences = [
        "The video is {} sec. long and the speaker speaks {} words.".format(
            duration, word_count
        )
    ]

    # A missing or non-positive duration would turn the rate computation
    # into a division by zero, so only the raw counts are reported.
    if not duration or duration <= 0:
        sentences.append(
            "The speech speed could not be computed because the duration "
            "is not positive."
        )
        return " ".join(sentences)

    words_per_min = round(word_count / (duration / 60))
    sentences.append(
        "The speech speed is {} words-per-minute.".format(words_per_min)
    )

    if words_per_min < 130:
        sentences.append("The speaker has spoken slower than average speakers.")
    elif words_per_min > 150:
        sentences.append("The speaker has spoken faster than average speakers.")
    else:
        sentences.append(
            "The speaker maintains normal speed during speech making the "
            "speech comprehensible to most audiences!"
        )

    # Joining with a space keeps the sentences from running into each other.
    return " ".join(sentences)
def transcribe(filepath, language, task):
    """Transcribe (or translate) the speech in a video and analyse its pace.

    Args:
        filepath: Path of the uploaded video, as supplied by the Gradio
            input component.
        language: Language name chosen in the UI, or "Detect" to pass no
            language to the model.
        task: "Transcribe" or "Translate"; lower-cased before being handed
            to the model.

    Returns:
        A ``(text, analysis)`` tuple: the stripped transcription text and
        the summary produced by ``analyze_transcription``.

    Raises:
        RuntimeError: If the module-level model has not been loaded yet.
    """
    if model is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            "Whisper model is not loaded; call get_interface() first."
        )

    print(filepath)
    # Only the second and third values are used here; presumably they are
    # the extracted audio file path and its duration in seconds (confirm
    # against vid_to_wav.extract_audio).
    _audio, audio_file, duration = extract_audio(filepath)

    language = None if language == "Detect" else language
    result = model.transcribe(
        audio_file,
        task=task.lower(),
        language=language,
        fp16=gpu,  # half precision only when CUDA is available
    )
    text = result["text"].strip()
    return text, analyze_transcription(text, duration)
def get_interface(model_name="medium"):
    """Load a Whisper model and build the Gradio interface around it.

    The loaded model is stored in the module-level ``model`` so that
    ``transcribe`` can use it when the interface invokes it.

    Args:
        model_name: Name passed to ``whisper.load_model``.

    Returns:
        A ``gr.Interface`` with a video upload, a language dropdown and a
        task dropdown as inputs, and two text boxes (transcription and
        speech analysis) as outputs.
    """
    global model
    model = whisper.load_model(model_name)

    # Inputs. A microphone recorder was considered as an alternative:
    # gr.Audio(label="Record", source="microphone", type="filepath")
    video_input = gr.Video(label="Upload", source="upload", type="filepath")
    language_names = sorted(name.title() for name in LANGUAGES.values())
    language_input = gr.Dropdown(
        label="Language",
        choices=["Detect"] + language_names,
        value="Detect",
    )
    task_input = gr.Dropdown(
        label="Task",
        choices=["Transcribe", "Translate"],
        value="Transcribe",
        info="Whether to perform X->X speech recognition or X->English translation",
    )

    # Outputs, in the order transcribe() returns them.
    transcription_box = gr.Textbox(label="Transcription", lines=26)
    analysis_box = gr.Textbox(label="Speech Analysis", lines=4)

    # Alternative theme: gr.themes.Default()
    glass_theme = gr.themes.Glass(
        primary_hue=gr.themes.colors.orange,
        secondary_hue=gr.themes.colors.purple,
    )

    return gr.Interface(
        fn=transcribe,
        inputs=[video_input, language_input, task_input],
        outputs=[transcription_box, analysis_box],
        theme=glass_theme,
        title="Whisper is listening to you",
        # description=DESCRIPTION,
        allow_flagging="never",
    )
# Build the interface with the default model size; this also loads the
# Whisper model into the module-level ``model``.
demo = get_interface()

# Enable request queuing, then start the server with debug output on.
demo.queue().launch(
    debug=True,
)