Spaces:

dussen
/

Whisper_dutch

Sleeping

App Files Community

SevenhuijsenM committed on Dec 8, 2023

Commit

3aff486

1 Parent(s): e96b0df

Attempt for video url

Browse files

Files changed (2) hide show

app.py +52 -4
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,18 +1,66 @@
 from transformers import pipeline
 import gradio as gr
 pipe = pipeline(model="dussen/whisper-small-nl-hc")
-def transcribe(audio):
     text = pipe(audio)["text"]
     return text
-iface = gr.Interface(
-    fn=transcribe,
     inputs=gr.Audio(sources=["microphone"], type="filepath"),
     outputs="text",
     title="Whisper Small Dutch",
     description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
 )
-iface.launch()

 from transformers import pipeline
 import gradio as gr
+from pytube import YouTube
+import os
+# Pipeline for the fine-tuned Whisper model; built once at import time and
+# reused by every handler below.
 pipe = pipeline(model="dussen/whisper-small-nl-hc")
+def download_audio(url, output_path='downloads'):
+    """Download the audio track of a YouTube video and transcribe it.
+
+    Args:
+        url: URL of the YouTube video.
+        output_path: Directory the audio file is downloaded into.
+
+    Returns:
+        The transcribed text, or None if the download or the transcription
+        failed (the error is printed).
+    """
+    audio_path = None
+    try:
+        yt = YouTube(url)
+        # First audio-only mp4 stream that pytube lists; this is not
+        # necessarily the one with the highest bitrate.
+        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
+        if audio_stream is None:
+            print(f"Error: no audio-only mp4 stream found for {url}")
+            return None
+        # download() returns the path of the saved file, so the path does not
+        # have to be rebuilt from the default filename. The file is handed to
+        # the pipeline unchanged: renaming .mp4 to .mp3 would not convert it.
+        audio_path = audio_stream.download(output_path)
+        return pipe(audio_path)["text"]
+    except Exception as e:
+        # Best-effort: report the failure and signal it with None.
+        print(f"Error: {e}")
+        return None
+    finally:
+        # Remove the download once it has been transcribed (or has failed),
+        # so files do not pile up in output_path.
+        if audio_path is not None and os.path.exists(audio_path):
+            os.remove(audio_path)
+def url_to_text(url):
+    """Transcribe the audio of the YouTube video at `url`.
+
+    Args:
+        url: Video URL as entered in the text input of the "Video to text" tab.
+
+    Returns:
+        The transcribed text.
+
+    Raises:
+        gr.Error: If the audio could not be downloaded or transcribed.
+    """
+    text = download_audio(url)
+    if text is None:
+        # gr.Error surfaces the message in the Gradio UI instead of an empty result.
+        raise gr.Error("Could not download or transcribe the audio for this URL.")
+    return text
+def audio_to_text(audio):
+    """Run the speech-recognition pipeline on `audio` and return its text.
+
+    Args:
+        audio: The recording as delivered by the Gradio audio input
+            (configured with type="filepath").
+
+    Returns:
+        The value stored under the "text" key of the pipeline result.
+    """
+    result = pipe(audio)
+    return result["text"]
+# Tab that takes a video URL from a text box and shows the transcription.
+iface_video_url = gr.Interface(
+    fn=url_to_text,
+    inputs="text",
+    outputs="text",
+    title="Whisper Small Dutch",
+    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
+)
+# Tab that records from the microphone; type="filepath" makes Gradio pass the
+# recording to audio_to_text as a file path.
+iface_audio = gr.Interface(
+    fn=audio_to_text,
     inputs=gr.Audio(sources=["microphone"], type="filepath"),
     outputs="text",
     title="Whisper Small Dutch",
     description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
 )
+# Both interfaces are exposed as tabs of one app; the audio tab is listed first.
+app = gr.TabbedInterface([iface_audio, iface_video_url], ["Audio to text", "Video to text"])
+# Launch only when the file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    app.launch()

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ torch
 torchvision
 torchaudio
 transformers

 torchvision
 torchaudio
 transformers
+pytube