Spaces:
Sleeping
Sleeping
SevenhuijsenM
committed on
Commit
·
3aff486
1
Parent(s):
e96b0df
Attempt for video url
Browse files- app.py +52 -4
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,18 +1,66 @@
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
|
|
|
|
3 |
|
4 |
pipe = pipeline(model="dussen/whisper-small-nl-hc")
|
5 |
|
6 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
text = pipe(audio)["text"]
|
8 |
return text
|
9 |
|
10 |
-
|
11 |
-
fn=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
inputs=gr.Audio(sources=["microphone"], type="filepath"),
|
13 |
outputs="text",
|
14 |
title="Whisper Small Dutch",
|
15 |
description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
|
16 |
)
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
1 |
from transformers import pipeline
|
2 |
import gradio as gr
|
3 |
+
from pytube import YouTube
|
4 |
+
import os
|
5 |
|
6 |
# Build the speech-recognition pipeline once at import time; the handler
# functions below all reuse this single instance.
pipe = pipeline(model="dussen/whisper-small-nl-hc")
|
7 |
|
8 |
+
def download_audio(url, output_path='downloads'):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: Address of the YouTube video.
        output_path: Directory the audio file is written to.

    Returns:
        The transcribed text, or ``None`` when the download or the
        transcription fails (the error is printed).
    """
    try:
        yt = YouTube(url)

        # Take the first audio-only MP4 stream offered for the video.
        # ``first()`` returns None when no stream matches the filter.
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        if audio_stream is None:
            print(f"Error: no audio-only mp4 stream found for {url}")
            return None

        # download() returns the path of the file it wrote, so the path does
        # not have to be rebuilt from the default filename.
        mp4_path = audio_stream.download(output_path)

        # Only the extension changes; the data is not re-encoded. The rename
        # moves the file, so there is no leftover .mp4 to delete afterwards
        # (removing it again raised FileNotFoundError on every call).
        # os.replace also overwrites an .mp3 left behind by an earlier run.
        mp3_path = os.path.splitext(mp4_path)[0] + ".mp3"
        os.replace(mp4_path, mp3_path)

        # Use the model to transcribe the audio.
        return pipe(mp3_path)["text"]
    except Exception as e:
        # Top-level boundary for the UI: report the problem instead of
        # crashing the request.
        print(f"Error: {e}")
        return None


def url_to_text(url):
    """Transcribe the audio of the video found at ``url``.

    The work is delegated to ``download_audio``. The earlier implementation
    called ``mp.VideoFileClip`` although no ``mp`` module is imported in this
    file, so every call failed with a NameError.

    Args:
        url: Address of the video (handled by pytube, i.e. a YouTube link).

    Returns:
        The transcribed text, or ``None`` if the audio could not be
        downloaded or transcribed.
    """
    return download_audio(url)
|
42 |
+
|
43 |
+
def audio_to_text(audio):
    """Transcribe an audio input with the speech-recognition pipeline.

    Args:
        audio: Audio input, forwarded unchanged to the pipeline.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    transcription = pipe(audio)
    return transcription["text"]
|
46 |
|
47 |
+
# Interface that takes a video URL as plain text and shows the transcription.
iface_video_url = gr.Interface(
    title="Whisper Small Dutch",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=url_to_text,
    inputs="text",
    outputs="text",
)
|
54 |
+
|
55 |
+
# Interface that records from the microphone; the recording is handed to the
# handler as a file path and the transcription is shown as text.
iface_audio = gr.Interface(
    title="Whisper Small Dutch",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=audio_to_text,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
)
|
62 |
|
63 |
+
# Combine both interfaces into one tabbed app; the tab titles are matched to
# the interfaces by position.
app = gr.TabbedInterface(
    [iface_audio, iface_video_url],
    ["Audio to text", "Video to text"],
)

if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    app.launch()
|
requirements.txt
CHANGED
@@ -2,3 +2,4 @@ torch
|
|
2 |
torchvision
|
3 |
torchaudio
|
4 |
transformers
|
|
|
|
2 |
torchvision
|
3 |
torchaudio
|
4 |
transformers
|
5 |
+
pytube
|