SevenhuijsenM committed on
Commit
3aff486
·
1 Parent(s): e96b0df

Attempt for video url

Browse files
Files changed (2) hide show
  1. app.py +52 -4
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,18 +1,66 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
 
3
 
4
  pipe = pipeline(model="dussen/whisper-small-nl-hc")
5
 
6
def transcribe(audio):
    """Transcribe ``audio`` with the module-level Whisper pipeline.

    Args:
        audio: Input forwarded unchanged to ``pipe``.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    result = pipe(audio)
    return result["text"]
9
 
10
# Single-tab Gradio UI: microphone recording in, transcription text out.
iface = gr.Interface(
    title="Whisper Small Dutch",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=transcribe,
    # The recording is handed to ``transcribe`` as a file path.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
)

# Start the web server as soon as the module is executed.
iface.launch()
 
 
 
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ from pytube import YouTube
4
+ import os
5
 
6
  pipe = pipeline(model="dussen/whisper-small-nl-hc")
7
 
8
def download_audio(url, output_path='downloads'):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        url: URL of the YouTube video.
        output_path: Directory the audio file is saved in. Defaults to
            ``'downloads'``.

    Returns:
        The transcription text, or ``None`` when any step fails. Failures
        are printed rather than raised (best-effort behaviour).
    """
    try:
        yt = YouTube(url)

        # Take the first audio-only MP4 stream. ``first()`` yields None when
        # nothing matches, so check before using it.
        audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
        if audio_stream is None:
            raise ValueError(f"No audio-only mp4 stream found for {url}")

        # ``download`` returns the path it actually wrote, so there is no
        # need to rebuild it from ``default_filename``.
        mp4_path = audio_stream.download(output_path)

        # Give the file an .mp3 extension. This is a rename only (no
        # re-encoding). ``os.replace`` moves the file, so nothing is left at
        # ``mp4_path`` afterwards; the previous extra ``os.remove(mp4_path)``
        # always raised FileNotFoundError and aborted the transcription.
        # ``splitext`` touches only the extension, unlike ``str.replace``.
        mp3_path = os.path.splitext(mp4_path)[0] + ".mp3"
        os.replace(mp4_path, mp3_path)

        # Use the model to transcribe the audio.
        return pipe(mp3_path)["text"]
    except Exception as e:
        # Boundary handler: report the problem and signal failure with None.
        print(f"Error: {e}")
        return None
35
+
36
def url_to_text(url):
    """Transcribe the audio track of the YouTube video at ``url``.

    The previous implementation referenced ``mp.VideoFileClip`` although no
    ``mp`` module is imported in this file, so every call raised
    ``NameError``. This version uses the already-imported ``pytube`` to fetch
    the audio instead.

    Args:
        url: URL of a YouTube video, as typed into the text input.

    Returns:
        The transcription text produced by the Whisper pipeline.

    Raises:
        gr.Error: If ``url`` is empty or the video has no audio-only MP4
            stream.
    """
    import tempfile  # local import: only this function needs it

    if not url or not url.strip():
        raise gr.Error("Please provide a video URL.")

    audio_stream = (
        YouTube(url.strip())
        .streams.filter(only_audio=True, file_extension='mp4')
        .first()
    )
    if audio_stream is None:
        raise gr.Error("No audio stream found for this video.")

    # A per-call temporary directory replaces the fixed "audio.wav" file:
    # concurrent requests can no longer overwrite each other's audio, and the
    # download is removed automatically once transcription finishes.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = audio_stream.download(output_path=workdir)
        return pipe(audio_path)["text"]
42
+
43
def audio_to_text(audio):
    """Transcribe ``audio`` with the module-level Whisper pipeline.

    Args:
        audio: Input forwarded unchanged to ``pipe``.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    result = pipe(audio)
    return result["text"]
46
 
47
# "Video to text" UI: a text box for the video URL in, transcription text out.
iface_video_url = gr.Interface(
    title="Whisper Small Dutch",
    description="Demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=url_to_text,
    inputs="text",
    outputs="text",
)
54
+
55
# "Audio to text" UI: microphone recording in, transcription text out.
iface_audio = gr.Interface(
    title="Whisper Small Dutch",
    description="Realtime demo for dutch speech recognition using a fine-tuned Whisper small model.",
    fn=audio_to_text,
    # The recording is handed to ``audio_to_text`` as a file path.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
)
62
 
63
# Combine both interfaces into one app, one tab per interface.
app = gr.TabbedInterface(
    interface_list=[iface_audio, iface_video_url],
    tab_names=["Audio to text", "Video to text"],
)

# Only start the server when this file is run as a script, not on import.
if __name__ == "__main__":
    app.launch()
requirements.txt CHANGED
@@ -2,3 +2,4 @@ torch
2
  torchvision
3
  torchaudio
4
  transformers
 
 
2
  torchvision
3
  torchaudio
4
  transformers
5
+ pytube