whisper-hac-demo

Sleeping

App Files Files Community

razhan committed on Nov 8, 2024

Commit

3a88da4

verified ·

1 Parent(s): e8e46c3

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -13

app.py CHANGED Viewed

@@ -170,6 +170,7 @@ import yt_dlp as youtube_dl
 import pytube as pt
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
 import tempfile
 import os
@@ -240,22 +241,45 @@ def download_yt_audio(yt_url, filename):
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename=filename)
-@spaces.GPU
-def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
     html_embed_str = _return_yt_html_embed(yt_url)
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        # filepath = os.path.join(tmpdirname, "video.mp4")
-        filepath = os.path.join(tmpdirname, "audio.mp3")
-        download_yt_audio(yt_url, filepath)
-        with open(filepath, "rb") as f:
-            inputs = f.read()
-    inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
-    inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
-    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
     return html_embed_str, text
@@ -310,7 +334,7 @@ yt_transcribe = gr.Interface(
 )
 with demo:
-    # gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
-    gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
 demo.queue().launch(ssr_mode=False)

 import pytube as pt
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
+import pytube
 import tempfile
 import os
     stream = yt.streams.filter(only_audio=True)[0]
     stream.download(filename=filename)
+# @spaces.GPU
+# def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
+#     html_embed_str = _return_yt_html_embed(yt_url)
+#     with tempfile.TemporaryDirectory() as tmpdirname:
+#         # filepath = os.path.join(tmpdirname, "video.mp4")
+#         filepath = os.path.join(tmpdirname, "audio.mp3")
+#         download_yt_audio(yt_url, filepath)
+#         with open(filepath, "rb") as f:
+#             inputs = f.read()
+#     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
+#     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
+#     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+#     return html_embed_str, text
def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress(), max_filesize=75.0):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        yt_url: URL of the YouTube video.
        task: Value forwarded to the pipeline as ``generate_kwargs["task"]``.
        progress: Gradio progress tracker; only used to announce that loading
            has started.
        max_filesize: Largest accepted audio stream size, in megabytes.

    Returns:
        A ``(html_embed_str, text)`` tuple: the result of
        ``_return_yt_html_embed(yt_url)`` and the ``"text"`` entry of the
        pipeline output.

    Raises:
        gr.Error: If the video or its audio stream cannot be loaded, or if the
            stream is larger than ``max_filesize``.
    """
    progress(0, desc="Loading audio file...")
    html_embed_str = _return_yt_html_embed(yt_url)

    try:
        yt = pytube.YouTube(yt_url)
        stream = yt.streams.filter(only_audio=True)[0]
    except Exception as err:
        # `Exception` rather than a bare `except` so KeyboardInterrupt/SystemExit
        # still propagate; chain the cause so it is visible in the server logs.
        raise gr.Error(
            "An error occurred while loading the YouTube video. Please try again."
        ) from err

    if stream.filesize_mb > max_filesize:
        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")

    # Download into a per-call temporary directory: a fixed "audio.mp3" in the
    # working directory would be shared (and overwritten) by concurrent
    # requests and would be left behind after the call.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = stream.download(output_path=tmpdirname, filename="audio.mp3")
        with open(filepath, "rb") as f:
            inputs = f.read()

    # NOTE(review): `processor` is not defined in the visible part of this
    # file; the previous version of this function read the rate from
    # `pipe.feature_extractor` — confirm `processor` exists at module level.
    sampling_rate = processor.feature_extractor.sampling_rate
    inputs = ffmpeg_read(inputs, sampling_rate)
    inputs = {"array": inputs, "sampling_rate": sampling_rate}

    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return html_embed_str, text
 )
 with demo:
+    gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
+    # gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
 demo.queue().launch(ssr_mode=False)