Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -170,6 +170,7 @@ import yt_dlp as youtube_dl
|
|
| 170 |
import pytube as pt
|
| 171 |
from transformers import pipeline
|
| 172 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
|
|
|
| 173 |
|
| 174 |
import tempfile
|
| 175 |
import os
|
|
@@ -240,22 +241,45 @@ def download_yt_audio(yt_url, filename):
|
|
| 240 |
stream = yt.streams.filter(only_audio=True)[0]
|
| 241 |
stream.download(filename=filename)
|
| 242 |
|
| 243 |
-
@spaces.GPU
|
| 244 |
-
def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
html_embed_str = _return_yt_html_embed(yt_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
filepath = os.path.join(tmpdirname, "audio.mp3")
|
| 250 |
-
download_yt_audio(yt_url, filepath)
|
| 251 |
-
with open(filepath, "rb") as f:
|
| 252 |
-
inputs = f.read()
|
| 253 |
|
| 254 |
-
|
| 255 |
-
inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
|
| 256 |
|
| 257 |
-
|
|
|
|
| 258 |
|
|
|
|
|
|
|
|
|
|
| 259 |
return html_embed_str, text
|
| 260 |
|
| 261 |
|
|
@@ -310,7 +334,7 @@ yt_transcribe = gr.Interface(
|
|
| 310 |
)
|
| 311 |
|
| 312 |
with demo:
|
| 313 |
-
|
| 314 |
-
gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"])
|
| 315 |
|
| 316 |
demo.queue().launch(ssr_mode=False)
|
|
|
|
| 170 |
import pytube as pt
|
| 171 |
from transformers import pipeline
|
| 172 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
| 173 |
+
import pytube
|
| 174 |
|
| 175 |
import tempfile
|
| 176 |
import os
|
|
|
|
| 241 |
stream = yt.streams.filter(only_audio=True)[0]
|
| 242 |
stream.download(filename=filename)
|
| 243 |
|
| 244 |
+
# @spaces.GPU
|
| 245 |
+
# def yt_transcribe(yt_url, task="transcribe", max_filesize=75.0):
|
| 246 |
+
# html_embed_str = _return_yt_html_embed(yt_url)
|
| 247 |
+
|
| 248 |
+
# with tempfile.TemporaryDirectory() as tmpdirname:
|
| 249 |
+
# # filepath = os.path.join(tmpdirname, "video.mp4")
|
| 250 |
+
# filepath = os.path.join(tmpdirname, "audio.mp3")
|
| 251 |
+
# download_yt_audio(yt_url, filepath)
|
| 252 |
+
# with open(filepath, "rb") as f:
|
| 253 |
+
# inputs = f.read()
|
| 254 |
+
|
| 255 |
+
# inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
|
| 256 |
+
# inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
|
| 257 |
+
|
| 258 |
+
# text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
|
| 259 |
+
|
| 260 |
+
# return html_embed_str, text
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def yt_transcribe(yt_url, task="transcribe", progress=gr.Progress(), max_filesize=75.0):
    """Download the audio of a YouTube video and run the speech pipeline on it.

    Args:
        yt_url: URL of the YouTube video.
        task: Forwarded to the pipeline as ``generate_kwargs["task"]``.
        progress: Gradio progress tracker; called once before loading starts.
        max_filesize: Largest accepted audio stream size, in megabytes.

    Returns:
        A ``(html_embed_str, text)`` tuple: the embed markup produced by
        ``_return_yt_html_embed`` and the ``"text"`` field of the pipeline
        output.

    Raises:
        gr.Error: If the video or its audio stream cannot be loaded, or if
            the audio stream is larger than ``max_filesize``.
    """
    progress(0, desc="Loading audio file...")
    html_embed_str = _return_yt_html_embed(yt_url)

    try:
        yt = pytube.YouTube(yt_url)
        # Indexing raises IndexError when there is no audio-only stream; that
        # is reported to the user the same way as any other loading failure.
        stream = yt.streams.filter(only_audio=True)[0]
    except Exception as err:
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate; chain the cause so it stays visible in the server logs.
        raise gr.Error(
            "An error occurred while loading the YouTube video. Please try again."
        ) from err

    if stream.filesize_mb > max_filesize:
        raise gr.Error(f"Maximum YouTube file size is {max_filesize}MB, got {stream.filesize_mb:.2f}MB.")

    # Use a private temporary directory: a fixed "audio.mp3" in the working
    # directory would be overwritten by concurrent requests and never removed.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "audio.mp3")
        stream.download(output_path=tmpdirname, filename="audio.mp3")
        with open(filepath, "rb") as f:
            raw_audio = f.read()

    # NOTE(review): `processor` is defined outside this block; the previous
    # version of this function read the rate from `pipe.feature_extractor` --
    # confirm `processor` exists at module level.
    sampling_rate = processor.feature_extractor.sampling_rate
    inputs = {
        "array": ffmpeg_read(raw_audio, sampling_rate),
        "sampling_rate": sampling_rate,
    }
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return html_embed_str, text
|
| 284 |
|
| 285 |
|
|
|
|
| 334 |
)
|
| 335 |
|
| 336 |
with demo:
    # One tab per input source: microphone, uploaded audio file, YouTube URL.
    # (The earlier layout listed only the first two interfaces.)
    gr.TabbedInterface(
        [mf_transcribe, file_transcribe, yt_transcribe],
        ["Microphone", "Audio file", "YouTube"],
    )

# Enable request queuing, then start the app with SSR mode turned off.
demo.queue().launch(ssr_mode=False)
|