Artyom Boyko committed · Commit 95b1e18 · Parent: c867d05
Solving the YouTube problem.

Changed files:
- app_srv/app_srv.py (+27 -64)
- app_srv/audio_processing.py (+1 -1)
- app_srv/downloader.py (+4 -1)
- app_srv/test.ipynb (+6 -66)
- packages.txt (+2 -1)
- requirements.txt (+2 -1)
app_srv/app_srv.py CHANGED

@@ -2,8 +2,6 @@ import gradio as gr
 import torch
 import os
 import json
-import requests  # Added for making HTTP requests
-import socket  # Added for getting hostname
 
 # Import your modules
 from downloader import download_youtube_video
@@ -15,35 +13,14 @@ from model_api import get_device_and_dtype
 device, dtype = get_device_and_dtype()
 
 # Default prompt
-DEFAULT_PROMPT = """
-
-
-
-
-"""
-
-def get_public_ip_and_hostname():
-    public_ip = "N/A"
-    hostname = "N/A"
-
-    try:
-        # Get public IP address
-        response = requests.get("https://api.ipify.org?format=json", timeout=5)
-        response.raise_for_status()  # Raise an exception for HTTP errors
-        public_ip = response.json().get("ip", "N/A")
-    except requests.exceptions.RequestException as e:
-        print(f"Error getting public IP: {e}")
-        public_ip = f"Error: {e}"
-
-    try:
-        # Get hostname
-        hostname = socket.gethostname()
-    except Exception as e:
-        print(f"Error getting hostname: {e}")
-        hostname = f"Error: {e}"
-
-    return f"Public IP: {public_ip} | Hostname: {hostname}"
+DEFAULT_PROMPT = """
+Present the frame analysis in the following format, focusing on the details in the frame:
+FRAME: {timestamp} \n
+OBJECTS: List of objects with their count, for example: Bengal tiger - 1, Volvo car - 1, Person - 2 (male, female). Don't count the mention of an object in the text with the video as a separate object. \n
+TEXT: This is where you place the text that is present in the frame. Just bring the text present in the frame from left to right, top to bottom. \n
+BACKGROUND: Description of background and surroundings, e.g.: Muddy brown water. A road in the distance. An abandoned building on the horizon. Describe only what is visible. \n
+ACTION: A detailed description of what is happening in the frame, for example: A Bengal tiger is swimming in murky water, its head and part of its back are visible above the surface. A blue Volvo car is driving along the road in the distance. A part of a tree is visible in the right part of the frame.
+"""
 
 # --- OPTIMIZED FUNCTION, RETURNING JSON STRING ---
 def analyze_video_data(youtube_url: str, prompt: str, quality: int, time_step: float) -> str:
@@ -62,8 +39,8 @@ def analyze_video_data(youtube_url: str, prompt: str, quality: int, time_step: f
         str: A JSON formatted string containing the analysis results.
         The JSON structure includes:
         - "status": "success" if the analysis was successful, "error" otherwise.
-        - "message": A brief description of the outcome (
-          or an error message
+        - "message": A brief description of the outcome (e.g., "Analysis completed successfully."
+          or an error message).
         - "frame_analysis": A list of dictionaries, where each dictionary represents a frame
           and contains "timestamp" and "description".
         - "audio_transcription": The transcribed text of the video's audio.
@@ -74,21 +51,25 @@ def analyze_video_data(youtube_url: str, prompt: str, quality: int, time_step: f
     """
 
     results = {
-        "status": "success",
-        "message": "",
+        "status": "success",
+        "message": "Analysis completed successfully.",
         "frame_analysis": [],
         "audio_transcription": ""
     }
 
     try:
+
+        # YouTube cookies
+        cookies = os.getenv("YOUTUBE_COOKIES")
+
         # 1. Download video
         video_data = download_youtube_video(
             url=youtube_url,
-            video_quality=quality
+            video_quality=quality,
+            youtube_cookies=cookies
         )
 
         # 2. Extract frames
-        # frames_dict: {timestamp: path_to_frame_image}
        frames_dict = extract_frames_with_timestamps(
             video_path=video_data['video_path'],
             output_dir=video_data['data_path'],
@@ -99,7 +80,7 @@ def analyze_video_data(youtube_url: str, prompt: str, quality: int, time_step: f
         # 3. Generate descriptions for frames
         descriptions = generate_frame_descriptions(
             frames_dict=frames_dict,
-            custom_prompt=prompt,
+            custom_prompt=prompt,  # Now `prompt` can contain the {timestamp} placeholder
             device=device,
             torch_dtype=dtype
         )
@@ -116,6 +97,8 @@ def analyze_video_data(youtube_url: str, prompt: str, quality: int, time_step: f
             })
 
         results["audio_transcription"] = transcription_text
+
+        print("Video processing complete")
 
         # Return formatted JSON string
         return json.dumps(results, indent=2, ensure_ascii=False)
@@ -124,12 +107,12 @@ def analyze_video_data(youtube_url: str, prompt: str, quality: int, time_step: f
         error_message = f"Processing error: {str(e)}"
         print(f"An error occurred during video analysis: {e}")  # For debugging
 
-        results["status"] = "error"
-        results["message"] = error_message
-        results["frame_analysis"] = []
-        results["audio_transcription"] = ""
+        results["status"] = "error"
+        results["message"] = error_message
+        results["frame_analysis"] = []
+        results["audio_transcription"] = ""
 
-        #
+        # On error, return a JSON string describing the error
         return json.dumps(results, indent=2, ensure_ascii=False)
 
 # Create Gradio interface
@@ -138,12 +121,6 @@ with gr.Blocks(title="Video Analysis Tool", css="""
     .output-box {border-radius: 8px !important; margin-top:15px;}
     .results-output {background:#f8f8f8 !important; padding:15px !important;}
     h1 {color: #1a73e8 !important;}
-    .ip-info {
-        font-size: 0.9em;
-        color: #666;
-        margin-top: -15px; /* Adjust as needed to pull it closer to the title */
-        margin-bottom: 10px;
-    }
 """) as demo:
 
     gr.Markdown("""
@@ -151,21 +128,7 @@ with gr.Blocks(title="Video Analysis Tool", css="""
     Analyze YouTube videos - get frame-by-frame descriptions with timestamps and audio transcription.
     """)
 
-    #
-    # We use a gr.Markdown component to display the text.
-    # The key here is that get_public_ip_and_hostname is NOT directly an input/output
-    # of a button. It's called once when the app loads, or its output is static.
-    # To prevent it from being in MCP API, we typically don't expose it via gr.Interface
-    # or explicitly set show_api=False for the component if it were interactive.
-    # Here, it's a simple call rendered in Markdown, so it won't be exposed.
-    gr.Markdown(
-        f"<div class='ip-info'>{get_public_ip_and_hostname()}</div>",
-        # This component itself does not expose an API endpoint if it's just static Markdown
-        # or updated via a gr.State and not directly via a `fn` in `click` with `show_api=True`.
-        # The key is that the function `get_public_ip_and_hostname` is called
-        # during the UI definition, not as an API endpoint.
-    )
-
+    # Top row: Video URL, prompt, analysis parameters, and analyze button
     with gr.Row():
         youtube_url = gr.Textbox(
             label="YouTube Video URL",
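The inline comment above says `prompt` may now contain a `{timestamp}` placeholder that gets filled per frame. The body of `generate_frame_descriptions` is not part of this diff, so the following is only a minimal sketch of the idea; every name apart from the placeholder itself is hypothetical:

# Sketch only: per-frame expansion of a {timestamp} placeholder.
# frames_dict mirrors the app's shape: timestamp -> frame image path.

def expand_prompt(template: str, timestamp: float) -> str:
    """Fill {timestamp}, leaving any other placeholder untouched."""
    class _KeepMissing(dict):
        def __missing__(self, key):
            return "{" + key + "}"
    return template.format_map(_KeepMissing(timestamp=f"{timestamp:.2f}"))

template = "FRAME: {timestamp}\nOBJECTS: ...\nACTION: ..."
frames_dict = {0.0: "frames/000.jpg", 2.5: "frames/001.jpg"}
for ts, path in frames_dict.items():
    print(expand_prompt(template, ts))  # "FRAME: 0.00 ..." and so on

If the real implementation does a plain `prompt.format(timestamp=ts)` instead, user-supplied prompts containing literal braces would raise a KeyError, which is worth keeping in mind.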
app_srv/audio_processing.py CHANGED

@@ -49,5 +49,5 @@ if __name__ == "__main__":
 
     selected_device, selected_dtype = get_device_and_dtype()
 
-    result = transcribe_audio("/workspaces/Video_Analyser/app_srv/
+    result = transcribe_audio("/workspaces/Video_Analyser/app_srv/downloads/45677153-510d-4f47-95ee-c1b4b0843433/audio.mp3.mp3", selected_device, selected_dtype)
     print(result)
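As an aside, the fixture path ends in audio.mp3.mp3. A plausible but unverified explanation, given the FFmpegExtractAudio postprocessor visible in downloader.py below: the audio output template already hard-codes an .mp3 name, and the postprocessor then appends the codec's extension. The outtmpl value here is an assumption for illustration, not the repo's actual template:

# Hypothetical illustration of how "audio.mp3.mp3" can arise with yt-dlp.
import yt_dlp

audio_opts = {
    "outtmpl": "downloads/%(id)s/audio.mp3",  # assumed; ".mp3" hard-coded
    "postprocessors": [{
        "key": "FFmpegExtractAudio",   # same postprocessor as downloader.py
        "preferredcodec": "mp3",       # re-encodes and names the result *.mp3
        "preferredquality": "128",
    }],
}
# With "audio.%(ext)s" instead, the final file would be plain "audio.mp3".
with yt_dlp.YoutubeDL(audio_opts) as ydl:
    ydl.download(["https://www.youtube.com/watch?v=example"])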
app_srv/downloader.py CHANGED

@@ -9,7 +9,8 @@ from datetime import datetime
 
 def download_youtube_video(url: str,
                            base_dir: str = None,
-                           video_quality: int = 720) -> Dict[str, str]:
+                           video_quality: int = 720,
+                           youtube_cookies: str = "") -> Dict[str, str]:
     """
     Downloads video and audio from YouTube, saving them to a unique GUID folder.
     Metadata is saved in JSON format including download datetime and timezone.
@@ -75,6 +76,7 @@
         'quiet': True,
         'no_warnings': True,
         'restrict_filenames': True,
+        'cookiefile': youtube_cookies,
     }
 
     with yt_dlp.YoutubeDL(video_opts) as ydl:
@@ -90,6 +92,7 @@
             'preferredcodec': 'mp3',
             'preferredquality': '128',
         }],
+        'cookiefile': youtube_cookies,
     }
 
     with yt_dlp.YoutubeDL(audio_opts) as ydl:
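One caveat, offered as a suggestion rather than a description of the committed behavior: yt-dlp's cookiefile option expects a path to a Netscape-format cookies file, while os.getenv("YOUTUBE_COOKIES") in app_srv.py yields None when the secret is unset, and possibly the cookie file's raw contents when set. Under that assumption, a small helper could materialize the secret to a temporary file, and the option could be omitted entirely when no cookies are available:

# Sketch, assuming the YOUTUBE_COOKIES secret stores Netscape-format
# cookie text (not a path). The helper name is hypothetical.
import os
import tempfile

def cookiefile_from_env(var: str = "YOUTUBE_COOKIES"):
    content = os.getenv(var)
    if not content:
        return None  # unset or empty: caller should omit 'cookiefile'
    tmp = tempfile.NamedTemporaryFile(
        mode="w", prefix="yt_cookies_", suffix=".txt", delete=False
    )
    with tmp:
        tmp.write(content)
    return tmp.name

video_opts = {"quiet": True, "no_warnings": True}
path = cookiefile_from_env()
if path:
    video_opts["cookiefile"] = path  # only set when cookies exist

This also sidesteps the empty-string default in download_youtube_video, which yt-dlp may otherwise try to open as a real filename.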
app_srv/test.ipynb CHANGED

@@ -425,49 +425,10 @@
    },
    {
     "cell_type": "code",
-    "execution_count":
+    "execution_count": null,
     "id": "e6d0e5fd",
     "metadata": {},
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Используемое устройство: cuda:0\n"
-      ]
-     },
-     {
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-       "Device set to use cuda:0\n"
-      ]
-     },
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Начало транскрипции файла: /workspaces/Video_Analyser/app_srv/temp/a6fba6eb-038e-4f4e-bcb7-f41d87ee1422/audio.mp3.mp3\n"
-      ]
-     },
-     {
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-       "/usr/local/lib/python3.12/dist-packages/transformers/models/whisper/generation_whisper.py:573: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead.\n",
-       "  warnings.warn(\n",
-       "Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.\n"
-      ]
-     },
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Транскрипция завершена.\n",
-       "{0.0: \"with their signature orange fur and black stripes, Tigers have become icons of beauty, power, and the importance of conservation. Tigers have evolved into six subspecies. to six subspecies. The tiger's tail of evolution can be traced back to about two million years ago when the earliest known tiger ancestor left Africa and ventured into Asia. Over time, the big cat split into nine subspecies with six still alive today. The most numerous subspecies is the Bengal tiger, accounting for approximately 50% of the tiger population worldwide. Tigers are the world's largest cats. On average, the big cats weigh about 450 pounds, but the largest is the Siberian tiger subspecies, measuring up to 13 feet long and weighing up to 660 pounds. This extra weight is primarily because of large powerful muscles. Unlike lions, the second largest of the big cats, tigers have more muscle mass and are therefore heavier. Tigers have webbed toes, often living near bodies of water, tigers have adapted for a semi-aquatic lifestyle. They appear to enjoy being in the water, unlike most cats, and are excellent swimmers using their powerful muscles to propel their bodies in the water. They've also evolved to have webbing between their toes. The webbing allows the big cats to push around a greater volume of water with each stroke of their paws. White tigers are a type of Bengal tiger. The classic black and orange coloration of most tigers is caused by the pigments U melanin, which turns for black, and pheomelanin, which turns for black, and pheomelanin, which turns fur orange. The production of pheomelanin is triggered by the gene SLC45A2. White tigers carry a mutated version of this gene which prevents them from producing orange pigmentation. Fewer than 4,000 tigers remain in the wild. At the turn of the 20th century, approximately 100,000 tigers roamed the wild, living as far west as Turkey and as far north as Russia, but due to deforestation, human development, and poaching, their range severely decreased and within 100 years, the world's tiger population declined by about 96%. by about 96%. Over the past few decades, programs have been put in place to protect tigers and their habitats with the help of the global community, tiger populations may slowly rebound. slowly rebound. Music. you Thank you. you you you you you you Music playing you Music playing you you you Music playing you you\"}\n"
-      ]
-     }
-    ],
+    "outputs": [],
     "source": [
      "import torch\n",
      "from transformers import pipeline\n",
@@ -537,20 +498,10 @@
    },
    {
     "cell_type": "code",
-    "execution_count":
+    "execution_count": null,
     "id": "ca9a4832",
     "metadata": {},
-    "outputs": [
-     {
-      "name": "stderr",
-      "output_type": "stream",
-      "text": [
-       "Device set to use cuda:0\n",
-       "/usr/local/lib/python3.12/dist-packages/transformers/models/whisper/generation_whisper.py:573: FutureWarning: The input name `inputs` is deprecated. Please make sure to use `input_features` instead.\n",
-       "  warnings.warn(\n"
-      ]
-     }
-    ],
+    "outputs": [],
     "source": [
      "from transformers import pipeline\n",
      "import librosa\n",
@@ -571,21 +522,10 @@
    },
    {
     "cell_type": "code",
-    "execution_count":
+    "execution_count": null,
     "id": "7cd4e28e",
     "metadata": {},
-    "outputs": [
-     {
-      "data": {
-       "text/plain": [
-        "\" With their signature orange fur and black stripes, tigers have become icons of beauty, power, and the importance of conservation. Tigers have evolved into six subspecies. The tiger's tale of evolution can be traced back to about two million years ago when the earliest known tiger ancestor left Africa and ventured into Asia. Over time, the big cats split into nine subspecies, with six still alive today. The most numerous subspecies is the Bengal tiger, accounting for approximately 50% of the tiger population worldwide. Tigers are the world's largest cats. On average, the big cats weigh about 450 pounds, but the largest is the Siberian tiger subspecies, measuring up to 13 feet long and weighing up to 660 pounds. This extra weight is primarily because of large powerful muscles. Unlike lions, the second largest of the big cats, tigers have more muscle mass and are therefore heavier. Tigers have webbed toes. Even living near bodies of water, tigers have adapted for a semi-aquatic lifestyle. They appear to enjoy being in the water, unlike most cats, and are excellent swimmers, using their powerful muscles to propel their bodies in the water. They've also evolved to have webbing between their toes. The webbing allows the big cats to push around a greater volume of water with each stroke of their paws. White tigers are a type of Bengal tiger. The classic black and orange coloration of most tigers is caused by the pigments U-melanin which turns for black and pheomelanin which turns for orange. The production of pheomelanin is triggered by the gene SLC45A2. White tigers carry a mutated version of this gene which prevents them from producing orange pigmentation. Fewer than 4,000 tigers remain in the wild. At the turn of the 20th century, approximately 100,000 tigers roamed the wild, living as far west as Turkey and as far north as Russia. But due to deforestation, human development, and poaching, their range severely decreased, within 100 years, the world's tiger population declined by about 96%. Over the past few decades, programs have been put in place to protect tigers and their habitats. With the help of the global community, tiger populations may slowly rebound. Thank you very much for watching this video, and I'll see you in the next one.\""
-       ]
-      },
-      "execution_count": 23,
-      "metadata": {},
-      "output_type": "execute_result"
-     }
-    ],
+    "outputs": [],
     "source": [
      "result"
     ]
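The notebook edits simply null out every execution_count and empty every outputs list. If that is meant to happen on each commit, it can be scripted; here is a small sketch using nbformat (the `jupyter nbconvert --clear-output --inplace` CLI achieves the same result):

# Sketch: strip outputs and execution counts, as this commit does by hand.
import nbformat

nb = nbformat.read("app_srv/test.ipynb", as_version=4)
for cell in nb.cells:
    if cell.cell_type == "code":
        cell.outputs = []             # -> "outputs": []
        cell.execution_count = None   # -> "execution_count": null
nbformat.write(nb, "app_srv/test.ipynb")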
packages.txt CHANGED

@@ -12,4 +12,5 @@ python3-setuptools
 python-is-python3
 wget
 zlib1g
-net-tools
+net-tools
+curl
requirements.txt CHANGED

@@ -14,4 +14,5 @@ ipykernel==6.29.5
 ipywidgets==8.1.7
 yt-dlp==2025.5.22
 qwen-vl-utils==0.0.11
-librosa==0.11.0
+librosa==0.11.0
+gradio_client