Spaces:

artyomboyko
/

Aura_AI_Scan

Running

App Files Files Community

Artyom Boyko committed on Jun 5

Commit

c867d05

1 Parent(s): 56f0d7c

Testing a new variant of the Gradio MCP server.

Browse files

Files changed (2) hide show

app_srv/app_srv.py +157 -95
requirements.txt +1 -1

app_srv/app_srv.py CHANGED Viewed

@@ -1,151 +1,213 @@
 import gradio as gr
 import torch
 import os
-from PIL import Image
-import base64
-from io import BytesIO
-from pathlib import Path
 from downloader import download_youtube_video
 from video_processing import extract_frames_with_timestamps, generate_frame_descriptions
 from audio_processing import transcribe_audio
 from model_api import get_device_and_dtype
-# Инициализация устройства и типа данных
 device, dtype = get_device_and_dtype()
-# Промпт по умолчанию
 DEFAULT_PROMPT = "Analyze the frame, describe what objects are in the frame, how many there are, the background and the action taking place."
-def process_video(youtube_url: str, prompt: str, quality: str, time_step: float):
-    """Основная функция обработки видео"""
     try:
-        # 1. Скачивание видео
         video_data = download_youtube_video(
             url=youtube_url,
             video_quality=quality
         )
-        # 2. Извлечение кадров с CUDA
-        frames = extract_frames_with_timestamps(
             video_path=video_data['video_path'],
             output_dir=video_data['data_path'],
             time_step=time_step,
             hw_device="cuda"
         )
-        # 3. Генерация описаний
         descriptions = generate_frame_descriptions(
-            frames_dict=frames,
             custom_prompt=prompt,
             device=device,
             torch_dtype=dtype
         )
-        # 4. Транскрипция аудио
-        transcription = transcribe_audio(video_data['audio_path'])
-        # 5. Форматирование результатов
-        results_html = []
-        for timestamp, frame_path in frames.items():
-            # Получаем описание для текущего кадра
-            frame_desc = descriptions.get(timestamp, "No description available")
-            # Обработка изображения
-            if os.path.exists(frame_path):
-                with Image.open(frame_path) as img:
-                    img.thumbnail((400, 400))
-                    buffered = BytesIO()
-                    img.save(buffered, format="JPEG", quality=85)
-                    img_base64 = base64.b64encode(buffered.getvalue()).decode()
-                    img_html = f'<img src="data:image/jpeg;base64,{img_base64}" style="max-height:300px; border-radius:5px; border:1px solid #ddd;">'
-            else:
-                img_html = f'<div style="color:red; padding:10px;">Image not found</div>'
-            # Форматирование HTML блока
-            frame_html = f"""
-            <div style="border:1px solid #e0e0e0; border-radius:8px; padding:15px; margin-bottom:20px; background:#f8f8f8;">
-                <div style="display:flex; gap:20px; align-items:flex-start;">
-                    <div style="flex:1; min-width:300px; display:flex; justify-content:center; align-items:center;">
-                        {img_html}
-                    </div>
-                    <div style="flex:2;">
-                        <h3 style="margin-top:0; color:#222; font-size:16px; font-weight:600;">Timestamp: {timestamp}</h3>
-                        <div style="background:#fff; padding:15px; border-radius:6px; border-left:4px solid #4285f4;
-                                    color:#333; font-size:14px; line-height:1.5; box-shadow:0 1px 3px rgba(0,0,0,0.1);">
-                            {frame_desc}
-                        </div>
-                    </div>
-                </div>
-            </div>
-            """
-            results_html.append(frame_html)
-        return "\n".join(results_html), transcription
     except Exception as e:
-        return f"❌ Processing error: {str(e)}", ""
-# Создание Gradio интерфейса
 with gr.Blocks(title="Video Analysis Tool", css="""
     .gradio-container {max-width: 1200px !important}
-    .frame-results {max-height: 70vh; overflow-y: auto; padding-right:10px;}
     .output-box {border-radius: 8px !important; margin-top:15px;}
-    .audio-output {background:#f8f8f8 !important; padding:15px !important;}
     h1 {color: #1a73e8 !important;}
 """) as demo:
     gr.Markdown("""
     # 🎥 Video Analysis Tool
-    Analyze YouTube videos - get frame-by-frame descriptions with timestamps
     """)
     with gr.Row():
-        with gr.Column(scale=1, min_width=400):
-            youtube_url = gr.Textbox(
-                label="YouTube Video URL",
-                value="https://www.youtube.com/watch?v=FK3dav4bA4s&t=1s",
-                lines=1
             )
-            prompt = gr.Textbox(
-                label="Analysis Prompt",
-                value=DEFAULT_PROMPT,
-                lines=5,
-                max_lines=10
             )
-            with gr.Row():
-                quality = gr.Dropdown(
-                    label="Video Quality",
-                    choices=[144, 240, 360, 480, 720, 1080, 1440, 2160],
-                    value=720
-                )
-                time_step = gr.Slider(
-                    label="Frame Interval (seconds)",
-                    minimum=0.5,
-                    maximum=30,
-                    step=0.5,
-                    value=2
-                )
-            submit_btn = gr.Button("Analyze Video", variant="primary")
-        with gr.Column(scale=2):
-            video_output = gr.HTML(
-                label="Frame Analysis Results",
-                elem_classes=["frame-results", "output-box"]
-            )
-            audio_output = gr.Textbox(
-                label="Audio Transcription",
-                interactive=False,
-                lines=10,
-                max_lines=15,
-                elem_classes=["output-box", "audio-output"]
-            )
     submit_btn.click(
-        fn=process_video,
         inputs=[youtube_url, prompt, quality, time_step],
-        outputs=[video_output, audio_output]
     )
 if __name__ == "__main__":
-    demo.launch(mcp_server=True)

 import gradio as gr
 import torch
 import os
+import json
+import requests # Added for making HTTP requests
+import socket   # Added for getting hostname
+# Import your modules
 from downloader import download_youtube_video
 from video_processing import extract_frames_with_timestamps, generate_frame_descriptions
 from audio_processing import transcribe_audio
 from model_api import get_device_and_dtype
+# Initialize device and data type
 device, dtype = get_device_and_dtype()
+# Default prompt
 DEFAULT_PROMPT = "Analyze the frame, describe what objects are in the frame, how many there are, the background and the action taking place."
+# --- FUNCTION TO GET PUBLIC IP AND HOSTNAME (NOT FOR MCP) ---
+def get_public_ip_and_hostname() -> str:
+    """
+    Retrieves the public IP address and the hostname of the machine.
+    This function is intended for display purposes within the Gradio UI
+    and should NOT be exposed via MCP API.
+    """
+    public_ip = "N/A"
+    hostname = "N/A"
+    try:
+        # Get public IP address
+        response = requests.get("https://api.ipify.org?format=json", timeout=5)
+        response.raise_for_status() # Raise an exception for HTTP errors
+        public_ip = response.json().get("ip", "N/A")
+    except requests.exceptions.RequestException as e:
+        print(f"Error getting public IP: {e}")
+        public_ip = f"Error: {e}"
     try:
+        # Get hostname
+        hostname = socket.gethostname()
+    except Exception as e:
+        print(f"Error getting hostname: {e}")
+        hostname = f"Error: {e}"
+    return f"Public IP: {public_ip} | Hostname: {hostname}"
+# --- OPTIMIZED FUNCTION, RETURNING JSON STRING ---
+def analyze_video_data(youtube_url: str, prompt: str, quality: int, time_step: float) -> str:
+    """
+    Analyzes a YouTube video by downloading it, extracting frames, generating descriptions
+    for each frame, and transcribing the audio.
+    Args:
+        youtube_url (str): The URL of the YouTube video to analyze.
+        prompt (str): A custom prompt to guide the frame description generation.
+        quality (int): The desired video quality in pixels (e.g., 144, 240, 360, 480, 720, 1080, 1440, 2160).
+                       Note: The actual quality might vary based on available streams.
+        time_step (float): The interval in seconds at which to extract frames. The lower the value, the better the quality of the analysis result.
+    Returns:
+        str: A JSON formatted string containing the analysis results.
+             The JSON structure includes:
+             - "status": "success" if the analysis was successful, "error" otherwise.
+             - "message": A brief description of the outcome (empty string for success,
+                          or an error message for error).
+             - "frame_analysis": A list of dictionaries, where each dictionary represents a frame
+                                 and contains "timestamp" and "description".
+             - "audio_transcription": The transcribed text of the video's audio.
+    Raises:
+        Exception: Catches any exceptions during the process and returns them
+                   within the JSON output for user feedback.
+    """
+    results = {
+        "status": "success",  # Default to success
+        "message": "",        # Default message is empty for success
+        "frame_analysis": [],
+        "audio_transcription": ""
+    }
+    try:
+        # 1. Download video
         video_data = download_youtube_video(
             url=youtube_url,
             video_quality=quality
         )
+        # 2. Extract frames
+        # frames_dict: {timestamp: path_to_frame_image}
+        frames_dict = extract_frames_with_timestamps(
             video_path=video_data['video_path'],
             output_dir=video_data['data_path'],
             time_step=time_step,
             hw_device="cuda"
         )
+        # 3. Generate descriptions for frames
         descriptions = generate_frame_descriptions(
+            frames_dict=frames_dict,
             custom_prompt=prompt,
             device=device,
             torch_dtype=dtype
         )
+        # 4. Transcribe audio
+        transcription_text = transcribe_audio(video_data['audio_path'])
+        # 5. Formulate results structure
+        for timestamp, frame_path in frames_dict.items():
+            description = descriptions.get(timestamp, "No description available")
+            results["frame_analysis"].append({
+                "timestamp": timestamp,
+                "description": description,
+            })
+        results["audio_transcription"] = transcription_text
+        # Return formatted JSON string
+        return json.dumps(results, indent=2, ensure_ascii=False)
     except Exception as e:
+        error_message = f"Processing error: {str(e)}"
+        print(f"An error occurred during video analysis: {e}") # For debugging
+        results["status"] = "error" # Set status to error
+        results["message"] = error_message # Set error message
+        results["frame_analysis"] = [] # Clear frame results on error
+        results["audio_transcription"] = "" # Clear transcription on error
+        # In case of error, return JSON string with error details
+        return json.dumps(results, indent=2, ensure_ascii=False)
+# Create Gradio interface
 with gr.Blocks(title="Video Analysis Tool", css="""
     .gradio-container {max-width: 1200px !important}
     .output-box {border-radius: 8px !important; margin-top:15px;}
+    .results-output {background:#f8f8f8 !important; padding:15px !important;}
     h1 {color: #1a73e8 !important;}
+    .ip-info {
+        font-size: 0.9em;
+        color: #666;
+        margin-top: -15px; /* Adjust as needed to pull it closer to the title */
+        margin-bottom: 10px;
+    }
 """) as demo:
     gr.Markdown("""
     # 🎥 Video Analysis Tool
+    Analyze YouTube videos - get frame-by-frame descriptions with timestamps and audio transcription.
     """)
+    # NEW: Display Public IP and Hostname
+    # We use a gr.Markdown component to display the text.
+    # The key here is that get_public_ip_and_hostname is NOT directly an input/output
+    # of a button. It's called once when the app loads, or its output is static.
+    # To prevent it from being in MCP API, we typically don't expose it via gr.Interface
+    # or explicitly set show_api=False for the component if it were interactive.
+    # Here, it's a simple call rendered in Markdown, so it won't be exposed.
+    gr.Markdown(
+        f"<div class='ip-info'>{get_public_ip_and_hostname()}</div>",
+        # This component itself does not expose an API endpoint if it's just static Markdown
+        # or updated via a gr.State and not directly via a `fn` in `click` with `show_api=True`.
+        # The key is that the function `get_public_ip_and_hostname` is called
+        # during the UI definition, not as an API endpoint.
+    )
     with gr.Row():
+        youtube_url = gr.Textbox(
+            label="YouTube Video URL",
+            value="https://www.youtube.com/watch?v=FK3dav4bA4s",
+            lines=1,
+            scale=3
+        )
+        prompt = gr.Textbox(
+            label="Analysis Prompt",
+            value=DEFAULT_PROMPT,
+            lines=3,
+            max_lines=5,
+            scale=4
+        )
+        with gr.Column(scale=2, min_width=200):
+            quality = gr.Dropdown(
+                label="Video Quality",
+                choices=[144, 240, 360, 480, 720, 1080, 1440, 2160],
+                value=480
             )
+            time_step = gr.Slider(
+                label="Frame Interval (seconds)",
+                minimum=0.5,
+                maximum=30,
+                step=0.5,
+                value=30
             )
+            submit_btn = gr.Button("Start Video Analysis", variant="primary")
+    # Next row: Analysis results (gr.JSON)
+    with gr.Row():
+        results_json_viewer = gr.JSON(
+            label="Raw Analysis Results (JSON)",
+            elem_classes=["output-box", "results-output"],
+        )
+    # Direct binding of the button to the single processing function
     submit_btn.click(
+        fn=analyze_video_data,
         inputs=[youtube_url, prompt, quality, time_step],
+        outputs=[results_json_viewer]
     )
 if __name__ == "__main__":
+    demo.launch(share=False, mcp_server=True)

requirements.txt CHANGED Viewed

@@ -5,7 +5,7 @@ tqdm==4.67.1
 datasets==3.6.0
 evaluate==0.4.3
 accelerate==1.7.0
-gradio==5.32.1
 gradio[mcp]
 ipython==9.3.0
 ipywidgets==8.1.7

 datasets==3.6.0
 evaluate==0.4.3
 accelerate==1.7.0
+gradio==5.33.0
 gradio[mcp]
 ipython==9.3.0
 ipywidgets==8.1.7