Spaces:

ParsaKhaz
/

redact-video-demo

Running on Zero

App Files Files Community

ParsaKhaz committed 26 days ago

Commit

12d83f6

verified ·

1 Parent(s): edf540a

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -75

app.py CHANGED Viewed

@@ -20,25 +20,28 @@ print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 print("Loading Moondream model...")
 model, tokenizer = load_moondream()
 # Uncomment for Hugging Face Spaces
 @spaces.GPU(duration=120)
-def process_video_file(video_file, detect_keyword, box_style, ffmpeg_preset, rows, cols, test_mode):
     """Process a video file through the Gradio interface."""
     try:
         if not video_file:
             raise gr.Error("Please upload a video file")
         # Ensure input/output directories exist using absolute paths
-        inputs_dir = os.path.join(WORKSPACE_ROOT, 'inputs')
-        outputs_dir = os.path.join(WORKSPACE_ROOT, 'outputs')
         os.makedirs(inputs_dir, exist_ok=True)
         os.makedirs(outputs_dir, exist_ok=True)
         # Copy uploaded video to inputs directory
         video_filename = f"input_{os.path.basename(video_file)}"
         input_video_path = os.path.join(inputs_dir, video_filename)
         shutil.copy2(video_file, input_video_path)
         try:
             # Process the video
             output_path = process_video(
@@ -48,31 +51,37 @@ def process_video_file(video_file, detect_keyword, box_style, ffmpeg_preset, row
                 ffmpeg_preset=ffmpeg_preset,
                 rows=rows,
                 cols=cols,
-                box_style=box_style
             )
             # Verify output exists and is readable
             if not output_path or not os.path.exists(output_path):
                 print(f"Warning: Output path {output_path} does not exist")
                 # Try to find the output based on expected naming convention
-                expected_output = os.path.join(outputs_dir, f'{box_style}_{detect_keyword}_{video_filename}')
                 if os.path.exists(expected_output):
                     output_path = expected_output
                 else:
                     # Try searching in outputs directory for any matching file
-                    matching_files = [f for f in os.listdir(outputs_dir) if f.startswith(f'{box_style}_{detect_keyword}_')]
                     if matching_files:
                         output_path = os.path.join(outputs_dir, matching_files[0])
                     else:
                         raise gr.Error("Failed to locate output video")
             # Convert output path to absolute path if it isn't already
             if not os.path.isabs(output_path):
                 output_path = os.path.join(WORKSPACE_ROOT, output_path)
             print(f"Returning output path: {output_path}")
             return output_path
         finally:
             # Clean up input file
             try:
@@ -80,92 +89,113 @@ def process_video_file(video_file, detect_keyword, box_style, ffmpeg_preset, row
                     os.remove(input_video_path)
             except:
                 pass
     except Exception as e:
         print(f"Error in process_video_file: {str(e)}")
         raise gr.Error(f"Error processing video: {str(e)}")
 # Create the Gradio interface
 with gr.Blocks(title="Promptable Video Redaction") as app:
     gr.Markdown("# Promptable Video Redaction with Moondream")
-    gr.Markdown("""
-    This app uses [Moondream 2B](https://github.com/vikhyat/moondream), a powerful yet lightweight vision-language model,
-    to detect and visualize objects in videos. Moondream can recognize a wide variety of objects, people, text, and more
-    with high accuracy while being much smaller than traditional models. This enables Moondream to redact content from
-    video quickly with its [object detection](https://docs.moondream.ai/cloud/detect) capabilities.
-    Upload a video and specify what you want to detect. The app will process each frame using Moondream and visualize
-    the detections using your chosen style. Join the [Moondream Discord server](https://discord.com/invite/tRUdpjDQfH) if you have questions on how this works.
-    """)
     with gr.Row():
         with gr.Column():
             # Input components
             video_input = gr.Video(label="Upload Video")
             detect_input = gr.Textbox(
-                label="What to Detect",
-                placeholder="e.g. face, logo, text, person, car, dog, etc.",
                 value="face",
-                info="Moondream can detect almost anything you can describe in natural language"
-            )
-            box_style_input = gr.Radio(
-                choices=['censor', 'bounding-box', 'hitmarker'],
-                value='censor',
-                label="Visualization Style",
-                info="Choose how to display detections"
-            )
-            preset_input = gr.Dropdown(
-                choices=['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow'],
-                value='medium',
-                label="Processing Speed (faster = lower quality)"
             )
-            with gr.Row():
-                rows_input = gr.Slider(minimum=1, maximum=4, value=1, step=1, label="Grid Rows")
-                cols_input = gr.Slider(minimum=1, maximum=4, value=1, step=1, label="Grid Columns")
-            test_mode_input = gr.Checkbox(
-                label="Test Mode (Process first 3 seconds only)",
-                value=True,
-                info="Enable to quickly test settings on a short clip before processing the full video (recommended)"
-            )
             process_btn = gr.Button("Process Video", variant="primary")
-            gr.Markdown("""
-            Note: Processing in test mode will only process the first 3 seconds of the video and is recommended for testing settings.
-            """)
-            gr.Markdown("""
-            We can get a rough estimate of how long the video will take to process by multiplying the videos framerate * seconds * the number of rows and columns and assuming 0.12 seconds processing time per detection.
-            For example, a 3 second video at 30fps with 2x2 grid, the estimated time is 3 * 30 * 2 * 2 * 0.12 = 43.2 seconds (tested on a 4090 GPU).
-            """)
         with gr.Column():
             # Output components
             video_output = gr.Video(label="Processed Video")
             # About section under the video output
-            gr.Markdown("""
-            ### About Moondream
-            Moondream is a tiny yet powerful vision-language model that can analyze images and answer questions about them.
-            It's designed to be lightweight and efficient while maintaining high accuracy. Some key features:
-            - Only 2B parameters
-            - Fast inference with minimal resource requirements
-            - Supports CPU and GPU execution
-            - Open source and free to use
-            Links:
             - [GitHub Repository](https://github.com/vikhyat/moondream)
-            - [Hugging Face Space](https://huggingface.co/vikhyatk/moondream2)
             - [Python Package](https://pypi.org/project/moondream/)
-            - [Promptable Redaction Recipe](https://docs.moondream.ai/recipes)
-            """)
     # Event handlers
     process_btn.click(
         fn=process_video_file,
-        inputs=[video_input, detect_input, box_style_input, preset_input, rows_input, cols_input, test_mode_input],
-        outputs=video_output
     )
 if __name__ == "__main__":
-    app.launch(share=True)

 print("Loading Moondream model...")
 model, tokenizer = load_moondream()
 # Uncomment for Hugging Face Spaces
 @spaces.GPU(duration=120)
+def process_video_file(
+    video_file, detect_keyword, box_style, ffmpeg_preset, rows, cols, test_mode
+):
     """Process a video file through the Gradio interface."""
     try:
         if not video_file:
             raise gr.Error("Please upload a video file")
         # Ensure input/output directories exist using absolute paths
+        inputs_dir = os.path.join(WORKSPACE_ROOT, "inputs")
+        outputs_dir = os.path.join(WORKSPACE_ROOT, "outputs")
         os.makedirs(inputs_dir, exist_ok=True)
         os.makedirs(outputs_dir, exist_ok=True)
         # Copy uploaded video to inputs directory
         video_filename = f"input_{os.path.basename(video_file)}"
         input_video_path = os.path.join(inputs_dir, video_filename)
         shutil.copy2(video_file, input_video_path)
         try:
             # Process the video
             output_path = process_video(
                 ffmpeg_preset=ffmpeg_preset,
                 rows=rows,
                 cols=cols,
+                box_style=box_style,
             )
             # Verify output exists and is readable
             if not output_path or not os.path.exists(output_path):
                 print(f"Warning: Output path {output_path} does not exist")
                 # Try to find the output based on expected naming convention
+                expected_output = os.path.join(
+                    outputs_dir, f"{box_style}_{detect_keyword}_{video_filename}"
+                )
                 if os.path.exists(expected_output):
                     output_path = expected_output
                 else:
                     # Try searching in outputs directory for any matching file
+                    matching_files = [
+                        f
+                        for f in os.listdir(outputs_dir)
+                        if f.startswith(f"{box_style}_{detect_keyword}_")
+                    ]
                     if matching_files:
                         output_path = os.path.join(outputs_dir, matching_files[0])
                     else:
                         raise gr.Error("Failed to locate output video")
             # Convert output path to absolute path if it isn't already
             if not os.path.isabs(output_path):
                 output_path = os.path.join(WORKSPACE_ROOT, output_path)
             print(f"Returning output path: {output_path}")
             return output_path
         finally:
             # Clean up input file
             try:
                     os.remove(input_video_path)
             except:
                 pass
     except Exception as e:
         print(f"Error in process_video_file: {str(e)}")
         raise gr.Error(f"Error processing video: {str(e)}")
 # Create the Gradio interface
 with gr.Blocks(title="Promptable Video Redaction") as app:
     gr.Markdown("# Promptable Video Redaction with Moondream")
+    gr.Markdown(
+        """
+    [Moondream 2B](https://github.com/vikhyat/moondream) is a lightweight vision model that detects and visualizes objects in videos. It can identify objects, people, text and more.
+    Upload a video and specify what to detect. The app will process each frame and apply your chosen visualization style. For help, join the [Moondream Discord](https://discord.com/invite/tRUdpjDQfH).
+    """
+    )
     with gr.Row():
         with gr.Column():
             # Input components
             video_input = gr.Video(label="Upload Video")
             detect_input = gr.Textbox(
+                label="What to Detect",
+                placeholder="e.g. face, logo, text, person, car, dog, etc.",
                 value="face",
+                info="Moondream can detect anything that you can describe in natural language",
             )
             process_btn = gr.Button("Process Video", variant="primary")
+            with gr.Accordion("Advanced Settings", open=False):
+                box_style_input = gr.Radio(
+                    choices=["censor", "bounding-box", "hitmarker"],
+                    value="censor",
+                    label="Visualization Style",
+                    info="Choose how to display detections",
+                )
+                preset_input = gr.Dropdown(
+                    choices=[
+                        "ultrafast",
+                        "superfast",
+                        "veryfast",
+                        "faster",
+                        "fast",
+                        "medium",
+                        "slow",
+                        "slower",
+                        "veryslow",
+                    ],
+                    value="medium",
+                    label="Processing Speed (faster = lower quality)",
+                )
+                with gr.Row():
+                    rows_input = gr.Slider(
+                        minimum=1, maximum=4, value=1, step=1, label="Grid Rows"
+                    )
+                    cols_input = gr.Slider(
+                        minimum=1, maximum=4, value=1, step=1, label="Grid Columns"
+                    )
+                test_mode_input = gr.Checkbox(
+                    label="Test Mode (Process first 3 seconds only)",
+                    value=True,
+                    info="Enable to quickly test settings on a short clip before processing the full video (recommended)",
+                )
+                gr.Markdown(
+                    """
+                Note: Processing in test mode will only process the first 3 seconds of the video and is recommended for testing settings.
+                """
+                )
+                gr.Markdown(
+                    """
+                We can get a rough estimate of how long the video will take to process by multiplying the videos framerate * seconds * the number of rows and columns and assuming 0.12 seconds processing time per detection.
+                For example, a 3 second video at 30fps with 2x2 grid, the estimated time is 3 * 30 * 2 * 2 * 0.12 = 43.2 seconds (tested on a 4090 GPU).
+                """
+                )
         with gr.Column():
             # Output components
             video_output = gr.Video(label="Processed Video")
             # About section under the video output
+            gr.Markdown(
+                """
+            ### Links:
             - [GitHub Repository](https://github.com/vikhyat/moondream)
+            - [Hugging Face](https://huggingface.co/vikhyatk/moondream2)
             - [Python Package](https://pypi.org/project/moondream/)
+            - [Moondream Recipes](https://docs.moondream.ai/recipes)
+            """
+            )
     # Event handlers
     process_btn.click(
         fn=process_video_file,
+        inputs=[
+            video_input,
+            detect_input,
+            box_style_input,
+            preset_input,
+            rows_input,
+            cols_input,
+            test_mode_input,
+        ],
+        outputs=video_output,
     )
 if __name__ == "__main__":
+    app.launch(share=True)