# NOTE: The lines below were Hugging Face Spaces page chrome ("Spaces:
# Running on Zero") captured when this file was exported from the web UI.
# They are kept here as a comment so the file remains valid Python.
#!/usr/bin/env python3
"""Gradio front-end for Moondream-based video object detection."""
import os
import shutil
import tempfile

import gradio as gr

from main import load_moondream, process_video

# Absolute path to the directory containing this file, so the inputs/outputs
# directories resolve correctly no matter what the current working dir is.
WORKSPACE_ROOT = os.path.dirname(os.path.abspath(__file__))

# Load the model once at import time so every request reuses it.
print("Loading Moondream model...")
model, tokenizer = load_moondream()
def process_video_file(video_file, detect_keyword, box_style, ffmpeg_preset, rows, cols, test_mode):
    """Process an uploaded video through the Gradio interface.

    Args:
        video_file: Path to the uploaded video (as supplied by gr.Video).
        detect_keyword: Natural-language description of what to detect.
        box_style: Visualization style ('censor', 'yolo', or 'hitmarker').
        ffmpeg_preset: ffmpeg encoding preset name.
        rows: Number of grid rows used when tiling frames for detection.
        cols: Number of grid columns used when tiling frames for detection.
        test_mode: If True, only a short leading clip is processed.

    Returns:
        Absolute path to the processed output video.

    Raises:
        gr.Error: If no video was uploaded, or processing fails, or the
            output video cannot be located.
    """
    try:
        if not video_file:
            raise gr.Error("Please upload a video file")

        # Ensure input/output directories exist using absolute paths.
        inputs_dir = os.path.join(WORKSPACE_ROOT, 'inputs')
        outputs_dir = os.path.join(WORKSPACE_ROOT, 'outputs')
        os.makedirs(inputs_dir, exist_ok=True)
        os.makedirs(outputs_dir, exist_ok=True)

        # Copy the upload into the inputs directory so process_video reads
        # from a stable, known location (Gradio uploads live in temp dirs).
        video_filename = f"input_{os.path.basename(video_file)}"
        input_video_path = os.path.join(inputs_dir, video_filename)
        shutil.copy2(video_file, input_video_path)

        try:
            output_path = process_video(
                input_video_path,
                detect_keyword,
                test_mode=test_mode,
                ffmpeg_preset=ffmpeg_preset,
                rows=rows,
                cols=cols,
                box_style=box_style
            )

            # Verify the output exists; fall back first to the expected
            # naming convention, then to a prefix search of outputs_dir.
            if not output_path or not os.path.exists(output_path):
                print(f"Warning: Output path {output_path} does not exist")
                expected_output = os.path.join(
                    outputs_dir, f'{box_style}_{detect_keyword}_{video_filename}')
                if os.path.exists(expected_output):
                    output_path = expected_output
                else:
                    matching_files = [
                        f for f in os.listdir(outputs_dir)
                        if f.startswith(f'{box_style}_{detect_keyword}_')
                    ]
                    if matching_files:
                        output_path = os.path.join(outputs_dir, matching_files[0])
                    else:
                        raise gr.Error("Failed to locate output video")

            # Gradio needs an absolute path to serve the file.
            if not os.path.isabs(output_path):
                output_path = os.path.join(WORKSPACE_ROOT, output_path)

            print(f"Returning output path: {output_path}")
            return output_path
        finally:
            # Best-effort cleanup of the copied input; only swallow
            # filesystem errors so real bugs are not hidden.
            try:
                if os.path.exists(input_video_path):
                    os.remove(input_video_path)
            except OSError:
                pass
    except gr.Error:
        # Already a user-facing error; re-raise without double-wrapping.
        raise
    except Exception as e:
        print(f"Error in process_video_file: {str(e)}")
        raise gr.Error(f"Error processing video: {str(e)}")
# Create the Gradio interface: inputs on the left, processed video on the
# right, with the button wired to process_video_file.
with gr.Blocks(title="Video Object Detection with Moondream") as app:
    gr.Markdown("# Video Object Detection with Moondream")
    gr.Markdown("""
    This app uses [Moondream](https://github.com/vikhyat/moondream), a powerful yet lightweight vision-language model,
    to detect and visualize objects in videos. Moondream can recognize a wide variety of objects, people, text, and more
    with high accuracy while being much smaller than traditional models.

    Upload a video and specify what you want to detect. The app will process each frame using Moondream and visualize
    the detections using your chosen style.
    """)

    with gr.Row():
        with gr.Column():
            # Input components
            video_input = gr.Video(label="Upload Video")
            detect_input = gr.Textbox(
                label="What to Detect",
                placeholder="e.g. face, logo, text, person, car, dog, etc.",
                value="face",
                info="Moondream can detect almost anything you can describe in natural language"
            )
            box_style_input = gr.Radio(
                choices=['censor', 'yolo', 'hitmarker'],
                value='censor',
                label="Visualization Style",
                info="Choose how to display detections"
            )
            preset_input = gr.Dropdown(
                choices=['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow'],
                value='medium',
                label="Processing Speed (faster = lower quality)"
            )
            with gr.Row():
                rows_input = gr.Slider(minimum=1, maximum=4, value=1, step=1, label="Grid Rows")
                cols_input = gr.Slider(minimum=1, maximum=4, value=1, step=1, label="Grid Columns")
            test_mode_input = gr.Checkbox(
                label="Test Mode (Process first 3 seconds only)",
                value=True,
                info="Enable to quickly test settings on a short clip before processing the full video (recommended)"
            )
            process_btn = gr.Button("Process Video", variant="primary")
            gr.Markdown("""
            Note: Processing in test mode will only process the first 3 seconds of the video and is recommended for testing settings.
            """)
            gr.Markdown("""
            We can get a rough estimate of how long the video will take to process by multiplying the videos framerate * seconds * the number of rows and columns and assuming 0.12 seconds processing time per detection.
            For example, a 3 second video at 30fps with 2x2 grid, the estimated time is 3 * 30 * 2 * 2 * 0.12 = 43.2 seconds (tested on a 4090 GPU).
            """)
        with gr.Column():
            # Output components
            video_output = gr.Video(label="Processed Video")
            # About section under the video output
            gr.Markdown("""
            ### About Moondream

            Moondream is a tiny yet powerful vision-language model that can analyze images and answer questions about them.
            It's designed to be lightweight and efficient while maintaining high accuracy. Some key features:

            - Only 2B parameters (compared to 80B+ in other models)
            - Fast inference with minimal resource requirements
            - Supports CPU and GPU execution
            - Open source and free to use

            Links:
            - [GitHub Repository](https://github.com/vikhyat/moondream)
            - [Hugging Face Space](https://huggingface.co/vikhyatk/moondream2)
            - [Python Package](https://pypi.org/project/moondream/)
            """)

    # Event handlers: clicking the button runs the processing pipeline.
    process_btn.click(
        fn=process_video_file,
        inputs=[video_input, detect_input, box_style_input, preset_input, rows_input, cols_input, test_mode_input],
        outputs=video_output
    )
if __name__ == "__main__": | |
app.launch(share=True) |