Spaces:
Build error
Build error
Fix video writing
Browse files- app.py +17 -24
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
import cv2
|
| 3 |
import tqdm
|
| 4 |
-
import shutil
|
| 5 |
-
import tempfile
|
| 6 |
import logging
|
| 7 |
-
import supervision as sv
|
| 8 |
-
import torch
|
| 9 |
|
|
|
|
| 10 |
import spaces
|
| 11 |
-
import gradio as gr
|
| 12 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
from pathlib import Path
|
| 15 |
from functools import lru_cache
|
|
@@ -151,13 +151,18 @@ def process_image(
|
|
| 151 |
|
| 152 |
def get_target_size(image_height, image_width, max_size: int):
|
| 153 |
if image_height < max_size and image_width < max_size:
|
| 154 |
-
|
| 155 |
-
|
| 156 |
new_height = max_size
|
| 157 |
new_width = int(image_width * max_size / image_height)
|
| 158 |
else:
|
| 159 |
new_width = max_size
|
| 160 |
new_height = int(image_height * max_size / image_width)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
return new_width, new_height
|
| 162 |
|
| 163 |
|
|
@@ -201,11 +206,6 @@ def process_video(
|
|
| 201 |
n_frames_to_read = min(MAX_NUM_FRAMES, video_info.total_frames // read_each_i_frame)
|
| 202 |
frames = read_video_k_frames(video_path, n_frames_to_read, read_each_i_frame)
|
| 203 |
|
| 204 |
-
# Use H.264 codec for browser compatibility
|
| 205 |
-
fourcc = cv2.VideoWriter_fourcc(*"H264")
|
| 206 |
-
temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
|
| 207 |
-
writer = cv2.VideoWriter(temp_file.name, fourcc, target_fps, (target_width, target_height))
|
| 208 |
-
|
| 209 |
box_annotator = sv.BoxAnnotator(thickness=1)
|
| 210 |
label_annotator = sv.LabelAnnotator(text_scale=0.5)
|
| 211 |
|
|
@@ -216,25 +216,18 @@ def process_video(
|
|
| 216 |
target_size=(target_height, target_width),
|
| 217 |
)
|
| 218 |
|
|
|
|
| 219 |
for frame, result in tqdm.tqdm(zip(frames, results), desc="Annotating frames", total=len(frames)):
|
| 220 |
frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
|
| 221 |
detections = sv.Detections.from_transformers(result, id2label=id2label)
|
| 222 |
detections = detections.with_nms(threshold=0.95, class_agnostic=True)
|
| 223 |
annotated_frame = box_annotator.annotate(scene=frame, detections=detections)
|
| 224 |
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
writer.release()
|
| 228 |
-
temp_file.close()
|
| 229 |
-
|
| 230 |
-
# Copy to persistent directory for Gradio access
|
| 231 |
-
output_filename = f"output_{os.path.basename(temp_file.name)}"
|
| 232 |
-
output_path = VIDEO_OUTPUT_DIR / output_filename
|
| 233 |
-
shutil.copy(temp_file.name, output_path)
|
| 234 |
-
os.unlink(temp_file.name) # Remove temporary file
|
| 235 |
-
logger.info(f"Video saved to {output_path}")
|
| 236 |
|
| 237 |
-
|
|
|
|
|
|
|
| 238 |
|
| 239 |
|
| 240 |
|
|
|
|
| 1 |
import os
|
| 2 |
import cv2
|
| 3 |
import tqdm
|
| 4 |
+
import uuid
|
|
|
|
| 5 |
import logging
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
import torch
|
| 8 |
import spaces
|
|
|
|
| 9 |
import numpy as np
|
| 10 |
+
import gradio as gr
|
| 11 |
+
import imageio.v3 as iio
|
| 12 |
+
import supervision as sv
|
| 13 |
|
| 14 |
from pathlib import Path
|
| 15 |
from functools import lru_cache
|
|
|
|
| 151 |
|
| 152 |
def get_target_size(image_height, image_width, max_size: int):
    """Compute the size a frame should be resized to.

    If both sides are smaller than ``max_size`` the original size is kept.
    Otherwise the longer side is set to ``max_size`` and the other side is
    scaled by the same factor (truncated to an integer). Both sides are then
    rounded down to an even number.

    Args:
        image_height: Height of the source image in pixels.
        image_width: Width of the source image in pixels.
        max_size: Upper bound for the longer side of the result.

    Returns:
        A ``(new_width, new_height)`` tuple. Note the order: width first,
        even though the parameters take height first.
    """
    if image_height < max_size and image_width < max_size:
        # Already small enough: keep each axis as is. The previous code
        # assigned width to height and vice versa, transposing the target
        # size for every non-square image in this branch.
        new_height, new_width = image_height, image_width
    elif image_height > image_width:
        new_height = max_size
        new_width = int(image_width * max_size / image_height)
    else:
        new_width = max_size
        new_height = int(image_height * max_size / image_width)

    # make even (for video codec compatibility); clamp to 2 so that extreme
    # aspect ratios or 1-pixel inputs never yield a zero-sized dimension
    new_height = max(2, new_height // 2 * 2)
    new_width = max(2, new_width // 2 * 2)

    return new_width, new_height
|
| 167 |
|
| 168 |
|
|
|
|
| 206 |
n_frames_to_read = min(MAX_NUM_FRAMES, video_info.total_frames // read_each_i_frame)
|
| 207 |
frames = read_video_k_frames(video_path, n_frames_to_read, read_each_i_frame)
|
| 208 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
box_annotator = sv.BoxAnnotator(thickness=1)
|
| 210 |
label_annotator = sv.LabelAnnotator(text_scale=0.5)
|
| 211 |
|
|
|
|
| 216 |
target_size=(target_height, target_width),
|
| 217 |
)
|
| 218 |
|
| 219 |
+
annotated_frames = []
|
| 220 |
for frame, result in tqdm.tqdm(zip(frames, results), desc="Annotating frames", total=len(frames)):
|
| 221 |
frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
|
| 222 |
detections = sv.Detections.from_transformers(result, id2label=id2label)
|
| 223 |
detections = detections.with_nms(threshold=0.95, class_agnostic=True)
|
| 224 |
annotated_frame = box_annotator.annotate(scene=frame, detections=detections)
|
| 225 |
annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
|
| 226 |
+
annotated_frames.append(annotated_frame)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
+
output_filename = os.path.join(VIDEO_OUTPUT_DIR, f"output_{uuid.uuid4()}.mp4")
|
| 229 |
+
iio.imwrite(output_filename, annotated_frames, fps=target_fps, codec="h264") #, pixelformat="yuv420p")
|
| 230 |
+
return output_filename
|
| 231 |
|
| 232 |
|
| 233 |
|
requirements.txt
CHANGED
|
@@ -7,4 +7,5 @@ ffmpeg-python
|
|
| 7 |
tqdm
|
| 8 |
pillow
|
| 9 |
supervision
|
| 10 |
-
spaces
|
|
|
|
|
|
| 7 |
tqdm
|
| 8 |
pillow
|
| 9 |
supervision
|
| 10 |
+
spaces
|
| 11 |
+
imageio[pyav]
|