d-fine-object-detection

Build error

App Files Files Community

qubvel-hf committed on May 2

Commit

9f2cbff

1 Parent(s): 455454b

Fix video writing

Browse files

Files changed (2) hide show

app.py +17 -24
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,15 +1,15 @@
 import os
 import cv2
 import tqdm
-import shutil
-import tempfile
 import logging
-import supervision as sv
-import torch
 import spaces
-import gradio as gr
 import numpy as np
 from pathlib import Path
 from functools import lru_cache
@@ -151,13 +151,18 @@ def process_image(
 def get_target_size(image_height, image_width, max_size: int):
     if image_height < max_size and image_width < max_size:
-        return image_width, image_height
-    if image_height > image_width:
         new_height = max_size
         new_width = int(image_width * max_size / image_height)
     else:
         new_width = max_size
         new_height = int(image_height * max_size / image_width)
     return new_width, new_height
@@ -201,11 +206,6 @@ def process_video(
     n_frames_to_read = min(MAX_NUM_FRAMES, video_info.total_frames // read_each_i_frame)
     frames = read_video_k_frames(video_path, n_frames_to_read, read_each_i_frame)
-    # Use H.264 codec for browser compatibility
-    fourcc = cv2.VideoWriter_fourcc(*"H264")
-    temp_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    writer = cv2.VideoWriter(temp_file.name, fourcc, target_fps, (target_width, target_height))
     box_annotator = sv.BoxAnnotator(thickness=1)
     label_annotator = sv.LabelAnnotator(text_scale=0.5)
@@ -216,25 +216,18 @@ def process_video(
         target_size=(target_height, target_width),
     )
     for frame, result in tqdm.tqdm(zip(frames, results), desc="Annotating frames", total=len(frames)):
         frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
         detections = sv.Detections.from_transformers(result, id2label=id2label)
         detections = detections.with_nms(threshold=0.95, class_agnostic=True)
         annotated_frame = box_annotator.annotate(scene=frame, detections=detections)
         annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
-        writer.write(cv2.cvtColor(annotated_frame, cv2.COLOR_RGB2BGR))
-    writer.release()
-    temp_file.close()
-    # Copy to persistent directory for Gradio access
-    output_filename = f"output_{os.path.basename(temp_file.name)}"
-    output_path = VIDEO_OUTPUT_DIR / output_filename
-    shutil.copy(temp_file.name, output_path)
-    os.unlink(temp_file.name)  # Remove temporary file
-    logger.info(f"Video saved to {output_path}")
-    return str(output_path)

 import os
 import cv2
 import tqdm
+import uuid
 import logging
+import torch
 import spaces
 import numpy as np
+import gradio as gr
+import imageio.v3 as iio
+import supervision as sv
 from pathlib import Path
 from functools import lru_cache
 def get_target_size(image_height, image_width, max_size: int):
     """Compute an output frame size whose sides are even and bounded by ``max_size``.

     If both sides are already smaller than ``max_size`` the input size is
     kept; otherwise the longer side is set to ``max_size`` and the shorter
     side is scaled by the same ratio. Both sides are then rounded down to
     an even number.

     Args:
         image_height: Source height in pixels.
         image_width: Source width in pixels.
         max_size: Upper bound for the longer side of the result.

     Returns:
         A ``(new_width, new_height)`` tuple -- note the width-first order,
         which differs from the argument order.
     """
     if image_height < max_size and image_width < max_size:
         # Keep the original size. Height stays height and width stays width;
         # the previous code swapped them here, transposing small inputs.
         new_height, new_width = image_height, image_width
     elif image_height > image_width:
         new_height = max_size
         new_width = int(image_width * max_size / image_height)
     else:
         new_width = max_size
         new_height = int(image_height * max_size / image_width)

     # Make both sides even (for video codec compatibility). Clamp to 2 so a
     # 1-pixel side or an extreme aspect ratio cannot round down to 0.
     new_height = max(2, new_height // 2 * 2)
     new_width = max(2, new_width // 2 * 2)
     return new_width, new_height
     n_frames_to_read = min(MAX_NUM_FRAMES, video_info.total_frames // read_each_i_frame)
     frames = read_video_k_frames(video_path, n_frames_to_read, read_each_i_frame)
     box_annotator = sv.BoxAnnotator(thickness=1)
     label_annotator = sv.LabelAnnotator(text_scale=0.5)
         target_size=(target_height, target_width),
     )
+    annotated_frames = []
     for frame, result in tqdm.tqdm(zip(frames, results), desc="Annotating frames", total=len(frames)):
         frame = cv2.resize(frame, (target_width, target_height), interpolation=cv2.INTER_AREA)
         detections = sv.Detections.from_transformers(result, id2label=id2label)
         detections = detections.with_nms(threshold=0.95, class_agnostic=True)
         annotated_frame = box_annotator.annotate(scene=frame, detections=detections)
         annotated_frame = label_annotator.annotate(scene=annotated_frame, detections=detections)
+        annotated_frames.append(annotated_frame)
+    output_filename = os.path.join(VIDEO_OUTPUT_DIR, f"output_{uuid.uuid4()}.mp4")
+    iio.imwrite(output_filename, annotated_frames, fps=target_fps, codec="h264") #, pixelformat="yuv420p")
+    return output_filename

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ ffmpeg-python
 tqdm
 pillow
 supervision
-spaces

 tqdm
 pillow
 supervision
+spaces
+imageio[pyav]