Yolov10

Runtime error

App Files Files Community

BoukamchaSmartVisions committed on May 26, 2024

Commit

f44b506

verified ·

1 Parent(s): 879c1f7

feat: Add support for video input and frame-by-frame processing in YOLOv10 Gradio app

Browse files

Files changed (1) hide show

app.py +73 -10

app.py CHANGED Viewed

@@ -3,7 +3,8 @@ from ultralytics import YOLOv10
 import supervision as sv
 import spaces
 from huggingface_hub import hf_hub_download
 def download_models(model_id):
     hf_hub_download("BoukamchaSmartVisions/Yolov10", filename=f"{model_id}", local_dir=f"./")
@@ -29,7 +30,6 @@ category_dict = {
     77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'
 }
 @spaces.GPU(duration=200)
 def yolov10_inference(image, model_id, image_size, conf_threshold, iou_threshold):
     model_path = download_models(model_id)
@@ -45,12 +45,55 @@ def yolov10_inference(image, model_id, image_size, conf_threshold, iou_threshold
     return annotated_image
 def app():
     with gr.Blocks():
         with gr.Row():
             with gr.Column():
-                image = gr.Image(type="numpy", label="Image")
                 model_id = gr.Dropdown(
                     label="Model",
                     choices=[
@@ -87,53 +130,73 @@ def app():
                 yolov10_infer = gr.Button(value="Detect Objects")
             with gr.Column():
-                output_image = gr.Image(type="numpy", label="Annotated Image")
         yolov10_infer.click(
-            fn=yolov10_inference,
             inputs=[
                 image,
                 model_id,
                 image_size,
                 conf_threshold,
                 iou_threshold,
             ],
-            outputs=[output_image],
         )
         gr.Examples(
             examples=[
                 [
                     "Animals_persones.jpg",
                     "yolov10x.pt",
                     640,
                     0.25,
                     0.45,
                 ],
                 [
                     "collage-horses-other-pets-white.jpg",
                     "yolov10m.pt",
                     640,
                     0.25,
                     0.45,
                 ],
                 [
                     "Ville.png",
                     "yolov10b.pt",
                     640,
                     0.25,
                     0.45,
                 ],
             ],
-            fn=yolov10_inference,
             inputs=[
                 image,
                 model_id,
                 image_size,
                 conf_threshold,
                 iou_threshold,
             ],
-            outputs=[output_image],
             cache_examples=True,
         )
@@ -156,4 +219,4 @@ with gradio_app:
         with gr.Column():
             app()
-gradio_app.launch(debug=True)

 import supervision as sv
 import spaces
 from huggingface_hub import hf_hub_download
+import cv2
+import tempfile
 def download_models(model_id):
     hf_hub_download("BoukamchaSmartVisions/Yolov10", filename=f"{model_id}", local_dir=f"./")
     77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'
 }
 @spaces.GPU(duration=200)
 def yolov10_inference(image, model_id, image_size, conf_threshold, iou_threshold):
     model_path = download_models(model_id)
     return annotated_image
+def yolov10_video_inference(video, model_id, image_size, conf_threshold, iou_threshold):
+    """Run YOLOv10 detection on every frame of a video and save an annotated copy.
+
+    Args:
+        video: Input video source handed to ``cv2.VideoCapture``.
+        model_id: Weights identifier handed to ``download_models``.
+        image_size: Inference size forwarded to the model as ``imgsz``.
+        conf_threshold: Confidence threshold forwarded to the model as ``conf``.
+        iou_threshold: IoU threshold forwarded to the model as ``iou``.
+
+    Returns:
+        Path of a temporary ``.mp4`` file containing the annotated frames. The
+        file is created with ``delete=False``, so the caller owns its cleanup.
+
+    Raises:
+        ValueError: If no frame can be read from ``video``.
+    """
+    # NOTE(review): yolov10_inference is wrapped in @spaces.GPU but this function
+    # is not - confirm whether video inference needs the same decorator.
+    model_path = download_models(model_id)
+    model = YOLOv10(model_path)
+    cap = cv2.VideoCapture(video)
+    writer = None
+    try:
+        ret, frame = cap.read()
+        if not ret:
+            # Without a first frame there is no frame size to open the writer with.
+            raise ValueError(f"Could not read any frame from video: {video!r}")
+        height, width = frame.shape[:2]
+        # Keep the source frame rate so playback speed is preserved; fall back
+        # to 30 when the container reports nothing usable (0, negative or NaN).
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        if not fps > 0:
+            fps = 30
+        # Only the path is needed: close the handle immediately so it does not
+        # stay open while OpenCV writes to the same file.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as out:
+            out_path = out.name
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
+        while ret:
+            results = model(source=frame, imgsz=image_size, iou=iou_threshold, conf=conf_threshold, verbose=False)[0]
+            detections = sv.Detections.from_ultralytics(results)
+            labels = [
+                f"{category_dict[class_id]} {confidence:.2f}"
+                for class_id, confidence in zip(detections.class_id, detections.confidence)
+            ]
+            annotated_frame = box_annotator.annotate(frame, detections=detections, labels=labels)
+            writer.write(annotated_frame)
+            ret, frame = cap.read()
+    finally:
+        # Release the native handles even when inference fails part-way through.
+        cap.release()
+        if writer is not None:
+            writer.release()
+    return out_path
 def app():
     with gr.Blocks():
         with gr.Row():
             with gr.Column():
+                image_or_video = gr.Radio(
+                    label="Input Type",
+                    choices=["Image", "Video"],
+                    value="Image",
+                )
+                image = gr.Image(type="numpy", label="Image", visible=True)
+                video = gr.Video(label="Video", visible=False)
+                image_or_video.change(
+                    lambda x: (gr.update(visible=x=="Image"), gr.update(visible=x=="Video")),
+                    inputs=[image_or_video],
+                    outputs=[image, video],
+                )
                 model_id = gr.Dropdown(
                     label="Model",
                     choices=[
                 yolov10_infer = gr.Button(value="Detect Objects")
             with gr.Column():
+                output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
+                output_video = gr.Video(label="Annotated Video", visible=False)
         yolov10_infer.click(
+            fn=lambda inputs: yolov10_inference(*inputs) if inputs[0] == "Image" else yolov10_video_inference(*inputs[1:]),
             inputs=[
+                image_or_video,
                 image,
+                video,
                 model_id,
                 image_size,
                 conf_threshold,
                 iou_threshold,
             ],
+            outputs=[output_image, output_video],
         )
         gr.Examples(
             examples=[
                 [
+                    "Image",
                     "Animals_persones.jpg",
+                    None,
                     "yolov10x.pt",
                     640,
                     0.25,
                     0.45,
                 ],
                 [
+                    "Image",
                     "collage-horses-other-pets-white.jpg",
+                    None,
                     "yolov10m.pt",
                     640,
                     0.25,
                     0.45,
                 ],
                 [
+                    "Image",
                     "Ville.png",
+                    None,
                     "yolov10b.pt",
                     640,
                     0.25,
                     0.45,
                 ],
+                [
+                    "Video",
+                    None,
+                    "sample_video.mp4",
+                    "yolov10m.pt",
+                    640,
+                    0.25,
+                    0.45,
+                ],
             ],
+            fn=lambda inputs: yolov10_inference(*inputs) if inputs[0] == "Image" else yolov10_video_inference(*inputs[1:]),
             inputs=[
+                image_or_video,
                 image,
+                video,
                 model_id,
                 image_size,
                 conf_threshold,
                 iou_threshold,
             ],
+            outputs=[output_image, output_video],
             cache_examples=True,
         )
         with gr.Column():
             app()
+gradio_app.launch(debug=True)