BoukamchaSmartVisions committed on
Commit
f44b506
·
verified ·
1 Parent(s): 879c1f7

feat: Add support for video input and frame-by-frame processing in YOLOv10 Gradio app

Browse files
Files changed (1) hide show
  1. app.py +73 -10
app.py CHANGED
@@ -3,7 +3,8 @@ from ultralytics import YOLOv10
3
  import supervision as sv
4
  import spaces
5
  from huggingface_hub import hf_hub_download
6
-
 
7
 
8
  def download_models(model_id):
9
  hf_hub_download("BoukamchaSmartVisions/Yolov10", filename=f"{model_id}", local_dir=f"./")
@@ -29,7 +30,6 @@ category_dict = {
29
  77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'
30
  }
31
 
32
-
33
  @spaces.GPU(duration=200)
34
  def yolov10_inference(image, model_id, image_size, conf_threshold, iou_threshold):
35
  model_path = download_models(model_id)
@@ -45,12 +45,55 @@ def yolov10_inference(image, model_id, image_size, conf_threshold, iou_threshold
45
 
46
  return annotated_image
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def app():
49
  with gr.Blocks():
50
  with gr.Row():
51
  with gr.Column():
52
- image = gr.Image(type="numpy", label="Image")
53
-
 
 
 
 
 
 
 
 
 
 
 
 
54
  model_id = gr.Dropdown(
55
  label="Model",
56
  choices=[
@@ -87,53 +130,73 @@ def app():
87
  yolov10_infer = gr.Button(value="Detect Objects")
88
 
89
  with gr.Column():
90
- output_image = gr.Image(type="numpy", label="Annotated Image")
 
91
 
92
  yolov10_infer.click(
93
- fn=yolov10_inference,
94
  inputs=[
 
95
  image,
 
96
  model_id,
97
  image_size,
98
  conf_threshold,
99
  iou_threshold,
100
  ],
101
- outputs=[output_image],
102
  )
103
 
104
  gr.Examples(
105
  examples=[
106
  [
 
107
  "Animals_persones.jpg",
 
108
  "yolov10x.pt",
109
  640,
110
  0.25,
111
  0.45,
112
  ],
113
  [
 
114
  "collage-horses-other-pets-white.jpg",
 
115
  "yolov10m.pt",
116
  640,
117
  0.25,
118
  0.45,
119
  ],
120
  [
 
121
  "Ville.png",
 
122
  "yolov10b.pt",
123
  640,
124
  0.25,
125
  0.45,
126
  ],
 
 
 
 
 
 
 
 
 
127
  ],
128
- fn=yolov10_inference,
129
  inputs=[
 
130
  image,
 
131
  model_id,
132
  image_size,
133
  conf_threshold,
134
  iou_threshold,
135
  ],
136
- outputs=[output_image],
137
  cache_examples=True,
138
  )
139
 
@@ -156,4 +219,4 @@ with gradio_app:
156
  with gr.Column():
157
  app()
158
 
159
- gradio_app.launch(debug=True)
 
3
  import supervision as sv
4
  import spaces
5
  from huggingface_hub import hf_hub_download
6
+ import cv2
7
+ import tempfile
8
 
9
  def download_models(model_id):
10
  hf_hub_download("BoukamchaSmartVisions/Yolov10", filename=f"{model_id}", local_dir=f"./")
 
30
  77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'
31
  }
32
 
 
33
  @spaces.GPU(duration=200)
34
  def yolov10_inference(image, model_id, image_size, conf_threshold, iou_threshold):
35
  model_path = download_models(model_id)
 
45
 
46
  return annotated_image
47
 
48
def yolov10_video_inference(video, model_id, image_size, conf_threshold, iou_threshold):
    """Run YOLOv10 detection on every frame of a video and save an annotated copy.

    Args:
        video: Path to the input video, passed to ``cv2.VideoCapture``.
        model_id: Weights filename handed to ``download_models``.
        image_size: Inference size forwarded to the model as ``imgsz``.
        conf_threshold: Confidence threshold forwarded as ``conf``.
        iou_threshold: IoU threshold forwarded as ``iou``.

    Returns:
        Path of a temporary ``.mp4`` file holding the annotated frames.

    Raises:
        ValueError: If no frame can be read from ``video``.
        RuntimeError: If the output video writer cannot be opened.
    """
    # NOTE(review): the image inference function is wrapped in
    # @spaces.GPU(duration=200); this one is not. Confirm whether video
    # inference is meant to run without a GPU allocation.
    model_path = download_models(model_id)
    model = YOLOv10(model_path)

    cap = cv2.VideoCapture(video)
    writer = None
    try:
        ret, frame = cap.read()
        if not ret or frame is None:
            # Previously this fell through to ``frame.shape`` on None.
            raise ValueError(f"Could not read any frames from video: {video!r}")

        # Slice instead of 3-way unpacking so a 2-D frame does not break this.
        height, width = frame.shape[:2]

        # Keep the source frame rate; fall back to the former hard-coded 30
        # when the container does not report a usable value (0 or NaN).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            fps = 30

        # Only the file name is needed; close the handle immediately so it is
        # not leaked and does not block the writer on platforms that lock
        # open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
            out_path = tmp.name

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
        if not writer.isOpened():
            raise RuntimeError(f"Could not open video writer for: {out_path!r}")

        while ret:
            results = model(
                source=frame,
                imgsz=image_size,
                iou=iou_threshold,
                conf=conf_threshold,
                verbose=False,
            )[0]
            detections = sv.Detections.from_ultralytics(results)

            labels = [
                f"{category_dict[class_id]} {confidence:.2f}"
                for class_id, confidence in zip(detections.class_id, detections.confidence)
            ]
            annotated_frame = box_annotator.annotate(frame, detections=detections, labels=labels)

            writer.write(annotated_frame)
            ret, frame = cap.read()
    finally:
        # Release native handles even when inference raises part-way through.
        cap.release()
        if writer is not None:
            writer.release()

    return out_path
78
+
79
  def app():
80
  with gr.Blocks():
81
  with gr.Row():
82
  with gr.Column():
83
+ image_or_video = gr.Radio(
84
+ label="Input Type",
85
+ choices=["Image", "Video"],
86
+ value="Image",
87
+ )
88
+ image = gr.Image(type="numpy", label="Image", visible=True)
89
+ video = gr.Video(label="Video", visible=False)
90
+
91
+ image_or_video.change(
92
+ lambda x: (gr.update(visible=x=="Image"), gr.update(visible=x=="Video")),
93
+ inputs=[image_or_video],
94
+ outputs=[image, video],
95
+ )
96
+
97
  model_id = gr.Dropdown(
98
  label="Model",
99
  choices=[
 
130
  yolov10_infer = gr.Button(value="Detect Objects")
131
 
132
  with gr.Column():
133
+ output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
134
+ output_video = gr.Video(label="Annotated Video", visible=False)
135
 
136
  yolov10_infer.click(
137
+ fn=lambda inputs: yolov10_inference(*inputs) if inputs[0] == "Image" else yolov10_video_inference(*inputs[1:]),
138
  inputs=[
139
+ image_or_video,
140
  image,
141
+ video,
142
  model_id,
143
  image_size,
144
  conf_threshold,
145
  iou_threshold,
146
  ],
147
+ outputs=[output_image, output_video],
148
  )
149
 
150
  gr.Examples(
151
  examples=[
152
  [
153
+ "Image",
154
  "Animals_persones.jpg",
155
+ None,
156
  "yolov10x.pt",
157
  640,
158
  0.25,
159
  0.45,
160
  ],
161
  [
162
+ "Image",
163
  "collage-horses-other-pets-white.jpg",
164
+ None,
165
  "yolov10m.pt",
166
  640,
167
  0.25,
168
  0.45,
169
  ],
170
  [
171
+ "Image",
172
  "Ville.png",
173
+ None,
174
  "yolov10b.pt",
175
  640,
176
  0.25,
177
  0.45,
178
  ],
179
+ [
180
+ "Video",
181
+ None,
182
+ "sample_video.mp4",
183
+ "yolov10m.pt",
184
+ 640,
185
+ 0.25,
186
+ 0.45,
187
+ ],
188
  ],
189
+ fn=lambda inputs: yolov10_inference(*inputs) if inputs[0] == "Image" else yolov10_video_inference(*inputs[1:]),
190
  inputs=[
191
+ image_or_video,
192
  image,
193
+ video,
194
  model_id,
195
  image_size,
196
  conf_threshold,
197
  iou_threshold,
198
  ],
199
+ outputs=[output_image, output_video],
200
  cache_examples=True,
201
  )
202
 
 
219
  with gr.Column():
220
  app()
221
 
222
+ gradio_app.launch(debug=True)