Spaces: Running on T4
initial video processing support
- .gitattributes +1 -0
- app.py +28 -11
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
app.py
CHANGED
@@ -1,7 +1,6 @@
 import os
 from typing import TypeVar
 
-from tqdm import tqdm
 import gradio as gr
 import numpy as np
 import supervision as sv

@@ -26,18 +25,22 @@ RF-DETR is a real-time, transformer-based object detection model architecture de
 by [Roboflow](https://roboflow.com/) and released under the Apache 2.0 license.
 """
 
-
+IMAGE_PROCESSING_EXAMPLES = [
     ['https://media.roboflow.com/supervision/image-examples/people-walking.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/supervision/image-examples/vehicles.png', 0.3, 728, "large"],
     ['https://media.roboflow.com/notebooks/examples/dog-2.jpeg', 0.5, 560, "base"],
 ]
+VIDEO_PROCESSING_EXAMPLES = [
+    ["videos/people-walking.mp4", 0.3, 728, "large"],
+    ["videos/vehicles.mp4", 0.3, 728, "large"],
+]
 
 COLOR = sv.ColorPalette.from_hex([
     "#ffff00", "#ff9b00", "#ff8080", "#ff66b2", "#ff66ff", "#b266ff",
     "#9999ff", "#3399ff", "#66ffff", "#33ff99", "#66ff66", "#99ff00"
 ])
 
-MAX_VIDEO_LENGTH_SECONDS =
+MAX_VIDEO_LENGTH_SECONDS = 5
 VIDEO_SCALE_FACTOR = 0.5
 VIDEO_TARGET_DIRECTORY = "tmp"
 
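The hunks that follow use a frame budget (total) and an output path under VIDEO_TARGET_DIRECTORY without showing how either is derived. A minimal sketch of how the new constants could plausibly be combined, assuming input_video is the uploaded file's path; the helper name prepare_video_io and the exact arithmetic are illustrative and not taken from the Space:

import os
import uuid

import supervision as sv

MAX_VIDEO_LENGTH_SECONDS = 5
VIDEO_TARGET_DIRECTORY = "tmp"


def prepare_video_io(input_video: str) -> tuple:
    # Read fps, resolution, and frame count from the uploaded file.
    video_info = sv.VideoInfo.from_video_path(input_video)

    # Cap processing at MAX_VIDEO_LENGTH_SECONDS worth of frames.
    total = min(video_info.total_frames, video_info.fps * MAX_VIDEO_LENGTH_SECONDS)

    # Write the annotated result to a unique file under the target directory.
    os.makedirs(VIDEO_TARGET_DIRECTORY, exist_ok=True)
    output_video = os.path.join(VIDEO_TARGET_DIRECTORY, f"{uuid.uuid4()}.mp4")
    return video_info, total, output_video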
@@ -59,8 +62,7 @@ def detect_and_annotate(
     label_annotator = sv.LabelAnnotator(
         color=COLOR,
         text_color=sv.Color.BLACK,
-        text_scale=text_scale,
-        smart_position=True
+        text_scale=text_scale
     )
 
     labels = [

@@ -98,7 +100,7 @@ def video_processing_inference(
     confidence: float,
     resolution: int,
     checkpoint: str,
-    progress=gr.Progress(
+    progress=gr.Progress()
 ):
     model = load_model(resolution=resolution, checkpoint=checkpoint)
 
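For context on the progress=gr.Progress() change: when a Gradio event handler declares a parameter whose default value is gr.Progress(), Gradio injects a live progress tracker at call time, and progress.tqdm(...) wraps any iterable so that each iteration advances the progress bar in the UI, which is why this commit can drop the console-only tqdm import. A small self-contained sketch of the pattern, separate from the Space's actual interface:

import time

import gradio as gr


def count_slowly(n: float, progress=gr.Progress()):
    # progress is injected by Gradio when the function runs as an event handler;
    # progress.tqdm() mirrors tqdm's API but drives the progress bar in the browser.
    done = 0
    for _ in progress.tqdm(range(int(n)), total=int(n)):
        time.sleep(0.05)  # stand-in for per-frame work
        done += 1
    return done


demo = gr.Interface(fn=count_slowly, inputs=gr.Number(value=20), outputs="number")

if __name__ == "__main__":
    demo.launch()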
@@ -113,13 +115,13 @@ def video_processing_inference(
     frames_generator = sv.get_video_frames_generator(input_video, end=total)
 
     with sv.VideoSink(output_video, video_info=video_info) as sink:
-        for frame in tqdm(frames_generator, total=total):
-            frame = sv.scale_image(frame, VIDEO_SCALE_FACTOR)
+        for frame in progress.tqdm(frames_generator, total=total):
             annotated_frame = detect_and_annotate(
                 model=model,
                 image=frame,
                 confidence=confidence
             )
+            annotated_frame = sv.scale_image(annotated_frame, VIDEO_SCALE_FACTOR)
             sink.write_frame(annotated_frame)
 
     return output_video
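The loop change above also reorders the downscaling: the old code shrank each frame before inference, while the new code runs detect_and_annotate on the full-resolution frame and only scales the annotated result down before writing it. A hedged sketch of that ordering, assuming the VideoSink's video_info has already been adjusted to the scaled output size (that adjustment is outside this diff):

import supervision as sv

VIDEO_SCALE_FACTOR = 0.5


def write_annotated_video(frames, annotate, output_video: str, video_info: sv.VideoInfo) -> None:
    # annotate is any callable mapping a frame (numpy array) to an annotated frame,
    # e.g. detect_and_annotate with model and confidence bound via functools.partial.
    with sv.VideoSink(output_video, video_info=video_info) as sink:
        for frame in frames:
            annotated = annotate(frame)  # detect on the full-resolution frame
            annotated = sv.scale_image(annotated, VIDEO_SCALE_FACTOR)  # shrink only the output
            sink.write_frame(annotated)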
@@ -166,7 +168,7 @@ with gr.Blocks() as demo:
 
         gr.Examples(
             fn=image_processing_inference,
-            examples=
+            examples=IMAGE_PROCESSING_EXAMPLES,
             inputs=[
                 image_processing_input_image,
                 image_processing_confidence_slider,

@@ -174,7 +176,8 @@ with gr.Blocks() as demo:
                 image_processing_checkpoint_dropdown
             ],
             outputs=image_processing_output_image,
-            cache_examples=True
+            cache_examples=True,
+            run_on_click=True
         )
 
         image_processing_submit_button.click(

@@ -185,7 +188,7 @@ with gr.Blocks() as demo:
                 image_processing_resolution_slider,
                 image_processing_checkpoint_dropdown
             ],
-            outputs=image_processing_output_image
+            outputs=image_processing_output_image,
         )
     with gr.Tab("Video"):
         with gr.Row():

@@ -221,6 +224,20 @@ with gr.Blocks() as demo:
             with gr.Column():
                 video_processing_submit_button = gr.Button("Submit", value="primary")
 
+        gr.Examples(
+            fn=video_processing_inference,
+            examples=VIDEO_PROCESSING_EXAMPLES,
+            inputs=[
+                video_processing_input_video,
+                video_processing_confidence_slider,
+                video_processing_resolution_slider,
+                video_processing_checkpoint_dropdown
+            ],
+            outputs=video_processing_output_video,
+            cache_examples=True,
+            run_on_click=True
+        )
+
         video_processing_submit_button.click(
             video_processing_inference,
             inputs=[