update UI
- .gitignore +2 -1
- app.py +43 -17
- run_on_video/run.py +1 -0
.gitignore CHANGED
@@ -1,4 +1,5 @@
 *.mp4
 *.MP4
 *.mov
-*.MOV
+*.MOV
+testing_data
app.py CHANGED
@@ -2,14 +2,16 @@ import gradio as gr
 from run_on_video.run import MomentDETRPredictor
 from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
 import torch
+from lbhd.infer import lbhd_predict
 
 DESCRIPTION = """
 _This Space demonstrates model [QVHighlights: Detecting Moments and Highlights in Videos via Natural Language Queries](https://arxiv.org/abs/2107.09609), NeurIPS 2021, by [Jie Lei](http://www.cs.unc.edu/~jielei/), [Tamara L. Berg](http://tamaraberg.com/), [Mohit Bansal](http://www.cs.unc.edu/~mbansal/)_
 """
 
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
 ckpt_path = "run_on_video/moment_detr_ckpt/model_best.ckpt"
 clip_model_name_or_path = "ViT-B/32"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 moment_detr_predictor = MomentDETRPredictor(
     ckpt_path=ckpt_path,
@@ -22,11 +24,14 @@ def trim_video(video_path, start, end, output_file='result.mp4'):
     return output_file
 
 def display_prediction(result):
-    return f'###
+    return f'### Start time: {result[0]:.2f}, End time: {result[1]:.2f}, Score: {result[2]:.2f}'
 
 with gr.Blocks(theme=gr.themes.Default()) as demo:
     output_videos = gr.State(None)
+    output_lbhd_videos = gr.State(None)
     moment_prediction = gr.State(None)
+    our_prediction = gr.State(None)
+
     gr.HTML("""<h2 align="center"> 🎞️ Highlight Detection with MomentDETR </h2>""")
     gr.Markdown(DESCRIPTION)
     with gr.Column():
@@ -37,8 +42,14 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
         input_video = gr.Video(label="Please input mp4", height=400)
     with gr.Blocks():
         with gr.Column():
-            gr.HTML("""<h3 align="center">
+            gr.HTML("""<h3 align="center"> MomentDETR Result </h3>""")
             playable_video = gr.Video(height=400)
+            display_score = gr.Markdown("### Start time, End time, Score")
+    with gr.Blocks():
+        with gr.Column():
+            gr.HTML("""<h3 align="center"> Ours Result </h3>""")
+            our_result_video = gr.Video(height=400)
+            display_clip_score = gr.Markdown("### Start time, End time, Score")
     with gr.Row():
         with gr.Column():
             retrieval_text = gr.Textbox(
@@ -50,24 +61,27 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
         with gr.Column():
             radio_button = gr.Radio(
                 choices=[i+1 for i in range(10)],
-                label="
+                label="Top 10",
                 value=1
             )
-            display_score = gr.Markdown("### Moment Score: ")
+            # display_score = gr.Markdown("### Moment Score: ")
+
 
-    def update_video_player(radio_value, output_videos, moment_prediction):
+    def update_video_player(radio_value, output_videos, output_lbhd_videos, moment_prediction, our_prediction):
         if output_videos is None or moment_prediction is None:
-            return [None, None]
+            return [None, None, None, None]
         return {
-            playable_video: output_videos[radio_value-1],
-            display_score: display_prediction(moment_prediction[radio_value-1])
+            playable_video: output_videos[radio_value-1],
+            our_result_video: output_lbhd_videos[min(radio_value-1, len(output_lbhd_videos)-1)],
+            display_score: display_prediction(moment_prediction[radio_value-1]),
+            display_clip_score: display_prediction(our_prediction[min(radio_value-1, len(output_lbhd_videos)-1)])
         }
 
     def submit_video(input_video, retrieval_text):
         print(f'== video path: {input_video}')
         print(f'== retrieval_text: {retrieval_text}')
         if input_video is None:
-            return [None, None, None, None, 1]
+            return [None, None, None, None, None, None, None, None, 1]
         if retrieval_text is None:
             retrieval_text = ''
         predictions, video_frames = moment_detr_predictor.localize_moment(
@@ -75,32 +89,44 @@ with gr.Blocks(theme=gr.themes.Default()) as demo:
             query_list=[retrieval_text]
         )
         predictions = predictions[0]['pred_relevant_windows']
-        pred_windows = [[pred[0], pred[1]] for pred in predictions]
         output_files = [ trim_video(
             video_path=input_video,
-            start=
-            end=
+            start=predictions[i][0],
+            end=predictions[i][1],
             output_file=f'{i}.mp4'
         ) for i in range(10)]
-
+
+        lbhd_predictions = lbhd_predict(input_video)
+        print(f'== lbhd_predictions: {lbhd_predictions}')
+        output_files_lbhd = [ trim_video(
+            video_path=input_video,
+            start=lbhd_predictions[i][0],
+            end=lbhd_predictions[i][1],
+            output_file=f'{i}_lbhd.mp4'
+        ) for i in range(10)]
+
         return {
             output_videos: output_files,
+            output_lbhd_videos: output_files_lbhd,
             moment_prediction: predictions,
+            our_prediction: lbhd_predictions,
             playable_video: output_files[0],
+            our_result_video: output_files_lbhd[0],
             display_score: display_prediction(predictions[0]),
+            display_clip_score: display_prediction(lbhd_predictions[0]),
             radio_button: 1
         }
 
     radio_button.change(
         fn=update_video_player,
-        inputs=[radio_button, output_videos, moment_prediction],
-        outputs=[playable_video, display_score]
+        inputs=[radio_button, output_videos, output_lbhd_videos, moment_prediction, our_prediction],
+        outputs=[playable_video, our_result_video, display_score, display_clip_score]
     )
 
     submit.click(
         fn=submit_video,
         inputs=[input_video, retrieval_text],
-        outputs=[output_videos, moment_prediction, playable_video, display_score, radio_button]
+        outputs=[output_videos, output_lbhd_videos, moment_prediction, our_prediction, playable_video, our_result_video, display_score, display_clip_score, radio_button]
    )
 
 demo.launch()
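Two details of app.py worth noting. Each entry of `pred_relevant_windows` is a `[start, end, score]` triple, which is why `display_prediction` formats `result[0]`, `result[1]`, and `result[2]` as start time, end time, and score. And `trim_video` (its signature is visible in the hunk header above) cuts one predicted window into its own mp4. Its body is not part of this diff; below is a minimal sketch, assuming it simply wraps moviepy's `ffmpeg_extract_subclip`, which app.py imports.

# Minimal sketch of trim_video, assuming it wraps moviepy's
# ffmpeg_extract_subclip (imported at the top of app.py).
# The real body is not shown in this diff.
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

def trim_video(video_path, start, end, output_file='result.mp4'):
    # Extract the [start, end] window (in seconds) from video_path
    # and write it to output_file.
    ffmpeg_extract_subclip(video_path, start, end, targetname=output_file)
    return output_file

Note also that both handlers return dicts keyed by Gradio components; in that style, every component the dict may update must appear in the event's `outputs` list, which is why the commit extends the output lists of both `radio_button.change(...)` and `submit.click(...)`.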
run_on_video/run.py CHANGED
@@ -25,6 +25,7 @@ class MomentDETRPredictor:
         )
         print("Loading trained Moment-DETR model...")
         self.model = build_inference_model(ckpt_path).to(self.device)
+        self.model.eval()
 
     @torch.no_grad()
     def localize_moment(self, video_path, query_list):
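The one-line change to run.py matters because `@torch.no_grad()` only disables gradient tracking; it does not switch modules such as Dropout or BatchNorm into inference mode. That is what the added `self.model.eval()` does. A toy illustration of the difference (a standalone example, not code from this repo):

# Toy illustration (not from this repo): eval() vs. no_grad().
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))

x = torch.ones(1, 4)
with torch.no_grad():
    a = model(x)  # dropout is still active: outputs vary run to run
    b = model(x)

model.eval()          # switches Dropout (and BatchNorm) to inference behavior
with torch.no_grad():
    c = model(x)      # deterministic now
    d = model(x)

print(torch.equal(a, b))  # usually False
print(torch.equal(c, d))  # True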