Spaces: Running on Zero
Upload folder using huggingface_hub

- .gitattributes +16 -0
- .gitignore +4 -0
- README.md +1 -1
- app.py +84 -59
- examples/10309844035.mp4 +3 -0
- examples/13887487955.mp4 +3 -0
- examples/4167294363.mp4 +3 -0
- examples/4742652230.mp4 +3 -0
- examples/4766274786.mp4 +3 -0
- examples/5012237466.mp4 +3 -0
- examples/5188348585.mp4 +3 -0
- examples/9383140374.mp4 +3 -0
- examples/DTInxNfWXVc_210.0_360.0.mp4 +3 -0
- examples/RoripwjYFp8_210.0_360.0.mp4 +3 -0
- examples/UFWQKrcbhjI_360.0_510.0.mp4 +3 -0
- examples/Z3-IZ3HAmIA_60.0_210.0.mp4 +3 -0
- examples/h6QKDqomIPk_210.0_360.0.mp4 +3 -0
- examples/pA6Z-qYhSNg_60.0_210.0.mp4 +3 -0
- examples/rrTIeJRVGjg_60.0_210.0.mp4 +3 -0
- examples/yId2wIocTys_210.0_360.0.mp4 +3 -0
- requirements.txt +6 -2
- setup.cfg +1 -1
.gitattributes
CHANGED
@@ -49,3 +49,19 @@ data/h6QKDqomIPk_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
 data/pA6Z-qYhSNg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
 data/rrTIeJRVGjg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
 data/yId2wIocTys_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/10309844035.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/13887487955.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/4167294363.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/4742652230.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/4766274786.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/5012237466.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/5188348585.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/9383140374.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/DTInxNfWXVc_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/RoripwjYFp8_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/UFWQKrcbhjI_360.0_510.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/Z3-IZ3HAmIA_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/h6QKDqomIPk_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/pA6Z-qYhSNg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/rrTIeJRVGjg_60.0_210.0.mp4 filter=lfs diff=lfs merge=lfs -text
+examples/yId2wIocTys_210.0_360.0.mp4 filter=lfs diff=lfs merge=lfs -text
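The added rules route every new example video through Git LFS, so only small pointer files live in the repository itself. Not part of the commit, but a quick local check that one of the new videos actually resolves to the lfs filter (a sketch, assuming git and git-lfs are installed):

import subprocess

# Ask git which 'filter' attribute applies to one of the new example videos.
out = subprocess.run(
    ['git', 'check-attr', 'filter', '--', 'examples/4167294363.mp4'],
    capture_output=True, text=True, check=True).stdout
print(out.strip())  # expected: examples/4167294363.mp4: filter: lfs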
.gitignore
CHANGED
@@ -5,5 +5,9 @@ __pycache__
 *$py.class
 
 # Temporary data
+/data*
+/demo/examples
+/model_zoo
+/work_dirs*
 .DS_Store
 ._*
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 💡
 colorFrom: red
 colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: 5.15.0
 app_file: app.py
 pinned: true
 license: bsd-3-clause
app.py
CHANGED
@@ -5,60 +5,92 @@ import json
 import os
 import random
 import time
-from functools import partial
 
 import gradio as gr
 import nncore
+import spaces
 import torch
 from huggingface_hub import snapshot_download
 
-import spaces
 from videomind.constants import GROUNDER_PROMPT, PLANNER_PROMPT, VERIFIER_PROMPT
 from videomind.dataset.utils import process_vision_info
 from videomind.model.builder import build_model
 from videomind.utils.io import get_duration
 from videomind.utils.parser import parse_query, parse_span
 
+os.environ['TOKENIZERS_PARALLELISM'] = 'false'
+
+PATH = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
+
 BASE_MODEL = 'model_zoo/Qwen2-VL-2B-Instruct'
+BASE_MODEL_REPO = 'Qwen/Qwen2-VL-2B-Instruct'
 
 MODEL = 'model_zoo/VideoMind-2B'
+MODEL_REPO = 'yeliudev/VideoMind-2B'
 
 TITLE = 'VideoMind: A Chain-of-LoRA Agent for Long Video Reasoning'
 
+BADGE = """
+<h3 align="center" style="margin-top: -0.5em;">A Chain-of-LoRA Agent for Long Video Reasoning</h3>
+<div style="display: flex; justify-content: center; gap: 5px; margin-bottom: -0.7em !important;">
+<a href="https://arxiv.org/abs/2503.13444" target="_blank">
+<img src="https://img.shields.io/badge/arXiv-2503.13444-red">
+</a>
+<a href="https://videomind.github.io/" target="_blank">
+<img src="https://img.shields.io/badge/Project-Page-brightgreen">
+</a>
+<a href="https://huggingface.co/collections/yeliudev/videomind-67dd41f42c57f0e7433afb36" target="_blank">
+<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue">
+</a>
+<a href="https://huggingface.co/datasets/yeliudev/VideoMind-Dataset" target="_blank">
+<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Dataset-orange">
+</a>
+<a href="https://github.com/yeliudev/VideoMind/blob/main/README.md" target="_blank">
+<img src="https://img.shields.io/badge/License-BSD--3--Clause-purple">
+</a>
+</div>
+"""
+
+LOGO = '<p align="center"><img width="350" src="https://raw.githubusercontent.com/yeliudev/VideoMind/refs/heads/main/.github/logo.png"></p>'
+DISC = '**VideoMind** is a multi-modal agent framework that enhances video reasoning by emulating *human-like* processes, such as *breaking down tasks*, *localizing and verifying moments*, and *synthesizing answers*. This approach addresses the unique challenges of temporal-grounded reasoning in a progressive strategy. This demo showcases how VideoMind-2B handles video-language tasks. Please open an <a href="https://github.com/yeliudev/VideoMind/issues/new" target="_blank">issue</a> if you meet any problems or have any suggestions.'  # noqa
 
 # yapf:disable
 EXAMPLES = [
+    [f'{PATH}/examples/4167294363.mp4', 'Why did the old man stand up?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/5012237466.mp4', 'How does the child in stripes react about the fountain?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/13887487955.mp4', 'What did the excavator do after it pushed the cement forward?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/5188348585.mp4', 'What did the person do before pouring the liquor?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/4766274786.mp4', 'What did the girl do after the baby lost the balloon?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/4742652230.mp4', 'Why is the girl pushing the boy only around the toy but not to other places?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/9383140374.mp4', 'How does the girl in pink control the movement of the claw?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/10309844035.mp4', 'Why are they holding up the phones?', ['pla', 'gnd', 'ver', 'ans']],
+    [f'{PATH}/examples/pA6Z-qYhSNg_60.0_210.0.mp4', 'Different types of meat products are being cut, shaped and prepared', ['gnd', 'ver']],
+    [f'{PATH}/examples/UFWQKrcbhjI_360.0_510.0.mp4', 'A man talks to the camera whilst walking along a roadside in a rural area', ['gnd', 'ver']],
+    [f'{PATH}/examples/RoripwjYFp8_210.0_360.0.mp4', 'A woman wearing glasses eating something at a street market', ['gnd', 'ver']],
+    [f'{PATH}/examples/h6QKDqomIPk_210.0_360.0.mp4', 'A toddler sits in his car seat, holding his yellow tablet', ['gnd', 'ver']],
+    [f'{PATH}/examples/Z3-IZ3HAmIA_60.0_210.0.mp4', 'A view from the window as the plane accelerates and takes off from the runway', ['gnd', 'ver']],
+    [f'{PATH}/examples/yId2wIocTys_210.0_360.0.mp4', "Temporally locate the visual content mentioned in the text query 'kids exercise in front of parked cars' within the video.", ['pla', 'gnd', 'ver']],
+    [f'{PATH}/examples/rrTIeJRVGjg_60.0_210.0.mp4', "Localize the moment that provides relevant context about 'man stands in front of a white building monologuing'.", ['pla', 'gnd', 'ver']],
+    [f'{PATH}/examples/DTInxNfWXVc_210.0_360.0.mp4', "Find the video segment that corresponds to the given textual query 'man with headphones talking'.", ['pla', 'gnd', 'ver']],
 ]
 # yapf:enable
 
+if not nncore.is_dir(BASE_MODEL):
+    snapshot_download(BASE_MODEL_REPO, local_dir=BASE_MODEL)
 
+if not nncore.is_dir(MODEL):
+    snapshot_download(MODEL_REPO, local_dir=MODEL)
+
+print('Initializing role *grounder*')
+model, processor = build_model(MODEL)
+
+print('Initializing role *planner*')
+model.load_adapter(nncore.join(MODEL, 'planner'), adapter_name='planner')
+
+print('Initializing role *verifier*')
+model.load_adapter(nncore.join(MODEL, 'verifier'), adapter_name='verifier')
+
+device = torch.device('cuda')
 
 
 def seconds_to_hms(seconds):
@@ -89,7 +121,9 @@ def reset_components():
 
 
 @spaces.GPU
-def main(video, prompt, role, temperature, max_new_tokens, model, processor, device):
+def main(video, prompt, role, temperature, max_new_tokens):
+    global model, processor, device
+
     history = []
 
     if not video:
@@ -525,40 +559,20 @@ def main(video, prompt, role, temperature, max_new_tokens, model, processor, device):
     yield history
 
 
-    if not nncore.is_dir(BASE_MODEL):
-        snapshot_download(BASE_MODEL_HF, local_dir=BASE_MODEL)
-
-    if not nncore.is_dir(MODEL):
-        snapshot_download(MODEL_HF, local_dir=MODEL)
-
-    print('Initializing role *grounder*')
-    model, processor = build_model(MODEL)
-
-    print('Initializing role *planner*')
-    model.load_adapter(nncore.join(MODEL, 'planner'), adapter_name='planner')
-
-    print('Initializing role *verifier*')
-    model.load_adapter(nncore.join(MODEL, 'verifier'), adapter_name='verifier')
-
-    device = torch.device('cuda')
-
-    main = partial(main, model=model, processor=processor, device=device)
-
-    path = os.path.dirname(os.path.realpath(__file__))
-
+def build_demo():
     chat = gr.Chatbot(
        type='messages',
        height='70vh',
-       avatar_images=[f'{
+       avatar_images=[f'{PATH}/assets/user.png', f'{PATH}/assets/bot.png'],
        placeholder='A conversation with VideoMind',
        label='VideoMind')
 
    prompt = gr.Textbox(label='Text Prompt', placeholder='Ask a question about the video...')
 
-   with gr.Blocks(title=TITLE
-       gr.Markdown(
-       gr.
+   with gr.Blocks(title=TITLE) as demo:
+       gr.Markdown(LOGO)
+       gr.HTML(BADGE)
+       gr.Markdown(DISC)
 
        with gr.Row():
            with gr.Column(scale=3):
@@ -592,7 +606,11 @@ if __name__ == '__main__':
                    label='Max Output Tokens',
                    info='The maximum number of output tokens for each role (Default: 256)')
 
+               with gr.Group():
+                   prompt.render()
+
+               with gr.Accordion(label='Examples', open=False):
+                   gr.Examples(examples=EXAMPLES, inputs=[video, prompt, role], examples_per_page=3)
 
                with gr.Row():
                    random_btn = gr.Button(value='🔮 Random')
@@ -606,9 +624,16 @@
                submit_ctx = submit_ctx.then(main, [video, prompt, role, temperature, max_new_tokens], chat)
                submit_ctx.then(enable_btns, None, [random_btn, reset_btn, submit_btn])
 
-               gr.Markdown('##### Need
+               gr.Markdown('##### Need example data? Explore examples tab or click 🔮 Random to sample one!')
 
            with gr.Column(scale=5):
                chat.render()
 
+   return demo
+
+
+if __name__ == '__main__':
+    demo = build_demo()
+
+    demo.queue()
+    demo.launch(server_name='0.0.0.0', allowed_paths=[f'{PATH}/assets', f'{PATH}/examples'])
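The main() refactor above drops the functools.partial binding and instead reads model, processor and device as module-level globals, keeping the signature handed to the @spaces.GPU decorator limited to the Gradio inputs. As a minimal illustration of that ZeroGPU pattern (a toy function, not code from this Space; assumes the Hugging Face spaces package and ZeroGPU hardware):

import spaces
import torch

model = torch.nn.Linear(4, 4)  # created once at import time, on CPU

@spaces.GPU  # on ZeroGPU Spaces a GPU is attached only while this call runs
def run(x):
    global model
    model = model.to('cuda')          # move weights when the GPU is available
    return model(x.to('cuda')).cpu()  # return results on CPU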
examples/10309844035.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8996ff134787d6b769c2491b9079a02c05953465ad770f07a8d9138e2668d24f
+size 4041678

examples/13887487955.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5fecab1076ee42b3804718f9f64bef06cbfafd6995ad5f5ee42ba6354721429
+size 5544739

examples/4167294363.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d0e0a4a381836f68e16a816d87f241fed3e31ea321f544b921743d6c1c50666
+size 6611151

examples/4742652230.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8733ab4b0716d13ea7a79fc4ddacaf9eede567db364f0ecddfa4582c2f237f82
+size 2200304

examples/4766274786.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afa38a9ce9e89f934293214d79755c89159664223b3ca366813fd5fe524ed013
+size 3395545

examples/5012237466.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd1929aa93d037f809f402e9801047125dc9fe8060301e69ded9ba1f2d785cc8
+size 4822293

examples/5188348585.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b225f448a546ba2f65958f18c6731a6dde9b1f437014e90036b22eb40e9ad0a5
+size 5051675

examples/9383140374.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30b6b3eb43f711bef194150d473a59850ff5d7fec0f5cc30e7526aa9e382303f
+size 2518081

examples/DTInxNfWXVc_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a09eee0dc404688731fb768c120d3519605f2343376b9bd727a71b91379fd9a9
+size 4999970

examples/RoripwjYFp8_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b39b15158dc20c0bc6f1758a9239c8f3eed20ba4a90953338eec2246fa8f1f0
+size 9287252

examples/UFWQKrcbhjI_360.0_510.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8669153d9ffac4b5534c20fab8d795347f5babe588da9b8330e049d623ebb443
+size 14510618

examples/Z3-IZ3HAmIA_60.0_210.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b3a342993ee61efc5f3b859cd9c1e0d360b3331eed9deb8466891e4bcacc554
+size 14397799

examples/h6QKDqomIPk_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:103820de2b8a1a3935b39ed80d91cd08e546e5617310b3d1bb3dadb06b2ffb95
+size 13485144

examples/pA6Z-qYhSNg_60.0_210.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c84660fd4ebd8c23a2a7364174b1e819fec8b0e1cb8b9d9cd86f9e429cbdf66c
+size 8658509

examples/rrTIeJRVGjg_60.0_210.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efe6f48a49963bd4880ef5065840e05dd25e2aa975870140bcdaf4220bbd2827
+size 11410412

examples/yId2wIocTys_210.0_360.0.mp4
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:447fcb1fd1f94ed6a88d56dd0f6f859646cb8c58ed8e3b7a82f374e2cfee1646
+size 14769130
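Each of the example videos above is committed as a Git LFS pointer rather than raw bytes: three text lines giving the spec version, the SHA-256 object id, and the size of the real file. A small sketch (not part of the commit) of reading such a pointer, assuming the checkout still contains the pointer text rather than the smudged video:

def read_lfs_pointer(path):
    # Each pointer line is '<key> <value>', e.g. 'size 4041678'.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(' ')
            fields[key] = value
    return fields

# e.g. {'version': 'https://git-lfs.github.com/spec/v1', 'oid': 'sha256:8996ff...', 'size': '4041678'}
print(read_lfs_pointer('examples/10309844035.mp4'))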
requirements.txt
CHANGED
@@ -1,10 +1,8 @@
 accelerate==1.2.1
 decord==0.6.0
-gradio==4.44.1
 nncore==0.4.5
 pandas==2.2.3
 peft==0.14.0
-pydantic==2.10.6
 pysrt==1.1.2
 scikit-image==0.25.0
 scikit-learn==1.6.1
@@ -13,6 +11,12 @@ spaces==0.34.0
 termplotlib==0.3.9
 triton==3.0.0
 
+# gradio 5.16.0 to 5.23.1 have wrong horizontal margins
+gradio==5.15.0
+
+# https://github.com/gradio-app/gradio/issues/10662
+pydantic==2.10.6
+
 # our codebase contains necessary patches for 4.45.2
 transformers==4.45.2
 
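gradio is now pinned to 5.15.0 (matching sdk_version in README.md) and pydantic to 2.10.6 per the linked issue. A quick sanity check that an environment actually honours these pins (a sketch, not part of the commit; assumes Python 3.8+ for importlib.metadata):

from importlib.metadata import version

for pkg, expected in [('gradio', '5.15.0'), ('pydantic', '2.10.6'), ('transformers', '4.45.2')]:
    installed = version(pkg)
    assert installed == expected, f'{pkg}: expected {expected}, got {installed}'
print('pinned versions match requirements.txt')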
setup.cfg
CHANGED
@@ -7,7 +7,7 @@ split_before_expression_after_opening_paren = true
 [isort]
 line_length = 120
 multi_line_output = 0
-known_third_party = decord,deepspeed,gradio,huggingface_hub,nncore,numpy,pandas,peft,PIL,pysrt,safetensors,tabulate,termplotlib,torch,torchvision,transformers
+known_third_party = decord,deepspeed,gradio,huggingface_hub,nncore,numpy,pandas,peft,PIL,pysrt,safetensors,spaces,tabulate,termplotlib,torch,torchvision,transformers
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = FIRSTPARTY
 