Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -2,15 +2,14 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
from transformers import AutoModel, AutoTokenizer
|
4 |
|
5 |
-
#
|
6 |
model_path = "OpenGVLab/InternVideo2_5_Chat_8B"
|
7 |
|
8 |
# Load the tokenizer and model with remote code enabled.
|
9 |
-
# .half() converts the model to FP16 and .cuda() moves it to GPU (if available).
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
11 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
|
12 |
|
13 |
-
# Get the image processor from the vision tower
|
14 |
image_processor = model.get_vision_tower().image_processor
|
15 |
|
16 |
# Evaluation settings
|
@@ -23,23 +22,37 @@ generation_config = {
|
|
23 |
"num_beams": 1,
|
24 |
}
|
25 |
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def chat_interface(video_path, user_prompt, chat_history):
|
28 |
-
"""
|
29 |
-
Performs a chat turn with the model. If no chat_history is provided,
|
30 |
-
it starts a new conversation.
|
31 |
-
|
32 |
-
Parameters:
|
33 |
-
video_path (str): The filepath of the uploaded video.
|
34 |
-
user_prompt (str): The user's question.
|
35 |
-
chat_history (list): The conversation history (empty list for a new conversation).
|
36 |
-
|
37 |
-
Returns:
|
38 |
-
A tuple containing the model's output (str) and the updated chat history (list).
|
39 |
-
"""
|
40 |
if chat_history is None:
|
41 |
chat_history = []
|
42 |
-
# The model.chat() method returns output and updated history.
|
43 |
output, new_history = model.chat(
|
44 |
video_path=video_path,
|
45 |
tokenizer=tokenizer,
|
@@ -57,21 +70,15 @@ with gr.Blocks() as demo:
|
|
57 |
with gr.Row():
|
58 |
video_input = gr.Video(label="Upload Video", type="filepath")
|
59 |
question_input = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
|
60 |
-
|
61 |
-
chat_state = gr.State([])
|
62 |
output_text = gr.Textbox(label="Model Response")
|
63 |
|
64 |
-
def process_chat(video, question, history):
|
65 |
-
response, new_history = chat_interface(video, question, history)
|
66 |
-
return response, new_history
|
67 |
-
|
68 |
send_btn = gr.Button("Send")
|
69 |
send_btn.click(
|
70 |
-
|
71 |
inputs=[video_input, question_input, chat_state],
|
72 |
outputs=[output_text, chat_state]
|
73 |
)
|
74 |
|
75 |
-
# Launch the app.
|
76 |
if __name__ == "__main__":
|
77 |
demo.launch()
|
|
|
2 |
import torch
|
3 |
from transformers import AutoModel, AutoTokenizer
|
4 |
|
5 |
+
# Model setting
|
6 |
model_path = "OpenGVLab/InternVideo2_5_Chat_8B"
|
7 |
|
8 |
# Load the tokenizer and model with remote code enabled.
|
|
|
9 |
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
10 |
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
|
11 |
|
12 |
+
# Get the image processor from the vision tower.
|
13 |
image_processor = model.get_vision_tower().image_processor
|
14 |
|
15 |
# Evaluation settings
|
|
|
22 |
"num_beams": 1,
|
23 |
}
|
24 |
|
25 |
+
video_path = "your_video.mp4" # (For testing locally, update as needed)
|
26 |
+
|
27 |
+
# Single-turn conversation example:
|
28 |
+
def single_turn_chat(video_path, user_prompt):
    """Ask the model one question about a video, with no prior context.

    Parameters:
        video_path (str): Filepath of the video to analyze.
        user_prompt (str): The question to ask about the video.

    Returns:
        The model's answer text; the returned conversation history is
        discarded since this is a one-shot exchange.
    """
    # model.chat() with return_history=True yields (output, history);
    # only the output matters here, so the history is thrown away.
    chat_kwargs = dict(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    answer, _ = model.chat(**chat_kwargs)
    return answer
|
38 |
+
|
39 |
+
# Multi-turn conversation example:
|
40 |
+
def multi_turn_chat(video_path, user_prompt, chat_history):
    """Continue a multi-turn conversation about a video.

    Parameters:
        video_path (str): Filepath of the video under discussion.
        user_prompt (str): The user's next question.
        chat_history (list): Prior turns, as returned by a previous call
            (or by model.chat with return_history=True).

    Returns:
        A (output, chat_history) tuple: the model's answer and the
        updated conversation history to pass into the next turn.
    """
    # Forward the running history so the model can condition on earlier
    # turns; return_history=True gives back the extended history.
    chat_kwargs = dict(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        chat_history=chat_history,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    answer, updated_history = model.chat(**chat_kwargs)
    return answer, updated_history
|
51 |
+
|
52 |
+
# For the Gradio interface, we'll combine these into a chat function.
|
53 |
def chat_interface(video_path, user_prompt, chat_history):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
if chat_history is None:
|
55 |
chat_history = []
|
|
|
56 |
output, new_history = model.chat(
|
57 |
video_path=video_path,
|
58 |
tokenizer=tokenizer,
|
|
|
70 |
with gr.Row():
|
71 |
video_input = gr.Video(label="Upload Video", type="filepath")
|
72 |
question_input = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
|
73 |
+
chat_state = gr.State([]) # To maintain conversation history
|
|
|
74 |
output_text = gr.Textbox(label="Model Response")
|
75 |
|
|
|
|
|
|
|
|
|
76 |
send_btn = gr.Button("Send")
|
77 |
send_btn.click(
|
78 |
+
chat_interface,
|
79 |
inputs=[video_input, question_input, chat_state],
|
80 |
outputs=[output_text, chat_state]
|
81 |
)
|
82 |
|
|
|
83 |
if __name__ == "__main__":
|
84 |
demo.launch()
|