import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer

# Model settings
model_path = "OpenGVLab/InternVideo2_5_Chat_8B"

# Load the tokenizer and model with remote code enabled.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
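# Note: .half().cuda() assumes a CUDA GPU is available; on CPU-only hardware it
# raises a RuntimeError (a common cause of a Space failing at startup). A
# device-aware alternative is sketched below; it is an assumption, not part of
# the original app:
#
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   dtype = torch.float16 if device == "cuda" else torch.float32
#   model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype).to(device)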
# Get the image processor from the vision tower (not used directly below, but
# exposed in case frames need manual preprocessing).
image_processor = model.get_vision_tower().image_processor

# Evaluation settings
max_num_frames = 512
generation_config = {
    "do_sample": False,  # greedy decoding; temperature/top_p are inactive when sampling is off
    "temperature": 0.0,
    "max_new_tokens": 1024,
    "top_p": 0.1,
    "num_beams": 1,
}
video_path = "your_video.mp4"  # (for testing locally; update as needed)

# Single-turn conversation example:
def single_turn_chat(video_path, user_prompt):
    output, chat_history = model.chat(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    return output
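# Example usage (illustrative only; assumes your_video.mp4 exists locally):
#
#   print(single_turn_chat(video_path, "Describe this video in detail."))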
# Multi-turn conversation example:
def multi_turn_chat(video_path, user_prompt, chat_history):
    output, chat_history = model.chat(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        chat_history=chat_history,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    return output, chat_history
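# Example usage with illustrative prompts, threading the history through
# follow-up turns. Starting from an empty list assumes the remote chat
# implementation accepts it as the initial history, as the Gradio handler
# below also does:
#
#   answer1, history = multi_turn_chat(video_path, "Describe this video in detail.", [])
#   answer2, history = multi_turn_chat(video_path, "How many people appear in the video?", history)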
# For the Gradio interface, we'll combine these into a chat function.
def chat_interface(video_path, user_prompt, chat_history):
    if chat_history is None:
        chat_history = []
    # Guard against submissions without a video, which would crash model.chat.
    if not video_path:
        return "Please upload a video first.", chat_history
    output, new_history = model.chat(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        chat_history=chat_history,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    return output, new_history
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## InternVideo2_5_Chat_8B Chat Interface")
    with gr.Row():
        # gr.Video takes no `type` parameter (that keyword belongs to
        # components like gr.Image); it already passes a file path.
        video_input = gr.Video(label="Upload Video")
        question_input = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
    chat_state = gr.State([])  # To maintain conversation history
    output_text = gr.Textbox(label="Model Response")
    send_btn = gr.Button("Send")
    send_btn.click(
        chat_interface,
        inputs=[video_input, question_input, chat_state],
        outputs=[output_text, chat_state],
    )
if __name__ == "__main__":
    demo.launch()
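# Optional (an assumption, not in the original app): enable Gradio's request
# queue so long-running video inference doesn't hit the default request
# timeout:
#
#   demo.queue().launch()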