saadfarhad's picture
Update app.py
a5e0173 verified
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
# Model setting: Hugging Face Hub repository that supplies both the
# tokenizer and the model weights.
model_path = "OpenGVLab/InternVideo2_5_Chat_8B"

# trust_remote_code=True allows the custom Python code shipped in the Hub
# repository to run while loading, so model_path must be a trusted source.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
# Convert the weights to half precision, then move the model to the GPU.
model = model.half().cuda()

# Get the image processor from the vision tower.
# NOTE(review): nothing else in this file as shown reads image_processor.
image_processor = model.get_vision_tower().image_processor
# Evaluation settings
# Forwarded to model.chat as its max_num_frames argument.
max_num_frames = 512

# Decoding options forwarded unchanged to model.chat as generation_config.
generation_config = dict(
    do_sample=False,
    temperature=0.0,
    max_new_tokens=1024,
    top_p=0.1,
    num_beams=1,
)

video_path = "your_video.mp4"  # (For testing locally, update as needed)
# Single-turn conversation example:
def single_turn_chat(video_path, user_prompt):
    """Ask the model one question about a video and return only its reply.

    Args:
        video_path: Video location, passed to ``model.chat`` as ``video_path``.
        user_prompt: Question text, passed to ``model.chat`` as ``user_prompt``.

    Returns:
        The first element of the pair returned by ``model.chat``. The second
        element (the history) is requested but discarded.
    """
    chat_kwargs = {
        "video_path": video_path,
        "tokenizer": tokenizer,
        "user_prompt": user_prompt,
        "return_history": True,
        "max_num_frames": max_num_frames,
        "generation_config": generation_config,
    }
    reply, _unused_history = model.chat(**chat_kwargs)
    return reply
# Multi-turn conversation example:
def multi_turn_chat(video_path, user_prompt, chat_history):
    """Continue a conversation about a video, given the history so far.

    Args:
        video_path: Video location, passed to ``model.chat`` as ``video_path``.
        user_prompt: Question text for this turn.
        chat_history: History from earlier turns, passed to ``model.chat``
            as ``chat_history``.

    Returns:
        A ``(reply, updated_history)`` tuple built from the pair that
        ``model.chat`` returns.
    """
    chat_kwargs = {
        "video_path": video_path,
        "tokenizer": tokenizer,
        "user_prompt": user_prompt,
        "chat_history": chat_history,
        "return_history": True,
        "max_num_frames": max_num_frames,
        "generation_config": generation_config,
    }
    reply, updated_history = model.chat(**chat_kwargs)
    return reply, updated_history
# For the Gradio interface, we'll combine these into a chat function.
def chat_interface(video_path, user_prompt, chat_history):
    """Handle a click on "Send": validate the inputs, then query the model.

    Args:
        video_path: Path of the uploaded video, or ``None``/empty when the
            user has not uploaded one.
        user_prompt: Question typed by the user; may be ``None`` or blank.
        chat_history: Conversation history held in the Gradio state, or
            ``None`` when no history exists yet.

    Returns:
        A ``(text, history)`` tuple. ``text`` is the model's reply, or a
        short instruction to the user when an input is missing. ``history``
        is the updated history from the model, or the incoming history
        (``[]`` if it was ``None``) when the model was not called.
    """
    if chat_history is None:
        chat_history = []

    # Reject incomplete submissions up front instead of letting model.chat
    # fail on a missing path or an empty prompt; the history is left as-is
    # so the conversation state is not disturbed.
    if not video_path:
        return "Please upload a video before asking a question.", chat_history
    if not user_prompt or not user_prompt.strip():
        return "Please enter a question about the video.", chat_history

    # Same model call as the multi-turn example, so reuse it.
    return multi_turn_chat(video_path, user_prompt, chat_history)
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## InternVideo2_5_Chat_8B Chat Interface")
    with gr.Row():
        # gr.Video takes no `type` argument (that option belongs to
        # components such as Image/Audio/File); the handler receives the
        # uploaded video as a file path.
        video_input = gr.Video(label="Upload Video")
        question_input = gr.Textbox(
            label="Enter your question",
            placeholder="Type your question here...",
        )
    chat_state = gr.State([])  # To maintain conversation history
    output_text = gr.Textbox(label="Model Response")
    send_btn = gr.Button("Send")
    # On click, run chat_interface and write its two return values to the
    # response box and the history state.
    send_btn.click(
        chat_interface,
        inputs=[video_input, question_input, chat_state],
        outputs=[output_text, chat_state],
    )

if __name__ == "__main__":
    demo.launch()