saadfarhad committed on
Commit
a5e0173
·
verified ·
1 Parent(s): 8d3f47a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -25
app.py CHANGED
@@ -2,15 +2,14 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoModel, AutoTokenizer
4
 
5
- # Set the model path (this is the repository/model ID on Hugging Face)
6
  model_path = "OpenGVLab/InternVideo2_5_Chat_8B"
7
 
8
  # Load the tokenizer and model with remote code enabled.
9
- # .half() converts the model to FP16 and .cuda() moves it to GPU (if available).
10
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
11
  model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
12
 
13
- # Get the image processor from the vision tower (if needed by the model's implementation)
14
  image_processor = model.get_vision_tower().image_processor
15
 
16
  # Evaluation settings
@@ -23,23 +22,37 @@ generation_config = {
23
  "num_beams": 1,
24
  }
25
 
26
- # Define a chat function that performs either single-turn or multi-turn conversation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def chat_interface(video_path, user_prompt, chat_history):
28
- """
29
- Performs a chat turn with the model. If no chat_history is provided,
30
- it starts a new conversation.
31
-
32
- Parameters:
33
- video_path (str): The filepath of the uploaded video.
34
- user_prompt (str): The user's question.
35
- chat_history (list): The conversation history (empty list for a new conversation).
36
-
37
- Returns:
38
- A tuple containing the model's output (str) and the updated chat history (list).
39
- """
40
  if chat_history is None:
41
  chat_history = []
42
- # The model.chat() method returns output and updated history.
43
  output, new_history = model.chat(
44
  video_path=video_path,
45
  tokenizer=tokenizer,
@@ -57,21 +70,15 @@ with gr.Blocks() as demo:
57
  with gr.Row():
58
  video_input = gr.Video(label="Upload Video", type="filepath")
59
  question_input = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
60
- # We'll use a hidden state to keep the conversation history.
61
- chat_state = gr.State([])
62
  output_text = gr.Textbox(label="Model Response")
63
 
64
- def process_chat(video, question, history):
65
- response, new_history = chat_interface(video, question, history)
66
- return response, new_history
67
-
68
  send_btn = gr.Button("Send")
69
  send_btn.click(
70
- process_chat,
71
  inputs=[video_input, question_input, chat_state],
72
  outputs=[output_text, chat_state]
73
  )
74
 
75
- # Launch the app.
76
  if __name__ == "__main__":
77
  demo.launch()
 
2
  import torch
3
  from transformers import AutoModel, AutoTokenizer
4
 
5
+ # Model setting
6
  model_path = "OpenGVLab/InternVideo2_5_Chat_8B"
7
 
8
  # Load the tokenizer and model with remote code enabled.
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
10
  model = AutoModel.from_pretrained(model_path, trust_remote_code=True).half().cuda()
11
 
12
+ # Get the image processor from the vision tower.
13
  image_processor = model.get_vision_tower().image_processor
14
 
15
  # Evaluation settings
 
22
  "num_beams": 1,
23
  }
24
 
25
+ video_path = "your_video.mp4" # (For testing locally, update as needed)
26
+
27
# Single-turn conversation example:
def single_turn_chat(video_path, user_prompt):
    """Ask the model one question about a video and return its answer.

    Parameters:
        video_path (str): Path to the video file to analyze.
        user_prompt (str): The question to ask about the video.

    Returns:
        str: The model's textual response. The conversation history that
        ``model.chat`` also returns is discarded, since a single turn has
        no follow-up.
    """
    # return_history=True keeps the call signature identical to the
    # multi-turn variant; the history is simply ignored here.
    response, _history = model.chat(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    return response
38
+
39
# Multi-turn conversation example:
def multi_turn_chat(video_path, user_prompt, chat_history):
    """Continue a conversation about a video with one more user turn.

    Parameters:
        video_path (str): Path to the video file under discussion.
        user_prompt (str): The user's next question.
        chat_history (list): Prior turns, as produced by earlier
            ``model.chat`` calls.

    Returns:
        tuple[str, list]: The model's response and the updated history,
        so the caller can feed the history back into the next turn.
    """
    response, updated_history = model.chat(
        video_path=video_path,
        tokenizer=tokenizer,
        user_prompt=user_prompt,
        chat_history=chat_history,
        return_history=True,
        max_num_frames=max_num_frames,
        generation_config=generation_config,
    )
    return response, updated_history
51
+
52
+ # For the Gradio interface, we'll combine these into a chat function.
53
  def chat_interface(video_path, user_prompt, chat_history):
 
 
 
 
 
 
 
 
 
 
 
 
54
  if chat_history is None:
55
  chat_history = []
 
56
  output, new_history = model.chat(
57
  video_path=video_path,
58
  tokenizer=tokenizer,
 
70
  with gr.Row():
71
  video_input = gr.Video(label="Upload Video", type="filepath")
72
  question_input = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
73
+ chat_state = gr.State([]) # To maintain conversation history
 
74
  output_text = gr.Textbox(label="Model Response")
75
 
 
 
 
 
76
  send_btn = gr.Button("Send")
77
  send_btn.click(
78
+ chat_interface,
79
  inputs=[video_input, question_input, chat_state],
80
  outputs=[output_text, chat_state]
81
  )
82
 
 
83
  if __name__ == "__main__":
84
  demo.launch()