codelion committed on
Commit
0425992
·
verified ·
1 Parent(s): 63595a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -11,7 +11,7 @@ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
11
  if not GOOGLE_API_KEY:
12
  raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
13
 
14
- # Initialize the Gemini API client via AI Studio using the API key.
15
  client = genai.Client(api_key=GOOGLE_API_KEY)
16
 
17
  # Use the Gemini 2.0 Flash model.
@@ -21,8 +21,8 @@ MODEL_NAME = "gemini-2.0-flash-001"
21
  def call_gemini(video_file: str, prompt: str) -> str:
22
  """
23
  Call the Gemini model with the provided video file and prompt.
24
- The video file is read as bytes and passed with MIME type "video/mp4".
25
- The prompt is passed as a plain string.
26
  """
27
  with open(video_file, "rb") as f:
28
  file_bytes = f.read()
@@ -30,7 +30,7 @@ def call_gemini(video_file: str, prompt: str) -> str:
30
  model=MODEL_NAME,
31
  contents=[
32
  Part(file_data=file_bytes, mime_type="video/mp4"),
33
- prompt
34
  ]
35
  )
36
  return response.text
@@ -53,14 +53,15 @@ def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
53
  Ask Gemini to output key timestamps and descriptions in plain text.
54
  The prompt instructs the model to output one line per event in the format:
55
  HH:MM:SS - description
56
- We then parse these lines and extract frames using OpenCV.
57
 
58
  Returns a list of tuples: (image_array, caption)
59
  """
60
  prompt = (
61
- "List the key timestamps in the video and a brief description of the important event at that time. "
62
  "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
63
  )
 
64
  prompt += f" Video Summary: {summary}"
65
  if user_query:
66
  prompt += f" Focus on: {user_query}"
@@ -103,15 +104,16 @@ def get_key_frames(video_file: str, summary: str, user_query: str) -> list:
103
 
104
  def analyze_video(video_file: str, user_query: str) -> (str, list):
105
  """
106
- Perform a single-step video analysis on the uploaded file.
107
- First, call Gemini to get a brief summary of the video.
108
- Then, ask Gemini for key timestamps and descriptions.
109
 
110
  Returns:
111
- - A Markdown report as a string.
112
  - A gallery list of key frames (each as a tuple of (image, caption)).
113
  """
114
- summary_prompt = "Summarize this video in a few sentences, focusing on any security or surveillance insights."
 
115
  if user_query:
116
  summary_prompt += f" Also focus on: {user_query}"
117
  try:
@@ -119,6 +121,7 @@ def analyze_video(video_file: str, user_query: str) -> (str, list):
119
  except Exception as e:
120
  summary = f"[Error in summary extraction: {e}]"
121
  markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"
 
122
  key_frames_gallery = get_key_frames(video_file, summary, user_query)
123
  if not key_frames_gallery:
124
  markdown_report += "\n*No key frames were extracted.*\n"
 
11
  if not GOOGLE_API_KEY:
12
  raise ValueError("Please set the GOOGLE_API_KEY environment variable.")
13
 
14
+ # Initialize the Gemini API client via AI Studio.
15
  client = genai.Client(api_key=GOOGLE_API_KEY)
16
 
17
  # Use the Gemini 2.0 Flash model.
 
21
  def call_gemini(video_file: str, prompt: str) -> str:
22
  """
23
  Call the Gemini model with the provided video file and prompt.
24
+ The video is read as bytes and passed with MIME type "video/mp4",
25
+ and the prompt is wrapped as a text part.
26
  """
27
  with open(video_file, "rb") as f:
28
  file_bytes = f.read()
 
30
  model=MODEL_NAME,
31
  contents=[
32
  Part(file_data=file_bytes, mime_type="video/mp4"),
33
+ Part(text=prompt)
34
  ]
35
  )
36
  return response.text
 
53
  Ask Gemini to output key timestamps and descriptions in plain text.
54
  The prompt instructs the model to output one line per event in the format:
55
  HH:MM:SS - description
56
+ We then parse these lines and extract the corresponding frames using OpenCV.
57
 
58
  Returns a list of tuples: (image_array, caption)
59
  """
60
  prompt = (
61
+ "List the key timestamps in the video and a brief description of the event at that time. "
62
  "Output one line per event in the following format: HH:MM:SS - description. Do not include any extra text."
63
  )
64
+ # Append the summary (and user query if provided) so the model has context.
65
  prompt += f" Video Summary: {summary}"
66
  if user_query:
67
  prompt += f" Focus on: {user_query}"
 
104
 
105
  def analyze_video(video_file: str, user_query: str) -> (str, list):
106
  """
107
+ Perform a single-step video analysis.
108
+ First, call Gemini with a simple prompt to get a brief summary.
109
+ Then, call Gemini to list key timestamps with descriptions.
110
 
111
  Returns:
112
+ - A Markdown report summarizing the video.
113
  - A gallery list of key frames (each as a tuple of (image, caption)).
114
  """
115
+ # Use a very simple prompt for summary.
116
+ summary_prompt = "Summarize this video."
117
  if user_query:
118
  summary_prompt += f" Also focus on: {user_query}"
119
  try:
 
121
  except Exception as e:
122
  summary = f"[Error in summary extraction: {e}]"
123
  markdown_report = f"## Video Analysis Report\n\n**Summary:**\n\n{summary}\n"
124
+
125
  key_frames_gallery = get_key_frames(video_file, summary, user_query)
126
  if not key_frames_gallery:
127
  markdown_report += "\n*No key frames were extracted.*\n"