bstraehle committed on
Commit
a09b432
·
verified ·
1 Parent(s): 2f4d8f8

Update crew.py

Browse files
Files changed (1) hide show
  1. crew.py +75 -120
crew.py CHANGED
@@ -10,17 +10,19 @@ from google import genai
10
  from openai import OpenAI
11
  from openinference.instrumentation.crewai import CrewAIInstrumentor
12
  from phoenix.otel import register
13
- #from pytubefix import YouTube
14
- from util import get_final_answer #, get_img_b64, get_imgs_b64
15
 
16
  ## LLMs
17
 
18
  MANAGER_MODEL = "gpt-4.1"
19
  AGENT_MODEL = "gpt-4.1"
 
20
  FINAL_ANSWER_MODEL = "gpt-4.5-preview"
21
- AUDIO_MODEL = "gpt-4o-transcribe"
22
- IMAGE_MODEL = "gpt-4.1"
23
- VIDEO_MODEL = "gpt-4.1-mini"
 
 
24
 
25
  # LLM evaluation
26
 
@@ -37,162 +39,115 @@ tracer_provider = register(
37
  CrewAIInstrumentor().instrument(tracer_provider=tracer_provider)
38
 
39
  def run_crew(question, file_path):
40
- # Custom tools
41
 
42
- @tool("Audio Analysis Tool")
43
- def audio_analysis_tool(question: str, file_path: str) -> str:
44
- """Answer a question about an audio file.
45
 
46
  Args:
47
- question (str): Question about the audio file
48
- file_path (str): Path of the audio file
49
 
50
  Returns:
51
- str: Answer to the question about the audio file
52
 
53
  Raises:
54
- FileNotFoundError: If the audio file does not exist
55
  RuntimeError: If processing fails"""
56
- if not os.path.exists(file_path):
57
- raise FileNotFoundError(f"Audio file not found: {file_path}")
58
-
59
  try:
60
- client = OpenAI()
61
 
62
- transcript = client.audio.transcriptions.create(
63
- file=open(file_path, "rb"),
64
- model=AUDIO_MODEL,
65
- prompt=question
 
66
  )
67
-
68
- return transcript.text
69
  except Exception as e:
70
- raise RuntimeError(f"Failed to process audio: {str(e)}")
71
 
72
- @tool("Image Analysis Tool")
73
- def image_analysis_tool(question: str, file_path: str) -> str:
74
- """Answer a question about an image file.
75
 
76
  Args:
77
- question (str): Question about the image file
78
- file_path (str): Path of the image file
79
 
80
  Returns:
81
- str: Answer to the question about the image file
82
 
83
  Raises:
84
- FileNotFoundError: If the image file does not exist
85
  RuntimeError: If processing fails"""
86
- if not os.path.exists(file_path):
87
- raise FileNotFoundError(f"Image file not found: {file_path}")
88
-
89
  try:
90
- # Get image
91
-
92
- img_b64 = get_img_b64(file_path)
93
 
94
- # OpenAI
95
 
96
- client = OpenAI()
97
-
98
- completion = client.chat.completions.create(
99
- messages=[{"role": "user",
100
- "content": [{"type": "text", "text": question},
101
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}]}],
102
- model=IMAGE_MODEL
103
  )
104
-
105
- return completion.choices[0].message.content
106
  except Exception as e:
107
- raise RuntimeError(f"Failed to process image: {str(e)}")
108
-
109
- @tool("YouTube Audio Analysis Tool")
110
- def youtube_audio_analysis_tool(question: str, url: str) -> str:
111
- """Answer an audio question about a YouTube video.
112
 
113
  Args:
114
- question (str): Audio question about YouTube video
115
- url (str): YouTube URL
116
 
117
  Returns:
118
- str: Answer to the audio question about YouTube video
119
 
120
  Raises:
121
  RuntimeError: If processing fails"""
122
  try:
123
- # YouTube (hack to deal with access issues)
124
-
125
- if url.endswith("1htKBjuUWec"):
126
- file_path = "data/1htKBjuUWec.mp4"
127
- else:
128
- raise RuntimeError()
129
 
130
- #file_path = "audio.mp4"
131
- #yt = YouTube(url, use_oauth=True, allow_oauth_cache=True)
132
- #stream = yt.streams.filter(only_audio=True).first()
133
- #tream.download(filename=file_path)
134
-
135
- # OpenAI
136
 
137
- client = OpenAI()
138
-
139
- transcription = client.audio.transcriptions.create(
140
- file=open(file_path, "rb"),
141
- model=AUDIO_MODEL,
142
- prompt=question
143
  )
144
-
145
- return transcription.text
146
- except Exception as e:
147
- raise RuntimeError(f"Failed to process audio: {str(e)}")
148
 
149
- @tool("YouTube Image Analysis Tool")
150
- def youtube_image_analysis_tool(question: str, url: str) -> str:
151
- """Answer an image question about a YouTube video.
 
 
 
 
152
 
153
  Args:
154
- question (str): Image question about YouTube video
155
- url (str): YouTube URL
156
 
157
  Returns:
158
- str: Answer to the image question about YouTube video
159
 
160
  Raises:
161
  RuntimeError: If processing fails"""
162
  try:
163
- # YouTube (hack to deal with access issues)
164
-
165
- if url.endswith("L1vXCYZAYYM"):
166
- file_path = "data/L1vXCYZAYYM.mp4"
167
- else:
168
- raise RuntimeError()
169
-
170
- #file_path = "video.mp4"
171
- #yt = YouTube(url, use_oauth=True, allow_oauth_cache=True)
172
- #stream = yt.streams.get_highest_resolution()
173
- #stream.download(filename=file_path)
174
-
175
- # Get images
176
-
177
- imgs_b64 = get_imgs_b64(file_path)
178
-
179
- # OpenAI
180
-
181
- client = OpenAI()
182
-
183
- response = client.responses.create(
184
- input=[{"role": "user",
185
- "content": [{"type": "input_text", "text": (question)},
186
- *[{"type": "input_image", "image_url": f"data:image/jpeg;base64,{img_b64}"} for img_b64 in imgs_b64]]}],
187
- model=VIDEO_MODEL
188
  )
189
-
190
- return response.output_text
191
  except Exception as e:
192
- raise RuntimeError(f"Failed to process video: {str(e)}")
193
 
194
- # Built-in tools
195
-
196
  web_search_tool = SerperDevTool()
197
  web_rag_tool = WebsiteSearchTool()
198
  python_coding_tool = CodeInterpreterTool()
@@ -243,10 +198,10 @@ def run_crew(question, file_path):
243
  verbose=False
244
  )
245
 
246
- youtube_image_analysis_agent = Agent(
247
- role="YouTube Image Analysis Agent",
248
- goal="Analyze YouTube video to help answer image question \"{question}\"",
249
- backstory="As an expert YouTube image analysis assistant, you analyze the video to help answer the question.",
250
  allow_delegation=False,
251
  llm=AGENT_MODEL,
252
  max_iter=2,
@@ -289,10 +244,10 @@ def run_crew(question, file_path):
289
 
290
  crew = Crew(
291
  agents=[web_search_agent,
292
- audio_analysis_agent,
293
  image_analysis_agent,
294
- youtube_audio_analysis_agent,
295
- youtube_image_analysis_agent,
 
296
  python_coding_agent],
297
  manager_agent=manager_agent,
298
  tasks=[manager_task],
 
10
  from openai import OpenAI
11
  from openinference.instrumentation.crewai import CrewAIInstrumentor
12
  from phoenix.otel import register
13
+ from util import get_final_answer
 
14
 
15
  ## LLMs
16
 
17
  MANAGER_MODEL = "gpt-4.1"
18
  AGENT_MODEL = "gpt-4.1"
19
+
20
  FINAL_ANSWER_MODEL = "gpt-4.5-preview"
21
+
22
+ IMAGE_MODEL = "gemini-2.0-flash"
23
+ AUDIO_MODEL = "gemini-2.0-flash"
24
+ VIDEO_MODEL = "gemini-2.0-flash"
25
+ YOUTUBE_MODEL = "gemini-2.0-flash"
26
 
27
  # LLM evaluation
28
 
 
39
  CrewAIInstrumentor().instrument(tracer_provider=tracer_provider)
40
 
41
  def run_crew(question, file_path):
42
+ # Tools
43
 
44
+ @tool("Image Analysis Tool")
45
+ def image_analysis_tool(question: str, file_path: str) -> str:
46
+ """Answer a question about an image file.
47
 
48
  Args:
49
+ question (str): Question about an image file
50
+ file_path (str): The image file path
51
 
52
  Returns:
53
+ str: Answer to the question about the image file
54
 
55
  Raises:
 
56
  RuntimeError: If processing fails"""
 
 
 
57
  try:
58
+ client = genai.Client(api_key="GOOGLE_API_KEY")
59
 
60
+ file = client.files.upload(file=file_path)
61
+
62
+ response = client.models.generate_content(
63
+ model=IMAGE_MODEL,
64
+ contents=[file, question]
65
  )
66
+
67
+ return response.text
68
  except Exception as e:
69
+ raise RuntimeError(f"Processing failed: {str(e)}")
70
 
71
+ @tool("Audio Analysis Tool")
72
+ def audio_analysis_tool(question: str, file_path: str) -> str:
73
+ """Answer a question about an audio file.
74
 
75
  Args:
76
+ question (str): Question about an audio file
77
+ file_path (str): The audio file path
78
 
79
  Returns:
80
+ str: Answer to the question about the audio file
81
 
82
  Raises:
 
83
  RuntimeError: If processing fails"""
 
 
 
84
  try:
85
+ client = genai.Client(api_key="GOOGLE_API_KEY")
 
 
86
 
87
+ file = client.files.upload(file=file_path)
88
 
89
+ response = client.models.generate_content(
90
+ model=AUDIO_MODEL,
91
+ contents=[file, question]
 
 
 
 
92
  )
93
+
94
+ return response.text
95
  except Exception as e:
96
+ raise RuntimeError(f"Processing failed: {str(e)}")
97
+
98
+ @tool("Video Analysis Tool")
99
+ def video_analysis_tool(question: str, file_path: str) -> str:
100
+ """Answer a question about a video file.
101
 
102
  Args:
103
+ question (str): Question about a video file
104
+ file_path (str): The video file path
105
 
106
  Returns:
107
+ str: Answer to the question about the video file
108
 
109
  Raises:
110
  RuntimeError: If processing fails"""
111
  try:
112
+ client = genai.Client(api_key="GOOGLE_API_KEY")
 
 
 
 
 
113
 
114
+ file = client.files.upload(file=file_path)
 
 
 
 
 
115
 
116
+ response = client.models.generate_content(
117
+ model=VIDEO_MODEL,
118
+ contents=[file, question]
 
 
 
119
  )
 
 
 
 
120
 
121
+ return response.text
122
+ except Exception as e:
123
+ raise RuntimeError(f"Processing failed: {str(e)}")
124
+
125
+ @tool("YouTube Analysis Tool")
126
+ def youtube_analysis_tool(question: str, url: str) -> str:
127
+ """Answer a question about a YouTube video.
128
 
129
  Args:
130
+ question (str): Question about a YouTube video
131
+ url (str): The YouTube video URL
132
 
133
  Returns:
134
+ str: Answer to the question about the YouTube video
135
 
136
  Raises:
137
  RuntimeError: If processing fails"""
138
  try:
139
+ client = genai.Client(api_key="GOOGLE_API_KEY")
140
+
141
+ return client.models.generate_content(
142
+ model=YOUTUBE_MODEL,
143
+ contents=types.Content(
144
+ parts=[types.Part(file_data=types.FileData(file_uri=url)),
145
+ types.Part(text=question)]
146
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  )
 
 
148
  except Exception as e:
149
+ raise RuntimeError(f"Processing failed: {str(e)}")
150
 
 
 
151
  web_search_tool = SerperDevTool()
152
  web_rag_tool = WebsiteSearchTool()
153
  python_coding_tool = CodeInterpreterTool()
 
198
  verbose=False
199
  )
200
 
201
+ youtube_analysis_agent = Agent(
202
+ role="YouTube Analysis Agent",
203
+ goal="Analyze YouTube video to help answer question \"{question}\"",
204
+ backstory="As an expert YouTube analysis assistant, you analyze the video to help answer the question.",
205
  allow_delegation=False,
206
  llm=AGENT_MODEL,
207
  max_iter=2,
 
244
 
245
  crew = Crew(
246
  agents=[web_search_agent,
 
247
  image_analysis_agent,
248
+ audio_analysis_agent,
249
+ video_analysis_agent,
250
+ youtube_analysis_agent,
251
  python_coding_agent],
252
  manager_agent=manager_agent,
253
  tasks=[manager_task],