File size: 10,865 Bytes
58787c8 00aced7 adf9d94 1c7a0a7 15b8627 1c7a0a7 617431a 1c7a0a7 257d222 f06be5d 230d96d 2c6f8d9 ce7387d 27f2401 ad1b760 27f2401 2c6f8d9 81edcab ad1b760 4c6d89e 27f2401 86f2a58 230d96d df58f18 230d96d 3bd69bd 5028b6b 7ddb52b f407c48 7bc23ab 948a6f4 ed46760 b95f33c 617431a b95f33c 845c4a7 b95f33c 617431a 845c4a7 b95f33c 845c4a7 617431a a0c3d4c 58787c8 a0c3d4c 617431a 845c4a7 617431a b3a3878 617431a 845c4a7 617431a 845c4a7 53643e1 148f2b3 b95f33c 148f2b3 b95f33c 617431a 148f2b3 b95f33c 148f2b3 ad1b760 a0c3d4c 617431a b3a3878 ad1b760 617431a 148f2b3 617431a ad1b760 148f2b3 ad1b760 148f2b3 ad1b760 845c4a7 ed46760 8439f94 35828ac 1c7a0a7 617431a 7da0809 eea8c7f 617431a 8439f94 617431a 8439f94 617431a b95f33c eea8c7f 69b460d 750bbf8 b95f33c 7349562 eea8c7f 8439f94 b95f33c 6bf14de 3981c3e b40cc33 b95f33c f855987 1c7a0a7 7f78cfc 617431a b95f33c 35828ac 3981c3e b40cc33 fe51214 f855987 35828ac b95f33c 148f2b3 b95f33c 148f2b3 b95f33c 617431a a412583 b40cc33 148f2b3 fd399dc fa21952 ea40888 4453360 bec1a98 c70f203 4453360 ed46760 b05f917 a412583 148f2b3 ea40888 b05f917 4453360 7da0809 35828ac 0b498b7 617431a b95f33c 617431a b95f33c 617431a a412583 f155629 c70f203 ab1c2b7 ed46760 27f2401 750bbf8 27f2401 1e57e78 fe96564 fa21952 948a6f4 750bbf8 1312508 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 |
import cv2, os
from crewai import Agent, Crew, Process, Task
from crewai.tools import tool
from crewai_tools import (
CodeInterpreterTool,
SerperDevTool,
WebsiteSearchTool
)
from openai import OpenAI
from openinference.instrumentation.crewai import CrewAIInstrumentor
from phoenix.otel import register
from util import get_final_answer, get_img_b64
# --- Model selection -------------------------------------------------------
# One constant per role so each model can be swapped independently.
MANAGER_MODEL = "gpt-4.1"                # LLM of the manager agent
AGENT_MODEL = "gpt-4.1-mini"             # LLM shared by all worker agents
FINAL_ANSWER_MODEL = "gpt-4.5-preview"   # passed to get_final_answer()
AUDIO_MODEL = "gpt-4o-transcribe"        # audio transcription requests
IMAGE_MODEL = "gpt-4.1"                  # single-image chat completions
VIDEO_MODEL = "gpt-4.1-mini"             # multi-frame video requests

# --- LLM evaluation (Phoenix tracing) --------------------------------------
# A missing PHOENIX_API_KEY raises KeyError at import time.
PHOENIX_API_KEY = os.environ["PHOENIX_API_KEY"]

# Both variables are set before register() is called.
os.environ.update({
    "PHOENIX_CLIENT_HEADERS": f"api_key={PHOENIX_API_KEY}",
    "PHOENIX_COLLECTOR_ENDPOINT": "https://app.phoenix.arize.com",
})

tracer_provider = register(project_name="gaia", auto_instrument=True)

# Attach the CrewAI instrumentor to the tracer provider created above.
CrewAIInstrumentor().instrument(tracer_provider=tracer_provider)
def _transcribe_audio(question, file_path):
    """Transcribe a media file, passing the question as the transcription prompt.

    Args:
        question (str): Question forwarded as the `prompt` of the request
        file_path (str): Path of the audio/video file to transcribe

    Returns:
        str: The `text` attribute of the transcription response
    """
    client = OpenAI()
    # The context manager closes the file handle even if the API call fails.
    with open(file_path, "rb") as media_file:
        transcript = client.audio.transcriptions.create(
            file=media_file,
            model=AUDIO_MODEL,
            prompt=question
        )
    return transcript.text


def run_crew(question, file_path):
    """Answer a question with a manager agent that can delegate to specialists.

    Builds the custom and built-in tools, the specialist agents, the manager
    agent, its single task and the crew, then kicks the crew off. The crew's
    answer is passed to `get_final_answer` together with the question, and the
    question plus both answers are printed.

    Args:
        question: The question to answer.
        file_path: Optional path of a file that belongs to the question. When
            truthy, the path is appended to the question; for ``.py`` files the
            file content is appended as well.

    Returns:
        Whatever `get_final_answer` returns for the crew's initial answer.
    """

    # Custom tools
    # NOTE: the tool docstrings below are kept verbatim; the @tool decorator
    # receives these functions, so their text may be surfaced to the LLM.

    @tool("Audio Analysis Tool")
    def audio_analysis_tool(question: str, file_path: str) -> str:
        """Answer a question about an audio file.

        Args:
            question (str): Question about the audio file
            file_path (str): Path of the audio file

        Returns:
            str: Answer to the question about the audio file

        Raises:
            FileNotFoundError: If the audio file does not exist
            RuntimeError: If processing fails"""
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        try:
            return _transcribe_audio(question, file_path)
        except Exception as e:
            raise RuntimeError(f"Failed to process audio: {str(e)}") from e

    @tool("Image Analysis Tool")
    def image_analysis_tool(question: str, file_path: str) -> str:
        """Answer a question about an image file.

        Args:
            question (str): Question about the image file
            file_path (str): Path of the image file

        Returns:
            str: Answer to the question about the image file

        Raises:
            FileNotFoundError: If the image file does not exist
            RuntimeError: If processing fails"""
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Image file not found: {file_path}")

        try:
            # Get image
            img_b64 = get_img_b64(file_path)

            # OpenAI
            client = OpenAI()
            completion = client.chat.completions.create(
                messages=[{"role": "user",
                           "content": [{"type": "text", "text": question},
                                       {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}]}],
                model=IMAGE_MODEL
            )
            return completion.choices[0].message.content
        except Exception as e:
            raise RuntimeError(f"Failed to process image: {str(e)}") from e

    @tool("YouTube Audio Analysis Tool")
    def youtube_audio_analysis_tool(question: str, url: str) -> str:
        """Answer an audio question about a YouTube video.

        Args:
            question (str): Audio question about YouTube video
            url (str): YouTube URL

        Returns:
            str: Answer to the audio question about YouTube video

        Raises:
            RuntimeError: If processing fails"""
        # YouTube (hack to deal with access issues): only this one video is
        # supported, served from a local copy instead of being downloaded.
        if not url.endswith("1htKBjuUWec"):
            raise RuntimeError(f"Unsupported YouTube URL: {url}")
        file_path = "data/1htKBjuUWec.mp4"

        try:
            return _transcribe_audio(question, file_path)
        except Exception as e:
            raise RuntimeError(f"Failed to process audio: {str(e)}") from e

    @tool("YouTube Image Analysis Tool")
    def youtube_image_analysis_tool(question: str, url: str) -> str:
        """Answer an image question about a YouTube video.

        Args:
            question (str): Image question about YouTube video
            url (str): YouTube URL

        Returns:
            str: Answer to the image question about YouTube video

        Raises:
            RuntimeError: If processing fails"""
        # Imported here because the module-level imports do not include base64.
        import base64

        # YouTube (hack to deal with access issues): only this one video is
        # supported, served from a local copy instead of being downloaded.
        if not url.endswith("L1vXCYZAYYM"):
            raise RuntimeError(f"Unsupported YouTube URL: {url}")
        file_path = "data/L1vXCYZAYYM.mp4"

        try:
            # Get video
            # TODO: every decoded frame is sent to the model; consider sampling
            # frames, since long videos may produce very large requests.
            video = cv2.VideoCapture(file_path)
            base64_frames = []
            try:
                while video.isOpened():
                    success, frame = video.read()
                    if not success:
                        break
                    encoded, buffer = cv2.imencode(".jpg", frame)
                    if not encoded:
                        # Skip frames that could not be JPEG-encoded.
                        continue
                    base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
            finally:
                # Release the capture even when decoding/encoding raises.
                video.release()

            if not base64_frames:
                raise RuntimeError(f"No frames could be read from {file_path}")

            # OpenAI
            client = OpenAI()
            response = client.responses.create(
                input=[{"role": "user",
                        "content": [{"type": "input_text", "text": (question)},
                                    *[{"type": "input_image", "image_url": f"data:image/jpeg;base64,{frame}"} for frame in base64_frames]]}],
                model=VIDEO_MODEL
            )
            return response.output_text
        except Exception as e:
            raise RuntimeError(f"Failed to process video: {str(e)}") from e

    # Built-in tools
    web_search_tool = SerperDevTool()
    web_rag_tool = WebsiteSearchTool()
    python_coding_tool = CodeInterpreterTool()

    # Agents
    web_search_agent = Agent(
        role="Web Search Agent",
        goal="Search the web to help answer question \"{question}\", then scrape the most relevant web page.",
        backstory="As an expert web search assistant, you search the web to help answer the question.",
        allow_delegation=False,
        llm=AGENT_MODEL,
        max_iter=3,
        tools=[web_search_tool, web_rag_tool],
        verbose=False
    )

    audio_analysis_agent = Agent(
        role="Audio Analysis Agent",
        goal="Analyze audio to help answer question \"{question}\"",
        backstory="As an expert audio analysis assistant, you analyze the audio to help answer the question.",
        allow_delegation=False,
        llm=AGENT_MODEL,
        max_iter=3,
        tools=[audio_analysis_tool],
        verbose=False
    )

    image_analysis_agent = Agent(
        role="Image Analysis Agent",
        goal="Analyze image to help answer question \"{question}\"",
        backstory="As an expert image analysis assistant, you analyze the image to help answer the question.",
        allow_delegation=False,
        llm=AGENT_MODEL,
        max_iter=3,
        tools=[image_analysis_tool],
        verbose=False
    )

    youtube_audio_analysis_agent = Agent(
        role="YouTube Audio Analysis Agent",
        goal="Analyze YouTube video to help answer audio question \"{question}\"",
        backstory="As an expert YouTube audio analysis assistant, you analyze the video to help answer the question.",
        allow_delegation=False,
        llm=AGENT_MODEL,
        max_iter=3,
        tools=[youtube_audio_analysis_tool],
        verbose=False
    )

    youtube_image_analysis_agent = Agent(
        role="YouTube Image Analysis Agent",
        goal="Analyze YouTube video to help answer image question \"{question}\"",
        backstory="As an expert YouTube image analysis assistant, you analyze the video to help answer the question.",
        allow_delegation=False,
        llm=AGENT_MODEL,
        max_iter=3,
        tools=[youtube_image_analysis_tool],
        verbose=False
    )

    python_coding_agent = Agent(
        role="Python Coding Agent",
        goal="Write and/or execute Python code to help answer question \"{question}\"",
        backstory="As an expert Python coding assistant, you write and/or execute Python code to help answer the question.",
        allow_delegation=False,
        llm=AGENT_MODEL,
        max_iter=10,
        tools=[python_coding_tool],
        verbose=False
    )

    manager_agent = Agent(
        role="Manager Agent",
        goal="Try to answer the following question. If needed, delegate to one or more of your coworkers for help. "
             "If there is no good coworker, delegate to the Python Coding Agent to implement a tool for the task. "
             "Question: \"{question}\"",
        backstory="As an expert manager assistant, you answer the question.",
        allow_delegation=True,
        llm=MANAGER_MODEL,
        max_iter=5,
        verbose=True
    )

    # Task
    manager_task = Task(
        agent=manager_agent,
        description="Try to answer the following question. If needed, delegate to one or more of your coworkers for help. Question: \"{question}\"",
        expected_output="The answer to the question."
    )

    # Crew
    crew = Crew(
        agents=[web_search_agent,
                audio_analysis_agent,
                image_analysis_agent,
                youtube_audio_analysis_agent,
                youtube_image_analysis_agent,
                python_coding_agent],
        manager_agent=manager_agent,
        tasks=[manager_task],
        verbose=True
    )

    # Process
    if file_path:
        # Tell the agents where the attachment lives so tools can open it.
        question = f"{question} File path: {file_path}."

        if file_path.endswith(".py"):
            # Python attachments are inlined into the question text.
            with open(file_path, "r") as file:
                question = f"{question} File data:\n{file.read()}"

    initial_answer = crew.kickoff(inputs={"question": question})
    final_answer = get_final_answer(FINAL_ANSWER_MODEL, question, str(initial_answer))

    print(f"Question: {question}")
    print(f"Initial answer: {initial_answer}")
    print(f"Final answer: {final_answer}")

    return final_answer