ankush-003 committed on
Commit
ff30164
·
verified ·
1 Parent(s): 5a56105

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +227 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ from langchain_core.messages import (
3
+ BaseMessage,
4
+ HumanMessage,
5
+ ToolMessage,
6
+ )
7
+ import base64
8
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
9
+ from langgraph.graph import END, StateGraph, START
10
+ from typing import Annotated, List
11
+ from langchain_community.tools import DuckDuckGoSearchRun
12
+ from langchain_core.tools import tool
13
+ from langchain_experimental.utilities import PythonREPL
14
+ import operator
15
+ from typing import Annotated, Sequence, TypedDict
16
+ from langchain_groq import ChatGroq
17
+ import functools
18
+ from langchain_core.messages import AIMessage
19
+ from langchain_google_genai import ChatGoogleGenerativeAI
20
+ from langgraph.prebuilt import ToolNode
21
+ from typing import Literal
22
+ import gradio as gr
23
+ import io
24
+ import PIL
25
+
26
# Read environment variables from a local .env file before any client is
# constructed (presumably this is where the Groq / Google API keys live).
load_dotenv()

# Groq-hosted Llama model; used further down for the checker agent, which
# only ever receives text.
llm_coder = ChatGroq(model_name="llama-3.1-8b-instant", temperature=0)

# Gemini model; used further down for the agents that see the uploaded images.
llm_image = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)

# Tool backends: a web search runner and a Python REPL (wrapped by python_repl).
search_tool = DuckDuckGoSearchRun()
repl_tool = PythonREPL()
38
+
39
@tool
def python_repl(
    code: Annotated[str, "The python code to execute to answer the question."],
):
    """Use this to execute python code. If you want to see the output of a value,
    you should print it out with `print(...)`. This is visible to the user."""
    # NOTE(review): the docstring and the Annotated description above are
    # presumably surfaced to the model as the tool description by @tool, so
    # their wording is part of runtime behaviour -- edit with care.
    completion_hint = "\n\nIf you have completed all tasks, respond with FINAL ANSWER."

    try:
        stdout = repl_tool.run(code)
    except BaseException as exc:
        # BaseException (not Exception) so that executed code raising e.g.
        # SystemExit is reported back as text instead of propagating.
        return f"Failed to execute. Error: {repr(exc)}"

    report = f"Successfully executed:\n```python\n{code}\n```\nStdout: {stdout}"
    return report + completion_hint
53
+
54
def create_agent(llm, tools, system_message: str):
    """Build a runnable agent: a shared system prompt piped into `llm`.

    Args:
        llm: Chat model to run the prompt against.
        tools: Tools the agent may call. May be empty, in which case the model
            is used as-is and no tool binding is attempted.
        system_message: Agent-specific instructions appended to the shared
            system prompt.

    Returns:
        A runnable (`prompt | model`) that accepts a mapping containing a
        "messages" entry.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a helpful AI assistant, collaborating with other assistants."
                " Use the provided tools to progress towards answering the question."
                " If you are unable to fully answer, that's OK, another assistant with different tools "
                " will help where you left off. Execute what you can to make progress."
                " If you or any of the other assistants have the final answer or deliverable,"
                " prefix your response with FINAL ANSWER so the team knows to stop."
                " You have access to the following tools: {tool_names}.\n{system_message}",
            ),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    # `t` rather than `tool`, to avoid shadowing the imported @tool decorator.
    prompt = prompt.partial(
        system_message=system_message,
        tool_names=", ".join(t.name for t in tools),
    )

    # Binding an empty tool list is never useful and may be rejected by the
    # provider, so only bind when there is something to bind.
    if not tools:
        return prompt | llm
    return prompt | llm.bind_tools(tools)
74
+
75
class AgentState(TypedDict):
    # Conversation history. The operator.add metadata is the merge function
    # for this key: message lists returned by nodes are concatenated onto the
    # existing sequence rather than replacing it.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Name written by the node that produced the most recent update.
    sender: str
78
+
79
def agent_node(state, agent, name):
    """Invoke `agent` on `state` and package its reply as a state update.

    Any reply that is not a ToolMessage is rebuilt as an AIMessage carrying
    `name`, so later steps can tell which agent produced it. The returned
    mapping holds the single new message and records `name` as the sender.
    """
    reply = agent.invoke(state)
    if not isinstance(reply, ToolMessage):
        # Drop the original "type"/"name" fields so AIMessage supplies its own.
        fields = reply.dict(exclude={"type", "name"})
        reply = AIMessage(**fields, name=name)
    return {"messages": [reply], "sender": name}
89
+
90
# Agent 1: restates the problem shown in the images (no solution yet).
problem_agent = create_agent(
    llm=llm_image,
    tools=[],
    system_message="You should understand the problem properly and provide a clear description with the edge cases, don't provide the solution, after completing all tasks.",
)
problem_node = functools.partial(agent_node, name="problem_agent", agent=problem_agent)

# Agent 2: writes a Python solution plus the intuition behind it.
solution_agent = create_agent(
    llm=llm_image,
    tools=[],
    system_message="after understanding the problem, you should provide a solution to the problem in python that is clear and concise and solves all edge cases, also provide intuition behind the solution.",
)
solution_node = functools.partial(agent_node, name="solution_agent", agent=solution_agent)

# Agent 3: reviews the proposed solution. It runs on the Groq model and is
# wrapped by checker_node rather than agent_node.
checker_agent = create_agent(
    llm=llm_coder,
    tools=[],
    system_message="critically analyze the solution provided by the solution agent, check for correctness, efficiency, and edge cases, if the solution is correct, provide a message saying so, if not, provide a message with the error and suggest a fix.",
)
109
+
110
def _text_only_content(content):
    """Join the textual parts of a list-style message content into one string.

    Plain-string items are kept as-is, dict parts are kept only when their
    "type" is "text", and everything else (e.g. image parts) is dropped.
    """
    pieces = []
    for item in content:
        if isinstance(item, str):
            pieces.append(item)
        elif isinstance(item, dict) and item.get("type") == "text":
            pieces.append(item.get("text", ""))
    return " ".join(pieces)


def checker_node(state):
    """Run the checker agent on a text-only view of the conversation.

    Messages whose content is a list of parts are copied and reduced to their
    text before being sent to `checker_agent`; messages with plain string
    content are passed through untouched. The originals in `state` are not
    modified.

    Returns:
        The same state-update mapping `agent_node` produces, with the reply
        and sender both tagged "checker_agent".
    """
    text_only_messages = []
    for msg in state["messages"]:
        if isinstance(msg.content, list):
            # Work on a copy so the image parts stay in the shared history.
            stripped = msg.copy()
            stripped.content = _text_only_content(msg.content)
            text_only_messages.append(stripped)
        else:
            text_only_messages.append(msg)

    text_only_state = {
        "messages": text_only_messages,
        "sender": state["sender"],
    }
    # Same invoke-and-wrap logic as the other agents; reuse it instead of
    # keeping a second copy here.
    return agent_node(text_only_state, agent=checker_agent, name="checker_agent")
135
+
136
# Tools executed by the graph's "call_tool" node.
# NOTE(review): every agent in this file is created with an empty tool list,
# so these do not appear to be offered to any model -- confirm this is intended.
tools = [
    search_tool,
    python_repl,
]
tool_node = ToolNode(tools)
138
+
139
def router(state) -> Literal["call_tool", "__end__", "continue"]:
    """Decide where the graph goes after an agent has spoken.

    Args:
        state: Graph state; only the last entry of state["messages"] is read.

    Returns:
        "call_tool" if the last message requests a tool call, "__end__" if its
        text contains the marker FINAL ANSWER, otherwise "continue".
    """
    last_message = state["messages"][-1]

    # Not every message type carries tool_calls, so read it defensively.
    if getattr(last_message, "tool_calls", None):
        return "call_tool"

    content = last_message.content
    if isinstance(content, list):
        # List-style content: a plain `in` would test list membership, not
        # substring, so flatten the textual parts first.
        content = " ".join(
            part if isinstance(part, str) else str(part.get("text", ""))
            for part in content
            if isinstance(part, (str, dict))
        )

    if "FINAL ANSWER" in (content or ""):
        return "__end__"
    return "continue"
147
+
148
# Graph layout: problem_creator -> solution_generator -> checker_agent, then
# back to problem_creator, until a message contains FINAL ANSWER (or the
# caller's recursion limit is reached).
workflow = StateGraph(AgentState)

workflow.add_node("problem_creator", problem_node)
workflow.add_node("solution_generator", solution_node)
workflow.add_node("checker_agent", checker_node)
workflow.add_node("call_tool", tool_node)

workflow.add_conditional_edges(
    "problem_creator",
    router,
    {"continue": "solution_generator", "call_tool": "call_tool", "__end__": END},
)
workflow.add_conditional_edges(
    "solution_generator",
    router,
    {"continue": "checker_agent", "call_tool": "call_tool", "__end__": END},
)
workflow.add_conditional_edges(
    "checker_agent",
    router,
    {"continue": "problem_creator", "call_tool": "call_tool", "__end__": END},
)
# After a tool call, control returns to whichever agent asked for it. The
# lookup key is state["sender"], which holds the *agent* name written by the
# agent nodes ("problem_agent", "solution_agent", "checker_agent"), not the
# graph node name -- so those names must be present as keys here.
workflow.add_conditional_edges(
    "call_tool",
    lambda x: x["sender"],
    {
        "problem_agent": "problem_creator",
        "solution_agent": "solution_generator",
        "checker_agent": "checker_agent",
        # Node names kept as keys too, for any sender that uses them.
        "problem_creator": "problem_creator",
        "solution_generator": "solution_generator",
    },
)
workflow.add_edge(START, "problem_creator")

graph = workflow.compile()
182
+
183
# Pillow image modes that can be written as PNG without conversion.
_PNG_SAFE_MODES = frozenset({"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"})


def _image_to_content_part(image):
    """Encode a PIL image as a base64 PNG data URL wrapped in an image_url part."""
    if image.mode not in _PNG_SAFE_MODES:
        # e.g. CMYK uploads: PNG cannot store them, so saving would raise.
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{img_str}"},
    }


def _message_text(content):
    """Return message content as display text, flattening list-style content."""
    if isinstance(content, str):
        return content
    pieces = []
    for item in content:
        if isinstance(item, str):
            pieces.append(item)
        elif isinstance(item, dict) and item.get("type") == "text":
            pieces.append(item.get("text", ""))
    return " ".join(pieces)


def process_images(images: List[tuple[PIL.Image.Image, str | None]]):
    """Run the agent graph on the uploaded images and return a transcript.

    Args:
        images: (image, caption) pairs; the caption is ignored.

    Returns:
        "No images uploaded" when `images` is empty. Otherwise one
        "<name>: <text>" entry per agent/tool message, separated by blank
        lines. If the graph raises, the entries collected so far are followed
        by an "Error: ..." entry instead of the exception propagating.
    """
    if not images:
        return "No images uploaded"

    # Convert all images to base64 content parts.
    image_contents = [_image_to_content_part(image) for image, _ in images]

    # Create the input for the workflow.
    input_data = {
        "messages": [
            HumanMessage(
                content=[
                    {"type": "text", "text": "answer the question about the following images"},
                    *image_contents,
                ]
            )
        ]
    }

    # Run the workflow.
    output = []
    try:
        for chunk in graph.stream(input_data, {"recursion_limit": 10}, stream_mode="values"):
            message = chunk["messages"][-1]
            # In "values" mode the first chunk is the input state itself; do
            # not echo the request (and its base64 image payload) to the user.
            if isinstance(message, HumanMessage):
                continue
            output.append(f"{message.name}: {_message_text(message.content)}")
    except Exception as e:
        # Top-level UI boundary: surface the failure in the output pane.
        output.append(f"Error: {repr(e)}")

    return "\n\n".join(output)
216
+
217
# Gradio UI: an image gallery in, the agents' transcript out as Markdown.
iface = gr.Interface(
    title="Image Question Answering",
    description="Upload an image to get it processed and answered.",
    fn=process_images,
    inputs=[gr.Gallery(type="pil", label="Upload an image")],
    outputs=[gr.Markdown(show_copy_button=True, label="Output")],
)

# Start serving the interface.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ langchain-core
3
+ langgraph
4
+ langchain-community
5
+ langchain-experimental
6
+ langchain-groq
7
+ langchain-google-genai
8
+ gradio
9
+ pillow
10
+ duckduckgo-search