# CV-Agent / app.py
# Samarth991's picture
# added ultralytics
# b60841d
import os
import streamlit as st
from PIL import Image
from pathlib import Path
from QA_bot import tyre_synap_bot as bot
from llm_service import get_llm
from hub_prompts import PREFIX
from extract_tools import get_all_tools
from langchain.agents import AgentExecutor
from langchain import hub
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
from langchain.tools.render import render_text_description
import logging
import warnings
# Suppress all Python warnings for the lifetime of the process.
warnings.filterwarnings("ignore")

# Application log file; filemode='w' truncates it each time it is opened.
logging.basicConfig(
    filename="newfile.log",
    format='%(asctime)s %(message)s',
    filemode='w',
)
logger = logging.getLogger()
# The root logger defaults to WARNING, which silently drops every
# logger.info(...) call made in this module.  The level is set on the logger
# itself rather than through basicConfig(level=...), because basicConfig does
# nothing when the root logger already has handlers installed.
logger.setLevel(logging.INFO)

# Module-level state: main() assigns `llm` and `tools`, and the agent helpers
# read them.  `cv_agent` is the slot for the agent executor.
llm = None
tools = None
cv_agent = None
@st.cache_resource
def call_llmservice_model(option,api_key):
    """Return the LLM object for the selected service.

    Thin wrapper around ``get_llm``: ``option`` is forwarded unchanged and
    ``api_key`` is passed as the ``key`` argument.  The function is wrapped
    in ``st.cache_resource``.
    """
    return get_llm(option=option, key=api_key)
@st.cache_resource
def setup_agent_prompt():
    """Pull the ReAct-JSON prompt from the LangChain hub and bind the tools.

    Reads the module-level ``tools`` collection.

    Returns:
        The prompt pulled from ``hwchase17/react-json``.  When tools are
        available, the prompt is partially filled with their rendered
        description, their comma-separated names and the ``PREFIX`` system
        message.  When no tools are available, an error is logged and the
        prompt is returned as pulled.
    """
    prompt = hub.pull("hwchase17/react-json")

    # `tools` is None at module level until main() assigns it, so a
    # truthiness check covers both None and an empty collection; len(None)
    # would raise TypeError.
    if not tools:
        logger.error("No Tools added")
        return prompt

    return prompt.partial(
        tools=render_text_description(tools),
        tool_names=", ".join(t.name for t in tools),
        additional_kwargs={
            'system_message': PREFIX,
        },
    )
def agent_initalize():
    """Build an ``AgentExecutor`` from the module-level ``llm`` and ``tools``.

    This function is intentionally NOT wrapped in ``st.cache_resource``.  It
    takes no arguments, so a cached version would keep returning the very
    first executor and ignore any later change of the global ``llm`` (for
    example when a different model or API key is selected in the sidebar).
    The model and the hub prompt are already cached by
    ``call_llmservice_model`` and ``setup_agent_prompt``; what remains here
    is only composing those objects.

    Returns:
        An ``AgentExecutor`` wired to the current ``llm`` and ``tools``, with
        ``verbose=True`` and ``handle_parsing_errors=True``.
    """
    agent_prompt = setup_agent_prompt()

    # Bind a stop sequence so generation halts at "\nObservation".
    stop_bound_llm = llm.bind(stop=["\nObservation"])

    # NOTE: langchain's create_react_agent could be used here instead:
    # https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py
    agent = (
        {
            "input": lambda x: x["input"],
            "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
        }
        | agent_prompt
        | stop_bound_llm
        | ReActJsonSingleInputOutputParser()
    )

    return AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
    )
# def agent_initalize(tools,max_iterations=5):
# zero_shot_agent = initialize_agent(
# agent= AgentType.ZERO_SHOT_REACT_DESCRIPTION,
# tools = tools,
# llm = llm,
# verbose = True,
# max_iterations = max_iterations,
# memory = None,
# handle_parsing_errors=True,
# agent_kwargs={
# 'system_message':PREFIX,
# # 'format_instructions':FORMAT_INSTRUCTIONS,
# # 'suffix':SUFFIX
# }
# )
# # sys_message = PREFIX
# # zero_shot_agent.agent.llm_chain.prompt.template = sys_message
# return zero_shot_agent
def main():
    """Render the Streamlit page and run the CV agent on an uploaded image.

    Draws the title, description and sidebar (LLM selector, API key field,
    image uploader).  Once an image is uploaded it:

    1. stores the image under the ``image_store`` directory,
    2. assigns the module-level ``llm``, ``tools`` and ``cv_agent``,
    3. shows the image in the sidebar, and
    4. hands the agent and the stored image path to ``bot``.
    """
    # One declaration up front for every module-level name assigned below.
    # (The original declared `global agent`, a name that does not exist in
    # this module, while assigning `cv_agent` as a local.)
    global llm, tools, cv_agent

    database_store = 'image_store'
    st.session_state.disabled = False
    st.session_state.visibility = "visible"

    st.title("Computer Vision Agent :sunglasses:")
    st.markdown("Use the CV agent to do Object Detection , Panoptic Segementation,Image Segmentation , Image Descrption task using the latest foundation models available opensource.")
    st.markdown('The CV Agent implements an Agent that decide what and when to use to provide the information related to the image asked my the user.')

    # Multi-line literals below are kept flush-left so their runtime content
    # is unchanged.
    st.markdown(
        """
<style>
section[data-testid="stSidebar"] {
width: 350px !important; # Set the width to your desired value
}
</style>
""",
        unsafe_allow_html=True,
    )

    with st.sidebar:
        st.header("About Project")
        st.markdown(
            """
- CV Agent can perform check on images to detemine the image quality and can also find out the segementaion mask and panoptic mask .
- This application uses multiple tools like Image caption tool, DuckDuckGo search tool, Maskformer tool , Panoptic segementation tool to perform these tasks.
- The decision on how to use the certain tool and when to use it soely relies on the Reasoning power of the LLM.
""")

    st.sidebar.subheader("Upload Image !")
    # NOTE(review): index=None means `option` is None until the user picks a
    # service; how get_llm treats None is not visible here - confirm.
    option = st.sidebar.selectbox(
        "Select your Large Language Model(LLM) ",
        (
            "deepseek-r1-distill-llama-70b",
            "gemma2-9b-it",
            "llama-3.2-3b-preview",
            "llama-3.2-1b-preview",
            "llama3-8b-8192",
            "Openai",
            "Google",
            "Ollama",
        ),
        index=None,
        placeholder="Select LLM Service...",
    )
    api_key = st.sidebar.text_input("API_KEY", type="password", key="password")
    uploaded_file = st.sidebar.file_uploader("Upload Image for Processing", type=['png','jpg','jpeg'])

    # Nothing more to do until an image has been uploaded.
    if uploaded_file is None:
        return

    # The file name is supplied by the client; keep only its final path
    # component so it cannot point outside the storage directory.
    safe_name = Path(uploaded_file.name).name
    store_dir = Path(database_store)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    store_dir.mkdir(parents=True, exist_ok=True)
    file_path = store_dir / safe_name

    llm = call_llmservice_model(option=option, api_key=api_key)
    logger.info("\tLLM Service %s Active ... !", llm.get_name())

    ## extract tools
    tools = get_all_tools()
    logger.info("\tFound %s tools ", len(tools))

    ## generate Agent
    cv_agent = agent_initalize()
    logger.info('\tAgent inintalized with %s tools ', len(tools))

    file_path.write_bytes(uploaded_file.getvalue())

    if file_path.is_file():
        st.sidebar.success("File uploaded successfully", icon="✅")
        with st.sidebar.container():
            image = Image.open(file_path)
            st.image(image, use_container_width=True)

        st.sidebar.subheader("""
Examples Questions:
- Describe about the image
- Tell me what are the things you can detect in the image .
- How is the image quality
""")
        bot(cv_agent, file_path)
# Script entry point: build the page only when executed directly.
if __name__ == "__main__":
    main()