"""Streamlit front end for the Computer Vision (CV) agent.

The app lets the user pick an LLM service, upload an image and then chat
with a ReAct-style LangChain agent that decides which vision tool to run
against the uploaded image.
"""

import logging
import os
import warnings
from pathlib import Path

import streamlit as st
from PIL import Image
from langchain import hub
from langchain.agents import AgentExecutor
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
from langchain.tools.render import render_text_description

from QA_bot import tyre_synap_bot as bot
from extract_tools import get_all_tools
from hub_prompts import PREFIX
from llm_service import get_llm

warnings.filterwarnings("ignore")

# The level must be set explicitly: the root logger defaults to WARNING, which
# would silently drop every ``logger.info`` call made in this module.
logging.basicConfig(
    filename="newfile.log",
    format='%(asctime)s %(message)s',
    filemode='w',
    level=logging.INFO,
)
logger = logging.getLogger()

# Module-level state shared between ``main`` and the cached builders below.
llm = None
tools = None
cv_agent = None

# Choices offered in the sidebar model selector.
LLM_OPTIONS = (
    "deepseek-r1-distill-llama-70b",
    "gemma2-9b-it",
    "llama-3.2-3b-preview",
    "llama-3.2-1b-preview",
    "llama3-8b-8192",
    "Openai",
    "Google",
    "Ollama",
)


@st.cache_resource
def call_llmservice_model(option, api_key):
    """Return the LLM for ``option``, cached per ``(option, api_key)`` pair.

    Args:
        option: Name of the model/service chosen in the sidebar.
        api_key: API key forwarded to ``get_llm``.

    Returns:
        Whatever ``get_llm`` returns for the given option and key.
    """
    return get_llm(option=option, key=api_key)


@st.cache_resource
def setup_agent_prompt():
    """Pull the ``hwchase17/react-json`` prompt and bind the tool descriptions.

    Reads the module-level ``tools``. When no tools are registered an error is
    logged and the prompt is returned without the tool variables filled in.

    Returns:
        The prompt pulled from the hub, partially applied when tools exist.
    """
    prompt = hub.pull("hwchase17/react-json")
    # ``not tools`` also covers the initial ``None`` value, where the former
    # ``len(tools)`` check raised TypeError.
    if not tools:
        logger.error("No Tools added")
        return prompt
    return prompt.partial(
        tools=render_text_description(tools),
        tool_names=", ".join(t.name for t in tools),
        additional_kwargs={
            'system_message': PREFIX,
        },
    )


@st.cache_resource
def agent_initalize(cache_key=None):
    """Build the ReAct agent executor from the module-level ``llm`` and ``tools``.

    Args:
        cache_key: Optional hashable value identifying the active LLM
            configuration. It is not used in the body; it only takes part in
            the ``st.cache_resource`` key so that a new executor is built when
            the selected model or API key changes, instead of the first
            executor being reused forever.

    Returns:
        An ``AgentExecutor`` wired to the current ``llm`` and ``tools``.
    """
    agent_prompt = setup_agent_prompt()
    # Stop generation at "\nObservation" (presumably so the observation text
    # comes from the executed tool rather than from the model).
    lm_with_stop = llm.bind(stop=["\nObservation"])
    # NOTE: create_react_agent could be used instead, see
    # https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py
    agent = (
        {
            "input": lambda x: x["input"],
            "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
        }
        | agent_prompt
        | lm_with_stop
        | ReActJsonSingleInputOutputParser()
    )
    return AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
    )


# Legacy implementation, kept for reference only:
#
# def agent_initalize(tools,max_iterations=5):
#     zero_shot_agent = initialize_agent(
#         agent= AgentType.ZERO_SHOT_REACT_DESCRIPTION,
#         tools = tools,
#         llm = llm,
#         verbose = True,
#         max_iterations = max_iterations,
#         memory = None,
#         handle_parsing_errors=True,
#         agent_kwargs={
#             'system_message':PREFIX,
#             # 'format_instructions':FORMAT_INSTRUCTIONS,
#             # 'suffix':SUFFIX
#         }
#     )
#     # sys_message = PREFIX
#     # zero_shot_agent.agent.llm_chain.prompt.template = sys_message
#     return zero_shot_agent


def main():
    """Render the Streamlit page and start the chat bot once an image is uploaded."""
    # Declare every module-level name this function rebinds. The original
    # declared ``agent`` while assigning ``cv_agent``, so the module-level
    # ``cv_agent`` was never updated.
    global llm, tools, cv_agent

    database_store = 'image_store'
    st.session_state.disabled = False
    st.session_state.visibility = "visible"

    st.title("Computer Vision Agent :sunglasses:")
    st.markdown(
        "Use the CV agent to do Object Detection , Panoptic Segementation,"
        "Image Segmentation , Image Descrption task using the latest "
        "foundation models available opensource."
    )
    st.markdown(
        'The CV Agent implements an Agent that decide what and when to use '
        'to provide the information related to the image asked my the user.'
    )
    # Currently renders only whitespace; kept as the slot for custom HTML/CSS.
    st.markdown(
        """ """,
        unsafe_allow_html=True,
    )

    with st.sidebar:
        st.header("About Project")
        st.markdown(
            "\n"
            "- CV Agent can perform check on images to detemine the image "
            "quality and can also find out the segementaion mask and "
            "panoptic mask .\n"
            "- This application uses multiple tools like Image caption tool, "
            "DuckDuckGo search tool, Maskformer tool , Panoptic segementation "
            "tool to perform these tasks.\n"
            "- The decision on how to use the certain tool and when to use it "
            "soely relies on the Reasoning power of the LLM. \n"
        )

    st.sidebar.subheader("Upload Image !")
    option = st.sidebar.selectbox(
        "Select your Large Language Model(LLM) ",
        LLM_OPTIONS,
        index=None,
        placeholder="Select LLM Service...",
    )
    api_key = st.sidebar.text_input("API_KEY", type="password", key="password")
    uploaded_file = st.sidebar.file_uploader(
        "Upload Image for Processing", type=['png', 'jpg', 'jpeg']
    )

    if uploaded_file is None:
        return

    # With ``index=None`` the selectbox yields None until the user picks a
    # model; do not try to build an LLM from a missing selection.
    if option is None:
        st.sidebar.warning("Please select an LLM service to continue.")
        return

    # The file name is supplied by the client: keep only its final component
    # so it cannot point outside ``database_store``.
    file_path = Path(database_store, Path(uploaded_file.name).name)
    os.makedirs(database_store, exist_ok=True)

    llm = call_llmservice_model(option=option, api_key=api_key)
    logger.info("\tLLM Service %s Active ... !", llm.get_name())

    # Extract tools.
    tools = get_all_tools()
    logger.info("\tFound %s tools ", len(tools))

    # Generate the agent; the key makes the cached executor follow the
    # currently selected model and API key.
    cv_agent = agent_initalize(cache_key=(option, api_key))
    logger.info('\tAgent inintalized with %s tools ', len(tools))

    file_path.write_bytes(uploaded_file.getvalue())

    if file_path.is_file():
        st.sidebar.success("File uploaded successfully", icon="✅")
        with st.sidebar.container():
            # Close the image file handle once Streamlit has rendered it.
            with Image.open(file_path) as image:
                st.image(image, use_container_width=True)

        st.sidebar.subheader(
            " Examples Questions: \n"
            "- Describe about the image\n"
            "- Tell me what are the things you can detect in the image .\n"
            "- How is the image quality\n"
        )
        bot(cv_agent, file_path)


if __name__ == '__main__':
    main()