# Spaces: Samarth991 / CV-Agent (Sleeping) - File size: 6,184 Bytes

import os
import streamlit as st
from PIL import Image
from pathlib import Path
from QA_bot import tyre_synap_bot as bot 
from llm_service import get_llm
from hub_prompts import PREFIX 

from extract_tools import get_all_tools
from langchain.agents import AgentExecutor
from langchain import hub
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
from langchain.tools.render import render_text_description

import logging
import warnings
# Suppress every Python warning raised in this process.
warnings.filterwarnings("ignore")

# Log to "newfile.log", truncating it when the handler is first created.
# The level is given explicitly: the root logger defaults to WARNING, which
# would silently discard all the logger.info(...) calls made in this module.
logging.basicConfig(filename="newfile.log",
                    format='%(asctime)s %(message)s',
                    filemode='w',
                    level=logging.INFO)
logger = logging.getLogger()
# basicConfig() does nothing when the root logger already has handlers, so
# the level is also set directly on the logger.
logger.setLevel(logging.INFO)

# Module-level state: ``llm`` and ``tools`` are assigned in main() and read by
# setup_agent_prompt() / agent_initalize(); ``cv_agent`` is the slot reserved
# for the agent executor.
llm = None
tools = None
cv_agent = None

@st.cache_resource
def call_llmservice_model(option,api_key):
    """Return the LLM client produced by ``get_llm`` for the given service.

    Args:
        option: Name of the LLM service/model, forwarded as ``option``.
        api_key: Credential for that service, forwarded as ``key``.

    Returns:
        Whatever ``get_llm`` returns for these arguments. The call is wrapped
        in ``st.cache_resource``.
    """
    return get_llm(key=api_key, option=option)

@st.cache_resource
def setup_agent_prompt():
    """Pull the ReAct JSON prompt from the hub and bind the tool descriptions.

    Reads the module-level ``tools``. When no tools are available an error is
    logged and the prompt is returned exactly as pulled; otherwise the prompt
    is partially filled with the rendered tool descriptions, the
    comma-separated tool names and ``PREFIX`` as ``system_message`` inside
    ``additional_kwargs``.

    Returns:
        The prompt object, partially filled when tools are present.
    """
    # NOTE(review): this function takes no arguments, so st.cache_resource
    # presumably keeps returning the first result even if ``tools`` changes
    # later - confirm that is intended.
    react_prompt = hub.pull("hwchase17/react-json")

    # Truthiness covers both an empty collection and the initial ``None`` of
    # the module global; ``len(None)`` would raise TypeError here.
    if not tools:
        logger.error ("No Tools added")
        return react_prompt

    return react_prompt.partial(
        tools=render_text_description(tools),
        tool_names=", ".join(t.name for t in tools),
        additional_kwargs={
            'system_message': PREFIX,
        },
    )

def agent_initalize():
    """Build an ``AgentExecutor`` around the module-level ``llm`` and ``tools``.

    This function is intentionally NOT wrapped in ``st.cache_resource``. It
    takes no arguments, so a cached version would be keyed on nothing that
    changes and would keep returning the executor bound to the first LLM, even
    after the user selects a different model or API key. Only the chain is
    assembled here; prompt retrieval is cached in ``setup_agent_prompt``.

    Returns:
        AgentExecutor: executor with ``verbose=True`` and
        ``handle_parsing_errors=True``, wired to the current ``tools``.
    """
    agent_prompt = setup_agent_prompt()
    # Stop generation before the model starts writing an "Observation" itself.
    lm_with_stop = llm.bind(stop=["\nObservation"])
    #### we can use create_react_agent https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py
    agent = (
        {
            "input": lambda x: x["input"],
            # Flatten the intermediate (action, observation) steps into text.
            "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
        }
        | agent_prompt
        | lm_with_stop
        | ReActJsonSingleInputOutputParser()
    )

    # instantiate AgentExecutor
    return AgentExecutor(
        agent=agent,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
    )

# def agent_initalize(tools,max_iterations=5):
#     zero_shot_agent = initialize_agent(
#         agent= AgentType.ZERO_SHOT_REACT_DESCRIPTION,
#         tools = tools,
#         llm = llm,
#         verbose = True,
#         max_iterations = max_iterations,
#         memory = None,
#         handle_parsing_errors=True,
#         agent_kwargs={
#         'system_message':PREFIX,
#         # 'format_instructions':FORMAT_INSTRUCTIONS,
#         # 'suffix':SUFFIX
#         }
#     )
#     # sys_message = PREFIX
#     # zero_shot_agent.agent.llm_chain.prompt.template = sys_message
#     return zero_shot_agent


def main():
    """Render the Streamlit page and run the CV agent on an uploaded image.

    Draws the title, description and sidebar (project notes, LLM selector,
    API-key field, image uploader). Once an image is uploaded it:

    1. obtains the LLM via ``call_llmservice_model`` and the tools via
       ``get_all_tools`` and stores both in the module globals,
    2. builds the agent executor with ``agent_initalize`` and stores it in the
       module-level ``cv_agent``,
    3. saves the upload under ``image_store/`` and previews it in the sidebar,
    4. hands the executor and the saved file path to ``bot``.
    """
    # Declared together up front; the original declared ``global agent`` while
    # assigning ``cv_agent``, so the module-level ``cv_agent`` was never set.
    global llm, tools, cv_agent

    database_store = 'image_store'
    st.session_state.disabled = False 
    st.session_state.visibility = "visible"
    
    st.title("Computer Vision Agent :sunglasses:")
    st.markdown("Use the CV agent to do Object Detection , Panoptic Segementation,Image Segmentation , Image Descrption task using the latest foundation models available opensource.")
    st.markdown('The CV Agent implements an Agent that decide what and when to use to provide the information related to the image asked my the user.')
    st.markdown(
    """
    <style>
        section[data-testid="stSidebar"] {
            width: 350px !important; # Set the width to your desired value
        }
    </style>
    """,
    unsafe_allow_html=True,
    )
    
    with st.sidebar:
        st.header("About Project")
        st.markdown(
            """
            - CV Agent can perform check on images to detemine the image quality and can also find out the segementaion mask and panoptic mask .
            - This application uses multiple tools like Image caption tool, DuckDuckGo search tool, Maskformer tool , Panoptic segementation tool to perform these tasks.
            - The decision on how to use the certain tool and when to use it soely relies on the Reasoning power of the LLM. 
            """)
        st.sidebar.subheader("Upload Image !")
        option = st.sidebar.selectbox(
            "Select your Large Language Model(LLM) ",("deepseek-r1-distill-llama-70b",
                                                "gemma2-9b-it",
                                                "llama-3.2-3b-preview",
                                                "llama-3.2-1b-preview",
                                                "llama3-8b-8192", 
                                                "Openai", 
                                                "Google",
                                                "Ollama"),
            index=None,
            placeholder="Select LLM Service...", 
            )
        api_key = st.sidebar.text_input("API_KEY", type="password", key="password")

    uploaded_file = st.sidebar.file_uploader("Upload Image for Processing", type=['png','jpg','jpeg'])
    
    if uploaded_file is None:
        return

    # The file name comes from the client; keep only its final component so
    # the upload cannot be written outside ``database_store``.
    file_path = Path(database_store, Path(uploaded_file.name).name)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(database_store, exist_ok=True)

    llm = call_llmservice_model(option=option,api_key=api_key)
    if llm is None:
        # Without a model the agent cannot be built; report it instead of
        # failing on ``None.get_name()`` below.
        st.sidebar.error("Could not initialise the selected LLM service.")
        return
    logger.info("\tLLM Service {} Active ... !".format(llm.get_name()))
    ## extract tools 
    tools = get_all_tools()
    logger.info("\tFound {} tools ".format(len(tools)))
    ## generate Agent 
    cv_agent = agent_initalize()
    logger.info('\tAgent inintalized with {} tools '.format(len(tools)))

    with open(file_path, mode='wb') as w:
        w.write(uploaded_file.getvalue())

    if file_path.is_file():
        st.sidebar.success("File uploaded successfully",icon="✅")

    with st.sidebar.container():
        # Context manager releases the image's file handle after rendering.
        with Image.open(file_path) as image:
            st.image(image,use_container_width=True)
    st.sidebar.subheader(""" 
        Examples Questions:
            - Describe about the image
            - Tell me what are the things you can detect in the image .
            - How is the image quality 
        """)

    bot(cv_agent,file_path)

# Script entry point: start the app only when this file is executed directly.
if __name__ == "__main__":
    main()