File size: 6,184 Bytes
0e78cbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b60841d
 
0e78cbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b60841d
 
 
0e78cbf
 
 
b60841d
0e78cbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import os
import streamlit as st
from PIL import Image
from pathlib import Path
from QA_bot import tyre_synap_bot as bot 
from llm_service import get_llm
from hub_prompts import PREFIX 

from extract_tools import get_all_tools
from langchain.agents import AgentExecutor
from langchain import hub
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
from langchain.tools.render import render_text_description

import logging
import warnings
warnings.filterwarnings("ignore")

# Route root-logger output to "newfile.log"; filemode='w' truncates the file
# each time this module is executed.
# NOTE(review): no `level=` is passed, so the root logger keeps its default
# threshold (WARNING per the logging docs) -- the logger.info(...) calls in
# main() may therefore never reach the file. Confirm whether INFO is wanted.
logging.basicConfig(filename="newfile.log",
                    format='%(asctime)s %(message)s',
                    filemode='w')
# getLogger() with no name returns the root logger.
logger = logging.getLogger()

# Module-level state shared between main() and the cached builder functions:
# main() rebinds `llm` and `tools`; setup_agent_prompt() and agent_initalize()
# read them as globals.
llm = None 
tools = None 
# Presumably meant to hold the AgentExecutor built in main() -- verify against
# the `global` declarations there.
cv_agent = None 

@st.cache_resource
def call_llmservice_model(option, api_key):
    """Return the LLM client produced by ``get_llm`` for the chosen service.

    Args:
        option: Identifier of the LLM service/model picked in the sidebar.
        api_key: Credential forwarded to ``get_llm`` as its ``key`` argument.

    Returns:
        Whatever ``get_llm`` returns for the given option and key.
    """
    return get_llm(option=option, key=api_key)

@st.cache_resource
def setup_agent_prompt():
    """Pull the ReAct-JSON prompt from the LangChain hub and bind tool details.

    Reads the module-level ``tools``. When tools are available, the prompt is
    partially filled with their rendered description and a comma-separated
    list of their names; otherwise an error is logged and the prompt is
    returned exactly as pulled.

    Returns:
        The hub prompt, partially applied when at least one tool is present.
    """
    prompt = hub.pull("hwchase17/react-json")

    # Truthiness covers both an empty collection and the initial module-level
    # ``None``; ``len(None)`` would raise TypeError before anything is logged.
    if not tools:
        logger.error ("No Tools added")
        return prompt

    # NOTE(review): ``additional_kwargs`` is handed to ``partial`` like any
    # other template variable -- confirm the pulled template actually consumes
    # it, otherwise PREFIX never reaches the model.
    return prompt.partial(
        tools=render_text_description(tools),
        tool_names=", ".join(t.name for t in tools),
        additional_kwargs={
            'system_message': PREFIX,
        },
    )

@st.cache_resource
def agent_initalize():
    """Assemble the ReAct agent chain and wrap it in an ``AgentExecutor``.

    Uses the module-level ``llm`` and ``tools``. The chain maps the incoming
    dict to the prompt inputs, runs the prompt from ``setup_agent_prompt``,
    calls the LLM with a stop sequence of ``"\\nObservation"``, and parses the
    reply with ``ReActJsonSingleInputOutputParser``.

    NOTE(review): the function takes no arguments yet depends on globals while
    being wrapped in ``st.cache_resource`` -- check whether a later change of
    ``llm``/``tools`` is expected to produce a new executor.

    Returns:
        AgentExecutor: verbose executor with parsing-error handling enabled.
    """
    react_prompt = setup_agent_prompt()
    stop_bound_llm = llm.bind(stop=["\nObservation"])

    # An equivalent ready-made helper exists as create_react_agent:
    # https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py
    prompt_inputs = {
        "input": lambda payload: payload["input"],
        "agent_scratchpad": lambda payload: format_log_to_str(
            payload["intermediate_steps"]
        ),
    }
    output_parser = ReActJsonSingleInputOutputParser()
    react_chain = prompt_inputs | react_prompt | stop_bound_llm | output_parser

    return AgentExecutor(
        agent=react_chain,
        tools=tools,
        verbose=True,
        handle_parsing_errors=True,
    )

# def agent_initalize(tools,max_iterations=5):
#     zero_shot_agent = initialize_agent(
#         agent= AgentType.ZERO_SHOT_REACT_DESCRIPTION,
#         tools = tools,
#         llm = llm,
#         verbose = True,
#         max_iterations = max_iterations,
#         memory = None,
#         handle_parsing_errors=True,
#         agent_kwargs={
#         'system_message':PREFIX,
#         # 'format_instructions':FORMAT_INSTRUCTIONS,
#         # 'suffix':SUFFIX
#         }
#     )
#     # sys_message = PREFIX
#     # zero_shot_agent.agent.llm_chain.prompt.template = sys_message
#     return zero_shot_agent


def main():
    """Render the CV-agent page and run the chat bot on an uploaded image.

    Draws the title, description and sidebar (project notes, LLM selector,
    API-key field, image uploader). Once an image is uploaded it:

    1. builds the LLM via ``call_llmservice_model`` and stores it in the
       module-level ``llm``;
    2. loads the tools via ``get_all_tools`` into the module-level ``tools``;
    3. builds the agent executor via ``agent_initalize`` into ``cv_agent``;
    4. saves the upload under ``image_store/`` and previews it in the sidebar;
    5. hands the agent and the saved file path to ``bot``.
    """
    # All three names are module-level placeholders read elsewhere in this
    # file; declaring them here makes the assignments below rebind the
    # globals instead of creating function locals.
    global llm, tools, cv_agent

    database_store = 'image_store'
    st.session_state.disabled = False 
    st.session_state.visibility = "visible"
    
    st.title("Computer Vision Agent :sunglasses:")
    st.markdown("Use the CV agent to do Object Detection , Panoptic Segementation,Image Segmentation , Image Descrption task using the latest foundation models available opensource.")
    st.markdown('The CV Agent implements an Agent that decide what and when to use to provide the information related to the image asked my the user.')
    st.markdown(
    """
    <style>
        section[data-testid="stSidebar"] {
            width: 350px !important; # Set the width to your desired value
        }
    </style>
    """,
    unsafe_allow_html=True,
    )
    
    with st.sidebar:
        st.header("About Project")
        st.markdown(
            """
            - CV Agent can perform check on images to detemine the image quality and can also find out the segementaion mask and panoptic mask .
            - This application uses multiple tools like Image caption tool, DuckDuckGo search tool, Maskformer tool , Panoptic segementation tool to perform these tasks.
            - The decision on how to use the certain tool and when to use it soely relies on the Reasoning power of the LLM. 
            """)
        st.sidebar.subheader("Upload Image !")
        option = st.sidebar.selectbox(
            "Select your Large Language Model(LLM) ",("deepseek-r1-distill-llama-70b",
                                                "gemma2-9b-it",
                                                "llama-3.2-3b-preview",
                                                "llama-3.2-1b-preview",
                                                "llama3-8b-8192", 
                                                "Openai", 
                                                "Google",
                                                "Ollama"),
            index=None,
            placeholder="Select LLM Service...", 
            )
        api_key = st.sidebar.text_input("API_KEY", type="password", key="password")

    uploaded_file = st.sidebar.file_uploader("Upload Image for Processing", type=['png','jpg','jpeg'])
    
    if uploaded_file is None:
        return

    # The file name is supplied by the client; keep only its final component
    # so the upload cannot be written outside of `database_store`.
    safe_name = Path(uploaded_file.name).name
    file_path = Path(database_store, safe_name)
    os.makedirs(database_store, exist_ok=True)

    # NOTE(review): `option` is None until the user picks a service
    # (selectbox uses index=None) -- confirm get_llm tolerates that.
    llm = call_llmservice_model(option=option,api_key=api_key)
    logger.info("\tLLM Service {} Active ... !".format(llm.get_name()))

    ## extract tools 
    tools = get_all_tools()
    logger.info("\tFound {} tools ".format(len(tools)))

    ## generate Agent 
    cv_agent = agent_initalize()
    logger.info('\tAgent inintalized with {} tools '.format(len(tools)))
    
    with open(file_path, mode='wb') as w:
        w.write(uploaded_file.getvalue())
    
    if os.path.isfile(file_path):
        st.sidebar.success("File uploaded successfully",icon="✅")
    
    with st.sidebar.container():
        image = Image.open(file_path)
        st.image(image,use_container_width=True)
    st.sidebar.subheader(""" 
        Examples Questions:
            - Describe about the image
            - Tell me what are the things you can detect in the image .
            - How is the image quality 
        """)
    
    bot(cv_agent,file_path)

if __name__ == '__main__':
    main()