Samarth991 committed on
Commit 0e78cbf · 1 Parent(s): bab4b66

adding CV agent file

.gitignore ADDED
@@ -0,0 +1,164 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+ Data/
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+ *.json
+ image_store/
QA_bot.py ADDED
@@ -0,0 +1,57 @@
+ import os
+ import re
+ import time
+
+ import streamlit as st
+ from PIL import Image
+
+ def reset_conversation():
+     st.session_state.messages = []
+
+ def display_mask_image(image_path):
+     if os.path.isfile(image_path):
+         image = Image.open(image_path)
+         st.image(image, caption='Final Mask', use_column_width=True)
+
+
+ def tyre_synap_bot(filter_agent, image_file_path):
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+
+     print("Found image file path: ", image_file_path)
+     # Display chat messages from history on app rerun
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # React to user input
+     if prompt := st.chat_input("What is up?"):
+         # Display user message in chat message container
+         st.chat_message("user").markdown(prompt)
+         # Add user message to chat history
+         st.session_state.messages.append({"role": "user", "content": prompt})
+
+         ai_response = filter_agent.invoke(
+             {
+                 "input": f'{prompt}, provided image path: {image_file_path}'
+             }
+         )
+
+         response = f"Echo: {ai_response['output']}"
+         with st.chat_message("assistant"):
+             message_placeholder = st.empty()
+             full_response = ""
+             if 'mask' in ai_response['output']:
+                 display_mask_image('final_mask.png')
+
+             # Stream the answer chunk by chunk; the split pattern keeps the
+             # whitespace, so no extra separator is needed when re-joining
+             for chunk in re.split(r'(\s+)', response):
+                 full_response += chunk
+                 time.sleep(0.01)
+                 # Add a blinking cursor to simulate typing
+                 message_placeholder.markdown(full_response + "▌")
+         # Add assistant response to chat history
+         st.session_state.messages.append({"role": "assistant", "content": full_response})
+     st.button('Reset Chat', on_click=reset_conversation)
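
A note on the interface: tyre_synap_bot only assumes an agent object whose .invoke({"input": ...}) call returns a dict with an "output" key. A minimal sketch (not part of the commit; the stub agent and sample path are hypothetical) that exercises the chat widget without the real AgentExecutor:

    # stub_bot.py -- hypothetical smoke test, launched with `streamlit run stub_bot.py`
    from QA_bot import tyre_synap_bot

    class EchoAgent:
        def invoke(self, payload: dict) -> dict:
            # Mimic AgentExecutor's return shape by echoing the prompt back
            return {"output": payload["input"]}

    tyre_synap_bot(EchoAgent(), "image_store/sample.jpg")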
app.py ADDED
@@ -0,0 +1,169 @@
+ import os
+ import logging
+ import warnings
+ from pathlib import Path
+
+ import streamlit as st
+ from PIL import Image
+
+ from QA_bot import tyre_synap_bot as bot
+ from llm_service import get_llm
+ from hub_prompts import PREFIX
+ from extract_tools import get_all_tools
+
+ from langchain.agents import AgentExecutor
+ from langchain import hub
+ from langchain.agents.format_scratchpad import format_log_to_str
+ from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
+ from langchain.tools.render import render_text_description
+
+ warnings.filterwarnings("ignore")
+
+ logging.basicConfig(filename="newfile.log",
+                     format='%(asctime)s %(message)s',
+                     filemode='w')
+ logger = logging.getLogger()
+
+ llm = None
+ tools = None
+ cv_agent = None
+
+ @st.cache_resource
+ def call_llmservice_model(option, api_key):
+     model = get_llm(option=option, key=api_key)
+     return model
+
+ @st.cache_resource
+ def setup_agent_prompt():
+     prompt = hub.pull("hwchase17/react-json")
+     if len(tools) == 0:
+         logger.error("No tools added")
+     else:
+         prompt = prompt.partial(
+             tools=render_text_description(tools),
+             tool_names=", ".join([t.name for t in tools]),
+             additional_kwargs={
+                 'system_message': PREFIX,
+             }
+         )
+     return prompt
+
+ @st.cache_resource
+ def agent_initialize():
+     agent_prompt = setup_agent_prompt()
+     llm_with_stop = llm.bind(stop=["\nObservation"])
+     # Alternatively, use create_react_agent:
+     # https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/agents/react/agent.py
+     agent = (
+         {
+             "input": lambda x: x["input"],
+             "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
+         }
+         | agent_prompt
+         | llm_with_stop
+         | ReActJsonSingleInputOutputParser()
+     )
+
+     # Instantiate the AgentExecutor
+     agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True)
+     return agent_executor
+
+ # def agent_initialize(tools, max_iterations=5):
+ #     zero_shot_agent = initialize_agent(
+ #         agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+ #         tools=tools,
+ #         llm=llm,
+ #         verbose=True,
+ #         max_iterations=max_iterations,
+ #         memory=None,
+ #         handle_parsing_errors=True,
+ #         agent_kwargs={
+ #             'system_message': PREFIX,
+ #             # 'format_instructions': FORMAT_INSTRUCTIONS,
+ #             # 'suffix': SUFFIX
+ #         }
+ #     )
+ #     # sys_message = PREFIX
+ #     # zero_shot_agent.agent.llm_chain.prompt.template = sys_message
+ #     return zero_shot_agent
+
+
+ def main():
+     database_store = 'image_store'
+     st.session_state.disabled = False
+     st.session_state.visibility = "visible"
+
+     st.title("Computer Vision Agent :sunglasses:")
+     st.markdown("Use the CV agent for Object Detection / Panoptic Segmentation / Image Segmentation / Image Description.")
+     st.markdown(
+         """
+         <style>
+         section[data-testid="stSidebar"] {
+             width: 350px !important; /* Set the width to your desired value */
+         }
+         </style>
+         """,
+         unsafe_allow_html=True,
+     )
+
+     with st.sidebar:
+         st.header("About Project")
+         st.markdown(
+             """
+             - Agent to filter images on the basis of multiple factors, such as image quality, object proportion in the image, and weather in the image.
+             - This application uses multiple tools, such as an image caption tool, a DuckDuckGo search tool, a Maskformer tool, and a weather predictor.
+             """)
+         st.sidebar.subheader("Upload Image!")
+         option = st.sidebar.selectbox(
+             "Select the Large Language Model", ("deepseek-r1-distill-llama-70b",
+                                                 "gemma2-9b-it",
+                                                 "llama-3.2-3b-preview",
+                                                 "llama-3.2-1b-preview",
+                                                 "llama3-8b-8192",
+                                                 "Openai",
+                                                 "Google",
+                                                 "Ollama"),
+             index=None,
+             placeholder="Select LLM Service...",
+         )
+         api_key = st.sidebar.text_input("API_KEY", type="password", key="password")
+
+     uploaded_file = st.sidebar.file_uploader("Upload Image for Processing", type=['png', 'jpg', 'jpeg'])
+
+     if uploaded_file is not None:
+         file_path = Path(database_store, uploaded_file.name)
+         if not os.path.isdir(database_store):
+             os.makedirs(database_store)
+
+         global llm
+         llm = call_llmservice_model(option=option, api_key=api_key)
+         logger.info("\tLLM Service {} Active ... !".format(llm.get_name()))
+         # Extract tools
+         global tools
+         tools = get_all_tools()
+         logger.info("\tFound {} tools".format(len(tools)))
+         # Generate agent
+         global cv_agent
+         cv_agent = agent_initialize()
+         logger.info('\tAgent initialized with {} tools'.format(len(tools)))
+
+         with open(file_path, mode='wb') as w:
+             w.write(uploaded_file.getvalue())
+
+         if os.path.isfile(file_path):
+             st.sidebar.success("File uploaded successfully", icon="✅")
+
+         with st.sidebar.container():
+             image = Image.open(file_path)
+             st.image(image, use_container_width=True)
+         st.sidebar.subheader("""
+         Example Questions:
+         - Describe the image
+         - Tell me what you can detect in the image.
+         - How is the image quality?
+         """)
+
+         bot(cv_agent, file_path)
+
+ if __name__ == '__main__':
+     main()
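
The app itself is launched with `streamlit run app.py`. For reference, a hedged, self-contained sketch (not part of the commit) of the same LCEL composition agent_initialize() builds: a dict of lambdas feeding the hub prompt, the stop-bound chat model, and the ReAct JSON parser. It assumes GROQ_API_KEY is set and that hub.pull has network access; the empty partials stand in for the rendered tool descriptions:

    from langchain import hub
    from langchain.agents.format_scratchpad import format_log_to_str
    from langchain.agents.output_parsers import ReActJsonSingleInputOutputParser
    from langchain_groq import ChatGroq

    prompt = hub.pull("hwchase17/react-json").partial(tools="", tool_names="")
    llm = ChatGroq(model="gemma2-9b-it").bind(stop=["\nObservation"])
    agent = (
        {
            "input": lambda x: x["input"],
            "agent_scratchpad": lambda x: format_log_to_str(x["intermediate_steps"]),
        }
        | prompt
        | llm
        | ReActJsonSingleInputOutputParser()
    )
    # agent.invoke({"input": "...", "intermediate_steps": []}) yields the next AgentAction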
extract_tools.py ADDED
@@ -0,0 +1,254 @@
+ import os
+ import logging
+
+ import cv2
+ import requests
+ import torch
+ from PIL import Image
+
+ from llm_service import get_llm
+ from langchain_core.tools import tool, Tool
+ from langchain_community.tools import DuckDuckGoSearchResults
+ from langchain_groq import ChatGroq
+ from utils import draw_panoptic_segmentation
+
+ from tool_utils.clip_segmentation import CLIPSEG
+ from tool_utils.object_extractor import create_object_extraction_chain
+ from tool_utils.yolo_world import YoloWorld
+ from tool_utils.image_metadata import image_brightness, variance_of_laplacian, get_signal_to_noise_ratio
+
+ try:
+     from transformers import BlipProcessor, BlipForConditionalGeneration
+     from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
+ except ImportError as err:
+     logging.error("Import error: {}".format(err))
+
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+ logging.info("Loading Foundation Models")
+ try:
+     clipseg_model = CLIPSEG()
+ except Exception as err:
+     logging.error("Unable to load CLIPSEG model: {}".format(err))
+ try:
+     maskformer_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-base-coco-panoptic")
+     maskformer_model = Mask2FormerForUniversalSegmentation.from_pretrained("facebook/mask2former-swin-base-coco-panoptic")
+ except Exception as err:
+     logging.error("Unable to load Maskformer model: {}".format(err))
+
+
+ def get_groq_model(model_name="gemma2-9b-it"):
+     # ChatGroq reads GROQ_API_KEY from the environment
+     llm_groq = ChatGroq(model=model_name)
+     return llm_groq
+
+ @tool
+ def panoptic_image_segmentation(image_path: str) -> str:
+     """
+     Create a panoptic segmentation mask. The tool uses a Mask2Former network to build a panoptic
+     segmentation of all the objects present in the image. Use it when the user asks for a panoptic segmentation.
+     """
+     if image_path.startswith('https'):
+         image = Image.open(requests.get(image_path, stream=True).raw).convert('RGB')
+     else:
+         image = Image.open(image_path).convert('RGB')
+     maskformer_model.to(device)
+     inputs = maskformer_processor(image, return_tensors="pt").to(device)
+     with torch.no_grad():
+         outputs = maskformer_model(**inputs)
+
+     prediction = maskformer_processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
+     save_mask_path = draw_panoptic_segmentation(maskformer_model, prediction['segmentation'], prediction['segments_info'])
+     labels = []
+     for segment in prediction['segments_info']:
+         label_name = maskformer_model.config.id2label[segment['label_id']]
+         print(label_name)
+         labels.append(label_name)
+     return 'Panoptic segmentation image {} created with labels {}'.format(save_mask_path, labels)
+
+ @tool
+ def image_description(img_path: str) -> str:
+     """Use this tool to describe the image. It also helps to identify the weather in the image."""
+     hf_model = "Salesforce/blip-image-captioning-base"
+     if img_path.startswith('https'):
+         image = Image.open(requests.get(img_path, stream=True).raw).convert('RGB')
+     else:
+         image = Image.open(img_path).convert('RGB')
+     try:
+         processor = BlipProcessor.from_pretrained(hf_model)
+         caption_model = BlipForConditionalGeneration.from_pretrained(hf_model).to(device)
+     except Exception as err:
+         logging.error("Unable to load the BLIP model: {}".format(err))
+
+     logging.info("Image caption model loaded!")
+
+     # Unconditional image captioning
+     inputs = processor(image, return_tensors='pt').to(device)
+     output = caption_model.generate(**inputs, max_new_tokens=50)
+     caption = processor.decode(output[0], skip_special_tokens=True)
+
+     # Conditional image captioning
+     obj_text = "Total number of objects in image "
+     inputs_2 = processor(image, obj_text, return_tensors='pt').to(device)
+     out_2 = caption_model.generate(**inputs_2, max_new_tokens=50)
+     object_caption = processor.decode(out_2[0], skip_special_tokens=True)
+
+     # Clear the GPU cache
+     with torch.no_grad():
+         torch.cuda.empty_cache()
+     text = caption + " . " + object_caption + " ."
+     return text
+
+
+ @tool
+ def clipsegmentation_mask(input_data: str) -> str:
+     """
+     Extract object masks from the image. The input is a comma-separated string:
+     the image path followed by the objects to mask, e.g. "image.jpg,road,car".
+     """
+     data = input_data.split(",")
+     image_path = data[0]
+     object_prompts = data[1:]
+     masks = clipseg_model.get_segmentation_mask(image_path, object_prompts)
+     return masks
+
+ @tool
+ def generate_bounding_box_tool(input_data: str) -> str:
+     """Use this tool when it is required to detect objects and provide bounding boxes for the given image and list of objects."""
+     yolo_world_model = YoloWorld()
+     data = input_data.split(",")
+     image_path = data[0]
+     object_prompts = data[1:]
+     object_data = yolo_world_model.run_inference(image_path, object_prompts)
+     return object_data
+
+ @tool
+ def object_extraction(img_path: str) -> str:
+     """Use this tool to identify the objects within the image."""
+     hf_model = "Salesforce/blip-image-captioning-base"
+     if img_path.startswith('https'):
+         image = Image.open(requests.get(img_path, stream=True).raw).convert('RGB')
+     else:
+         image = Image.open(img_path).convert('RGB')
+     try:
+         processor = BlipProcessor.from_pretrained(hf_model)
+         caption_model = BlipForConditionalGeneration.from_pretrained(hf_model).to(device)
+     except Exception as err:
+         logging.error("Unable to load the BLIP model: {}".format(err))
+
+     logging.info("Image caption model loaded!")
+
+     # Unconditional image captioning
+     inputs = processor(image, return_tensors='pt').to(device)
+     output = caption_model.generate(**inputs, max_new_tokens=50)
+     llm = get_groq_model()
+     getobject_chain = create_object_extraction_chain(llm=llm)
+
+     extracted_objects = getobject_chain.invoke({
+         'context': processor.decode(output[0], skip_special_tokens=True)
+     }).objects
+
+     print("Extracted objects: ", extracted_objects)
+     # Clear the GPU cache
+     with torch.no_grad():
+         torch.cuda.empty_cache()
+
+     return extracted_objects.split(',')
+
+ @tool
+ def get_image_quality(image_path: str) -> str:
+     """
+     Find out the quality parameters of the image: whether it is blurry, whether it is bright,
+     and its Signal to Noise Ratio.
+     Example outputs of the tool:
+     example 1: Image is blurry. Image is not bright. Signal to Noise is less than 1 - More Noise in image
+     example 2: Image is not blurry. Image is bright. Signal to Noise is greater than 1 - More Signal in image
+     """
+     image = cv2.imread(image_path)
+     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+     brightness_text = image_brightness(image)
+     blurry_text = variance_of_laplacian(image)
+     snr_text = get_signal_to_noise_ratio(image)
+     final_text = "Image properties are :\n{}\n{}\n{}".format(blurry_text, brightness_text, snr_text)
+     return final_text
+
+
+ def get_all_tools():
+     # Bind tools
+     image_desc_tool = Tool(
+         name='Image_Description_Tool',
+         func=image_description,
+         description="""
+         The tool describes the image or creates a caption for it.
+         Use this tool if the user asks to describe or generate a caption for the image.
+         It can also be used to identify the weather within the image.
+         Example user questions:
+         1. Describe the image?
+         2. What does the weather look like in the image?
+         """
+     )
+
+     clipseg_tool = Tool(
+         name='ClipSegmentation-tool',
+         func=clipsegmentation_mask,
+         description="""Use this tool when the user asks to generate the segmentation mask of the objects they provide.
+         The input to the tool is the path of the image and the list of objects for which a segmentation mask is to be generated.
+         For example:
+         Query: Provide a segmentation mask of all road, car and dog in the image.
+         The tool will generate the segmentation mask of those objects in the image.
+         For such a query, first use the object extraction tool to identify the objects, then use this tool to
+         generate the segmentation mask for them.
+         """
+     )
+
+     bounding_box_generator = Tool(
+         name='Bounding Box Generator',
+         func=generate_bounding_box_tool,
+         description="The tool provides bounding boxes for the given image and list of objects. "
+                     "Use this tool when the user asks for bounding boxes for objects. If the user has not specified the object names, "
+                     "use the object extraction tool to identify the objects first, then use this tool to generate their bounding boxes. "
+                     "The input to this tool is the path of the image and the list of objects for which bounding boxes are to be generated."
+     )
+
+     object_extractor = Tool(
+         name="Object Extraction Tool",
+         func=object_extraction,
+         description="The tool extracts the objects within the image. Use this tool if the user specifically asks to identify "
+                     "what objects can be seen in the image."
+     )
+
+     image_parameters_tool = Tool(
+         name='Image Parameters_Tool',
+         func=get_image_quality,
+         description="""This tool determines
+         - whether the image is blurry or not
+         - whether the image is bright/sharp or not
+         - the SNR ratio of the image
+         Based on the tool output, take a proper decision regarding the image quality."""
+     )
+
+     panoptic_segmentation = Tool(
+         name='Panoptic_Segmentation_tool',
+         func=panoptic_image_segmentation,
+         description="The tool creates a panoptic segmentation mask. It uses a Mask2Former network to build a panoptic segmentation of all "
+                     "the objects present in the image. Use the tool when the user asks for a panoptic segmentation or to count objects in the image. "
+                     "The tool also returns the list of objects found, along with the mask image of all segmented objects."
+     )
+
+     tools = [
+         DuckDuckGoSearchResults(),
+         image_desc_tool,
+         clipseg_tool,
+         image_parameters_tool,
+         object_extractor,
+         bounding_box_generator,
+         panoptic_segmentation,
+     ]
+     return tools
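
Each @tool above is a regular langchain BaseTool, so it can be exercised in isolation before going through the agent. A hedged sketch (not part of the commit; "image_store/sample.jpg" is a hypothetical path, and importing extract_tools loads the CLIPSEG/Mask2Former weights at import time):

    from extract_tools import get_image_quality, get_all_tools

    # Single-input tools accept a plain string via .invoke() in recent langchain-core
    print(get_image_quality.invoke("image_store/sample.jpg"))
    print([t.name for t in get_all_tools()])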
final_mask.png ADDED
hub_prompts.py ADDED
@@ -0,0 +1,79 @@
+ PREFIX = """
+ You are an agent designed to filter images on the basis of image quality. You are provided with dashcam images from a car.
+ You have access to the following tools: Image_Description_Tool, Object Proportion_Tool, Image Parameters_Tool, DuckDuckGoSearch_Tool
+ Some examples are provided below:
+
+ Question: Describe the image
+ Thought: To describe the image, I need a tool that describes an image. Image_Description_Tool describes the image. I should use that tool.
+ Action : ```json
+ {
+     "action": "Image_Description_Tool",
+     "action_input": "image_store/image.jpg"
+ }
+ ```
+ Observation : "Car driving on road. The weather in the image is stormy."
+ Final Answer : The car is driving in stormy weather. Due to the stormy weather, visibility will be low.
+
+
+ Question: I need to know the quality of the image.
+ Thought: I need a tool that describes the image quality. Image Parameters_Tool can help to find out parameters like brightness, blur and noise in the image.
+ Action : ```json
+ {
+     "action": "Image Parameters_Tool",
+     "action_input": "image_store/image_path.jpg"
+ }
+ ```
+ Observation : "Image is bright enough and has a high Signal to Noise ratio >1, which means the quality of the image is good."
+ Final Answer : The quality of the image seems good.
+
+ Question: I need to know the quality of the image.
+ Thought: I need a tool that describes the image quality. Image Parameters_Tool can help to find out parameters like brightness, blur and noise in the image.
+ Action : ```json
+ {
+     "action": "Image Parameters_Tool",
+     "action_input": "image_store/image_path.jpg"
+ }
+ ```
+ Observation : "Image is not bright enough and has more noise, which means the quality of the image is bad."
+ Final Answer : The quality of the image does not seem to be good.
+
+ Question: I need to determine the cracks.
+ Thought: Before looking for cracks I should check the image quality. Image Parameters_Tool can help to find out parameters like brightness, blur and noise in the image.
+ Action : ```json
+ {
+     "action": "Image Parameters_Tool",
+     "action_input": "image_store/image_path.jpg"
+ }
+ ```
+ Observation : "Image is not bright enough and has more noise, which means the quality of the image is bad."
+ Final Answer : The quality of the image does not seem to be good.
+
+ The final method is "get_image_parameters". This tool helps to find out general properties of the image, like blurriness, sharpness,
+ brightness, and Signal to Noise ratio.
+
+ Use these tools and the information they provide to construct your final answer.
+
+ If you get an error while executing a query, rewrite the query and try again.
+ If the question does not seem related to the image, just return "I don't know" as the answer.
+ """
+
+
+ FORMAT_INSTRUCTIONS = """Use the following format:
+
+ Question: the input question you must answer
+
+ Thought: you should always think about what to do
+
+ Action: the action to take, should be one of [{tool_names}]
+
+ Action Input: the input to the action
+
+ Observation: the result of the action
+ ... (this Thought/Action/Action Input/Observation can repeat N times)
+
+ Thought: I now know the final answer
+ Final Answer: the final answer to the original input question
+
+ """
+
+ SUFFIX = """You are a humble agent; provide information point-wise."""
llm_service.py ADDED
@@ -0,0 +1,60 @@
+ import os
+ from langchain_groq import ChatGroq
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_openai import AzureChatOpenAI
+ from langchain_community.llms import Ollama
+
+ def azure_openai_service(key, max_retries=3):
+     os.environ["AZURE_OPENAI_API_KEY"] = key
+     os.environ["AZURE_OPENAI_ENDPOINT"] = "https://indus.api.michelin.com/openai-key-weu"
+     model = AzureChatOpenAI(
+         azure_deployment="gpt-4o",          # or your deployment
+         api_version="2023-06-01-preview",   # or your api version
+         temperature=0,
+         max_tokens=None,
+         timeout=None,
+         max_retries=max_retries)
+     return model
+
+ def get_ollama():
+     # Requires a running Ollama server (start it from a terminal first)
+     llm = Ollama(base_url="http://localhost:11434", model="mistral")
+     return llm
+
+ def get_googleGemini(key):
+     os.environ["GOOGLE_API_KEY"] = key
+     llm = ChatGoogleGenerativeAI(
+         model="gemini-1.5-pro",
+         temperature=0,
+         max_tokens=None,
+         timeout=None,
+         max_retries=2)
+     return llm
+
+ def get_groq_model(key, model_name="gemma2-9b-it"):
+     os.environ["GROQ_API_KEY"] = key
+     llm_groq = ChatGroq(model=model_name)
+     return llm_groq
+
+
+ def get_llm(option, key):
+     llm = None
+     if option == 'deepseek-r1-distill-llama-70b':
+         llm = get_groq_model(key, model_name="deepseek-r1-distill-llama-70b")
+     elif option == 'gemma2-9b-it':
+         llm = get_groq_model(key, model_name="gemma2-9b-it")
+     elif option == 'llama-3.2-3b-preview':
+         llm = get_groq_model(key, model_name="llama-3.2-3b-preview")
+     elif option == 'llama-3.2-1b-preview':
+         llm = get_groq_model(key, model_name="llama-3.2-1b-preview")
+     elif option == 'llama3-8b-8192':
+         llm = get_groq_model(key, model_name="llama3-8b-8192")
+     elif option == 'Openai':
+         llm = azure_openai_service(key)
+     elif option == 'Google':
+         llm = get_googleGemini(key)
+     elif option == "Ollama":
+         llm = get_ollama()
+     return llm
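
A hedged usage sketch (not part of the commit): any option from app.py's selectbox works; here the Groq-hosted gemma2 model, assuming a valid key is available in the environment:

    import os
    from llm_service import get_llm

    llm = get_llm(option="gemma2-9b-it", key=os.environ["GROQ_API_KEY"])
    print(llm.invoke("Say hello in five words").content)  # chat models return an AIMessage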
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ streamlit
+ langchain
+ langchain-community
+ langchain-core
+ langchain-text-splitters
+ langchain-experimental
+ langchain-google-genai
+ langchain-openai
+ tiktoken
+ duckduckgo-search
+ torch
+ transformers
+ langchain-groq
+ jq
+ scikit-learn
+ PyWavelets
+ scikit-image
tool_utils/clip_segmentation.py ADDED
@@ -0,0 +1,55 @@
+ import logging
+ from typing import List
+
+ import cv2
+ import numpy as np
+ import torch
+ from matplotlib import pyplot as plt
+ from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
+ from segmentation_mask_overlay import overlay_masks
+
+
+ class CLIPSEG:
+     def __init__(self, model_name="CIDAS/clipseg-rd64-refined", threshold=0.60):
+         self.clip_processor = CLIPSegProcessor.from_pretrained(model_name)
+         self.clip_model = CLIPSegForImageSegmentation.from_pretrained(model_name)
+         self.threshold = threshold
+         self.clip_model.to('cpu')
+
+     @staticmethod
+     def create_rgb_mask(mask, color=None):
+         if color is None:
+             color = tuple(np.random.choice(range(0, 256), size=3))
+         gray_3_channel = cv2.merge((mask, mask, mask))
+         gray_3_channel[mask == 255] = color
+         return gray_3_channel.astype(np.uint8)
+
+     def get_segmentation_mask(self, image_path: str, object_prompts: List):
+         image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
+         logging.info("objects to segment in the image: {}".format(object_prompts))
+
+         predicted_masks = []
+         inputs = self.clip_processor(
+             text=object_prompts,
+             images=[image] * len(object_prompts),
+             padding="max_length",
+             return_tensors="pt",
+         )
+         with torch.no_grad():  # disable gradient computation
+             outputs = self.clip_model(**inputs)
+         preds = outputs.logits.unsqueeze(1)
+
+         for i in range(preds.shape[0]):
+             predicted_mask = torch.sigmoid(preds[i][0]).detach().cpu().numpy()
+             predicted_mask = np.where(predicted_mask > self.threshold, 255, 0)
+             predicted_masks.append(predicted_mask)
+
+         # CLIPSeg predicts 352x352 logits, so overlay on a resized copy of the image
+         resize_image = cv2.resize(image, (352, 352))
+         mask_labels = [f"{prompt}_{i}" for i, prompt in enumerate(object_prompts)]
+         cmap = plt.cm.tab20(np.arange(len(mask_labels)))[..., :-1]
+
+         bool_masks = [predicted_mask.astype('bool') for predicted_mask in predicted_masks]
+         final_mask = overlay_masks(resize_image, np.stack(bool_masks, -1), labels=mask_labels, colors=cmap, alpha=0.5, beta=0.7)
+         try:
+             cv2.imwrite('final_mask.png', final_mask)
+             return 'Segmentation image created: final_mask.png'
+         except Exception as e:
+             logging.error("Error while saving the final mask: %s", e)
+             return "unable to create a mask image"
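
A hedged usage sketch (not part of the commit): the CIDAS/clipseg-rd64-refined weights download on first use, and "image_store/sample.jpg" is a hypothetical path:

    from tool_utils.clip_segmentation import CLIPSEG

    segmenter = CLIPSEG()  # runs on CPU by construction
    print(segmenter.get_segmentation_mask("image_store/sample.jpg", ["road", "car"]))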
tool_utils/image_metadata.py ADDED
@@ -0,0 +1,43 @@
+ import logging
+
+ import cv2
+ import numpy as np
+ from skimage.restoration import estimate_sigma
+
+
+ def image_brightness(image, thresh=0.37):
+     # Callers pass RGB images (see extract_tools.get_image_quality)
+     L, A, B = cv2.split(cv2.cvtColor(image, cv2.COLOR_RGB2LAB))
+     norm_L = L / np.max(L)
+     L_mean = np.mean(norm_L)
+     if L_mean > thresh:
+         return "image is Bright enough "
+     else:
+         return "image is not bright enough "
+
+ def variance_of_laplacian(img, threshold=250):
+     # Compute the Laplacian of the image and then return the focus
+     # measure, which is simply the variance of the Laplacian
+     gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+
+     laplacian_value = cv2.Laplacian(gray, cv2.CV_64F).var()
+     logging.info(laplacian_value)
+     if laplacian_value <= threshold:
+         return " Image is very blurry"
+     elif laplacian_value <= 3 * threshold:
+         return " Image is visible but has some regions out of focus."
+     else:
+         return "Image is very sharp."
+
+ def get_signal_to_noise_ratio(image):
+     # estimate_sigma returns an estimate of the noise standard deviation
+     snr_value = estimate_sigma(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), average_sigmas=False)
+     logging.info(snr_value)
+     if snr_value > 1:
+         snr_text = "Signal to Noise is greater than 1 - More Signal in image "
+     else:
+         snr_text = "Signal to Noise is less than 1 - More Noise in image "
+     return snr_text
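
A hedged sketch (not part of the commit) that exercises the three metrics on a synthetic frame, so no image file is needed:

    import numpy as np
    from tool_utils.image_metadata import image_brightness, variance_of_laplacian, get_signal_to_noise_ratio

    rgb = np.full((240, 320, 3), 200, dtype=np.uint8)  # flat, uniformly lit RGB frame
    print(image_brightness(rgb))
    print(variance_of_laplacian(rgb))  # a flat image has zero Laplacian variance, so it reads as very blurry
    print(get_signal_to_noise_ratio(rgb))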
tool_utils/mask2former.py ADDED
@@ -0,0 +1,102 @@
+ import argparse
+ import warnings
+
+ import cv2
+ import numpy as np
+
+ try:
+     import torch as th
+     from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation
+ except ImportError as error:
+     raise ImportError('Try installing the torch and transformers modules using pip.') from error
+
+ warnings.filterwarnings("ignore")
+
+
+ class MASK2FORMER:
+     def __init__(self, model_name="facebook/mask2former-swin-small-ade-semantic", class_id=6):  # use the large checkpoint for better masks
+         self.image_processor = AutoImageProcessor.from_pretrained(model_name)
+         self.maskformer_model = Mask2FormerForUniversalSegmentation.from_pretrained(model_name)
+         self.DEVICE = "cuda" if th.cuda.is_available() else 'cpu'
+         self.segment_id = class_id
+         self.maskformer_model.to(self.DEVICE)
+
+     def create_rgb_mask(self, mask, value=255):
+         gray_3_channel = cv2.merge((mask, mask, mask))
+         gray_3_channel[mask == value] = (255, 255, 255)
+         return gray_3_channel.astype(np.uint8)
+
+     def get_mask(self, segmentation):
+         """
+         Mask out the segment of the class given by self.segment_id.
+         args:   segmentation -> torch tensor - segmentation output from the Mask2Former model
+         return: ndarray -> 2D mask of the image
+         """
+         if self.segment_id == "vehicle":
+             mask = (segmentation.cpu().numpy().copy() == 2) | (segmentation.cpu().numpy().copy() == 5) | (segmentation.cpu().numpy().copy() == 7)
+         else:
+             mask = (segmentation.cpu().numpy() == 6)
+         visual_mask = (mask * 255).astype(np.uint8)
+         return visual_mask
+
+     def generate_road_mask(self, img):
+         """
+         Extract a semantic road mask from the raw image.
+         args:   img -> np.ndarray - input image
+         return: ndarray -> masked-out road.
+         """
+         inputs = self.image_processor(img, return_tensors="pt")
+
+         inputs = inputs.to(self.DEVICE)
+         with th.no_grad():
+             outputs = self.maskformer_model(**inputs)
+
+         segmentation = self.image_processor.post_process_semantic_segmentation(outputs, target_sizes=[(img.shape[0], img.shape[1])])[0]
+         segmented_mask = self.get_mask(segmentation=segmentation)
+         return segmented_mask
+
+     def get_rgb_mask(self, img, segmented_mask):
+         """
+         Extract the RGB road image, removing the background.
+         args:   img -> ndarray - raw image
+                 segmented_mask -> binary mask from the semantic segmentation
+         return: ndarray -> RGB road image with background pixels set to 0.
+         """
+         predicted_rgb_mask = self.create_rgb_mask(segmented_mask)
+         rgb_mask_img = cv2.bitwise_and(img, predicted_rgb_mask)
+         return rgb_mask_img
+
+     def run_inference(self, image_name):
+         """
+         Create a segmentation mask for the configured segment_id and return the masked
+         object's proportion of the image in percent. Uses the Mask2Former model loaded
+         in __init__ to extract the segmentation mask for the provided image.
+         args: image_name -> str - image path, read and processed by Mask2Former
+         """
+         input_image = cv2.cvtColor(cv2.imread(image_name), cv2.COLOR_BGR2RGB)
+         road_mask = self.generate_road_mask(input_image)
+         road_image = self.get_rgb_mask(input_image, road_mask)
+         obj_prop = round((np.count_nonzero(road_image) / np.size(road_image)) * 100, 1)
+         # Empty the GPU cache
+         with th.no_grad():
+             th.cuda.empty_cache()
+         return obj_prop
+
+
+ def main(args):
+     mask2former = MASK2FORMER()
+     input_image = cv2.cvtColor(cv2.imread(args.image_path), cv2.COLOR_BGR2RGB)
+
+     road_mask = mask2former.generate_road_mask(input_image)
+     road_image = mask2former.get_rgb_mask(input_image, road_mask)
+     obj_prop = round(np.count_nonzero(road_image) / np.size(road_image) * 100, 1)
+
+     return road_mask, road_image, obj_prop
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser()
+     parser.add_argument('-image_path', help='raw_image_path', required=True)
+     args = parser.parse_args()
+     main(args)
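
A hedged sketch (not part of the commit; the sample path is hypothetical, and the default class_id of 6 appears to target the road class in the ADE20K label map):

    from tool_utils.mask2former import MASK2FORMER

    model = MASK2FORMER()  # swin-small ADE semantic checkpoint, downloads on first use
    print(model.run_inference("image_store/sample.jpg"))  # percent of pixels covered by the mask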
tool_utils/object_extractor.py ADDED
@@ -0,0 +1,24 @@
+ from pydantic import BaseModel, Field
+ from langchain_core.prompts import PromptTemplate
+
+ class objects_identified(BaseModel):
+     objects: str = Field(..., description="Generate a list of objects identified from the given description of the image")
+
+ def objectextractor_prompt():
+     template = """
+     You are an AI assistant provided with a context below. The context is a description of an image. Your task is to identify the objects within the image.
+     The objects must be living beings or physical items or things that one can view, feel, and touch. These objects must be nouns, not verbs or adjectives or words describing an object like
+     'large', 'beautiful', etc.
+     Only provide the name of the object, not its description. Refer to the example input and output for better understanding.
+     If the context mentions a boy, a girl, women, or girls, they come under the category of "People", so the object for such classes will be People.
+     Example Input: Context: "A park filled with men and women, a large oak tree standing in the center, a dog running near a bench, and a bicycle leaning against a nearby fence."
+     Example Output: "People", "Dog", "Bench", "Bicycle", "Fence"
+
+     Context: {context}
+     """
+     prompt = PromptTemplate(template=template, input_variables=["context"])
+     return prompt
+
+ def create_object_extraction_chain(llm):
+     object_extraction_chain = objectextractor_prompt() | llm.with_structured_output(objects_identified)
+     return object_extraction_chain
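
A hedged sketch (not part of the commit) of the chain in isolation, assuming GROQ_API_KEY is set; the caption string is made up, and the chain returns an objects_identified instance:

    from langchain_groq import ChatGroq
    from tool_utils.object_extractor import create_object_extraction_chain

    chain = create_object_extraction_chain(llm=ChatGroq(model="gemma2-9b-it"))
    result = chain.invoke({"context": "a dog running near a bench in a crowded park"})
    print(result.objects)  # comma-separated string, as consumed by extract_tools.object_extraction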
tool_utils/yolo_world.py ADDED
@@ -0,0 +1,30 @@
+ import numpy as np
+ from typing import List
+ from ultralytics import YOLOWorld
+
+ class YoloWorld:
+     def __init__(self, model_name="yolov8x-worldv2.pt"):
+         self.model = YOLOWorld(model_name)
+         self.model.to(device='cpu')
+
+     def run_inference(self, image_path: str, object_prompts: List):
+         object_details = []
+         self.model.set_classes(object_prompts)
+         results = self.model.predict(image_path)
+         for result in results:
+             for box in result.boxes:
+                 object_data = {}
+                 x1, y1, x2, y2 = np.array(box.xyxy.cpu(), dtype=np.int32).squeeze()
+                 c1, c2 = (x1, y1), (x2, y2)
+                 confidence = round(float(box.conf.cpu()), 2)
+                 label = f'{result.names[int(box.cls)]}'
+                 print("Object Name: {} Bounding Box: {},{} Confidence score: {}\n".format(label, c1, c2, confidence))
+                 object_data[label] = {
+                     'bounding_box': [x1, y1, x2, y2],
+                     'confidence': confidence,
+                 }
+                 object_details.append(object_data)
+         return object_details
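
A hedged usage sketch (not part of the commit; the sample path and class list are hypothetical):

    from tool_utils.yolo_world import YoloWorld

    detector = YoloWorld()  # yolov8x-worldv2.pt downloads on first use
    detections = detector.run_inference("image_store/sample.jpg", ["car", "person"])
    print(detections)  # list of {label: {'bounding_box': [...], 'confidence': ...}} dicts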
utils.py ADDED
@@ -0,0 +1,26 @@
+ from collections import defaultdict
+
+ import matplotlib.pyplot as plt
+ import matplotlib.patches as mpatches
+ from matplotlib import cm
+ import torch
+
+ def draw_panoptic_segmentation(model, segmentation, segments_info):
+     # Get the color map used
+     viridis = cm.get_cmap('viridis', torch.max(segmentation))
+     fig, ax = plt.subplots()
+     ax.imshow(segmentation.cpu().numpy())
+     instances_counter = defaultdict(int)
+     handles = []
+     # For each segment, build its legend entry
+     for segment in segments_info:
+         segment_id = segment['id']
+         segment_label_id = segment['label_id']
+         segment_label = model.config.id2label[segment_label_id]
+         label = f"{segment_label}-{instances_counter[segment_label_id]}"
+         instances_counter[segment_label_id] += 1
+         color = viridis(segment_id)
+         handles.append(mpatches.Patch(color=color, label=label))
+
+     # ax.legend(handles=handles)
+     fig.savefig('final_mask.png')
+     plt.close(fig)  # release the figure so repeated calls don't leak memory
+     return 'final_mask.png'