audit_assistant

Sleeping

App Files Files Community

mtyrrell committed on Feb 13

Commit

72cb6c4

1 Parent(s): 427f510

refactor

Browse files

Files changed (5) hide show

app.py +14 -20
auditqa/process_chunks.py +1 -6
auditqa/reader.py +29 -20
auditqa/retriever.py +0 -7
auditqa/utils.py +10 -10

app.py CHANGED Viewed

@@ -14,20 +14,12 @@ from auditqa.retriever import get_context
 from auditqa.reader import nvidia_client, dedicated_endpoint
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template, get_client_location, get_client_ip
 from dotenv import load_dotenv
 from threading import Lock
 from gradio.routes import Request
 from qdrant_client import QdrantClient
 import json
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
-load_dotenv()
 # # fetch tokens and model config params
 SPACES_LOG = os.environ["SPACES_LOG"]
 SPACES_LOG = os.getenv('SPACES_LOG')
@@ -50,7 +42,16 @@ scheduler = CommitScheduler(
      every=2) # TESTING: every 2 seconds
 #####--------------- VECTOR STORE -------------------------------------------------
-# Configure cloud Qdrant client
 def get_cloud_qdrant():
     from langchain_community.embeddings import HuggingFaceEmbeddings
     from langchain_community.vectorstores import Qdrant
@@ -102,13 +103,11 @@ def submit_feedback(feedback, logs_data):
     """Handle feedback submission"""
     try:
         if logs_data is None:
-            logger.error("No logs data available for feedback")
             return gr.update(visible=False), gr.update(visible=True)
         save_logs(scheduler, JSON_DATASET_PATH, logs_data, feedback)
         return gr.update(visible=False), gr.update(visible=True)
     except Exception as e:
-        logger.error(f"Error saving feedback: {e}")
         # Still need to return the expected outputs even on error
         return gr.update(visible=False), gr.update(visible=True)
@@ -149,16 +148,13 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
     if not session_id: # Session management
         session_id = session_manager.create_session(client_ip)
-        logger.debug(f"Created new session: {session_id}")
     else:
         session_manager.update_session(session_id)
-        logger.debug(f"Updated existing session: {session_id}")
     # Get session data
     session_data = session_manager.get_session_data(session_id)
     session_duration = session_manager.get_session_duration(session_id)
-    logger.debug(f"Session duration calculated: {session_duration}")
     print(f">> NEW QUESTION : {query}")
     print(f"history:{history}")
     print(f"sources:{sources}")
@@ -232,7 +228,6 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
         "answer": "",
         "time": timestamp,
     }
-    logger.debug(f"Logs data before save: {json.dumps(logs_data, indent=2)}")
     if model_config.get('reader','TYPE') == 'NVIDIA':
         chat_model = nvidia_client()
@@ -291,7 +286,6 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
                     await asyncio.sleep(0.05)
             except Exception as e:
-                logger.error(f"Error in process_stream: {str(e)}")
                 raise
         async for update in process_stream():
@@ -300,9 +294,9 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
     try:
         # Save log after streaming is complete
         save_logs(scheduler, JSON_DATASET_PATH, logs_data)
-        logger.debug(f"Logs saved successfully")
     except Exception as e:
-        logger.error(f"Error saving logs: {str(e)}")

 from auditqa.reader import nvidia_client, dedicated_endpoint
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template, get_client_location, get_client_ip
 from dotenv import load_dotenv
+load_dotenv()
 from threading import Lock
 from gradio.routes import Request
 from qdrant_client import QdrantClient
 import json
 # # fetch tokens and model config params
 SPACES_LOG = os.environ["SPACES_LOG"]
 SPACES_LOG = os.getenv('SPACES_LOG')
      every=2) # TESTING: every 2 seconds
 #####--------------- VECTOR STORE -------------------------------------------------
+# reports contain the already created chunks from Markdown version of pdf reports
+# document processing was done using : https://github.com/axa-group/Parsr
+# We need to create the local vectorstore collection once using load_new_chunks
+# vectorstore collections are stored on persistent storage so this needs to be run only once
+# hence, uncomment the line below only when creating the collection for the first time
+#vectorstores = load_new_chunks()
+# once the vector embeddings are created we will use qdrant client to access these
+# vectorstores = get_local_qdrant()
+# Configure cloud Qdrant client #TESTING
 def get_cloud_qdrant():
     from langchain_community.embeddings import HuggingFaceEmbeddings
     from langchain_community.vectorstores import Qdrant
     """Handle feedback submission"""
     try:
         if logs_data is None:
             return gr.update(visible=False), gr.update(visible=True)
         save_logs(scheduler, JSON_DATASET_PATH, logs_data, feedback)
         return gr.update(visible=False), gr.update(visible=True)
     except Exception as e:
         # Still need to return the expected outputs even on error
         return gr.update(visible=False), gr.update(visible=True)
     if not session_id: # Session management
         session_id = session_manager.create_session(client_ip)
     else:
         session_manager.update_session(session_id)
     # Get session data
     session_data = session_manager.get_session_data(session_id)
     session_duration = session_manager.get_session_duration(session_id)
     print(f">> NEW QUESTION : {query}")
     print(f"history:{history}")
     print(f"sources:{sources}")
         "answer": "",
         "time": timestamp,
     }
     if model_config.get('reader','TYPE') == 'NVIDIA':
         chat_model = nvidia_client()
                     await asyncio.sleep(0.05)
             except Exception as e:
                 raise
         async for update in process_stream():
     try:
         # Save log after streaming is complete
         save_logs(scheduler, JSON_DATASET_PATH, logs_data)
     except Exception as e:
+        raise

auditqa/process_chunks.py CHANGED Viewed

@@ -17,11 +17,6 @@ from pathlib import Path
 device = 'cuda' if cuda.is_available() else 'cpu'
 path_to_data = "./reports/"
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
 ##---------------------functions -------------------------------------------##
@@ -125,7 +120,7 @@ def load_new_chunks():
     """
     this method reads through the files and report_list to create the vector database
     """
-    logger.info("Loading new chunks")
     #  we iterate through the files which contain information about its
     # 'source'=='category', 'subtype', these are used in UI for document selection
     #  which will be used later for filtering database

 device = 'cuda' if cuda.is_available() else 'cpu'
 path_to_data = "./reports/"
 ##---------------------functions -------------------------------------------##
     """
     this method reads through the files and report_list to create the vector database
     """
     #  we iterate through the files which contain information about its
     # 'source'=='category', 'subtype', these are used in UI for document selection
     #  which will be used later for filtering database

auditqa/reader.py CHANGED Viewed

@@ -7,48 +7,57 @@ import os
 from dotenv import load_dotenv
 load_dotenv()
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
 model_config = getconfig("model_params.cfg")
 # NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"] #TESTING
 HF_token = os.environ["LLAMA_3_1"]
 def nvidia_client():
-    logger.info("NVIDIA client activated")
     """ returns the nvidia server client """
-    try:
-        NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"]
-        client = InferenceClient(
-            base_url=model_config.get('reader','NVIDIA_ENDPOINT'),
-            api_key=NVIDIA_SERVER)
-        print("getting nvidia client")
-        return client
-    except KeyError:
-        raise KeyError("NVIDIA_SERVERLESS environment variable not set. Required for NVIDIA endpoint.")
 # TESTING VERSION
 def dedicated_endpoint():
-    logger.info("Serverless endpoint activated")
     try:
         HF_token = os.environ["LLAMA_3_1"]
         if not HF_token:
             raise ValueError("LLAMA_3_1 environment variable is empty")
         model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-        logger.info(f"Initializing InferenceClient with model: {model_id}")
         client = InferenceClient(
             model=model_id,
             api_key=HF_token,
         )
-        logger.info("Serverless InferenceClient initialization successful")
         return client
     except Exception as e:
-        logger.error(f"Error initializing dedicated endpoint: {str(e)}")
         raise

 from dotenv import load_dotenv
 load_dotenv()
 model_config = getconfig("model_params.cfg")
 # NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"] #TESTING
 HF_token = os.environ["LLAMA_3_1"]
 def nvidia_client():
     """Return an InferenceClient bound to the NVIDIA endpoint.

     The endpoint URL comes from the ``NVIDIA_ENDPOINT`` option in the
     ``reader`` section of the model config; the API key is read from the
     ``NVIDIA_SERVERLESS`` environment variable at call time.

     Raises:
         KeyError: if ``NVIDIA_SERVERLESS`` is not set.
     """
     # The module-level NVIDIA_SERVER assignment is commented out, so the key
     # must be looked up here; referencing that global would raise NameError.
     try:
         api_key = os.environ["NVIDIA_SERVERLESS"]
     except KeyError:
         raise KeyError(
             "NVIDIA_SERVERLESS environment variable not set. "
             "Required for NVIDIA endpoint."
         ) from None
     client = InferenceClient(
         base_url=model_config.get('reader','NVIDIA_ENDPOINT'),
         api_key=api_key)
     print("getting nvidia client")
     return client
 # TESTING VERSION
 def dedicated_endpoint():
     """Build an InferenceClient for meta-llama/Meta-Llama-3-8B-Instruct.

     The Hugging Face token is read from the ``LLAMA_3_1`` environment
     variable on every call.

     Raises:
         KeyError: if ``LLAMA_3_1`` is not set.
         ValueError: if ``LLAMA_3_1`` is set but empty.
     """
     token = os.environ["LLAMA_3_1"]
     if not token:
         raise ValueError("LLAMA_3_1 environment variable is empty")
     # Any error raised while constructing the client propagates unchanged.
     return InferenceClient(
         model="meta-llama/Meta-Llama-3-8B-Instruct",
         api_key=token,
     )
+# def dedicated_endpoint():
+#     """ returns the dedicated server endpoint"""
+#      # Set up the streaming callback handler
+#     callback = StreamingStdOutCallbackHandler()
+#     # Initialize the HuggingFaceEndpoint with streaming enabled
+#     llm_qa = HuggingFaceEndpoint(
+#         endpoint_url=model_config.get('reader', 'DEDICATED_ENDPOINT'),
+#         max_new_tokens=int(model_config.get('reader','MAX_TOKENS')),
+#         repetition_penalty=1.03,
+#         timeout=70,
+#         huggingfacehub_api_token=HF_token,
+#         streaming=True, # Enable streaming for real-time token generation
+#         callbacks=[callback] # Add the streaming callback handler
+#     )
+#     # Create a ChatHuggingFace instance with the streaming-enabled endpoint
+#     chat_model = ChatHuggingFace(llm=llm_qa)
+#     print("getting dedicated endpoint wrapped in ChathuggingFace ")
+#     return chat_model

auditqa/retriever.py CHANGED Viewed

@@ -4,11 +4,6 @@ from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
 model_config = getconfig("model_params.cfg")
@@ -42,7 +37,6 @@ def create_filter(reports:list = [],sources:str =None,
 def get_context(vectorstore,query,reports,sources,subtype,year):
-    logger.info("Retriever activated")
     # create metadata filter
     # filter = create_filter(reports=reports,sources=sources,subtype=subtype,year=year)
     filter = None #TESTING
@@ -60,7 +54,6 @@ def get_context(vectorstore,query,reports,sources,subtype,year):
         )
     context_retrieved = compression_retriever.invoke(query)
-    logger.info(f"retrieved paragraphs:{len(context_retrieved)}")
     print(f"retrieved paragraphs:{len(context_retrieved)}")
     return context_retrieved

 from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 model_config = getconfig("model_params.cfg")
 def get_context(vectorstore,query,reports,sources,subtype,year):
     # create metadata filter
     # filter = create_filter(reports=reports,sources=sources,subtype=subtype,year=year)
     filter = None #TESTING
         )
     context_retrieved = compression_retriever.invoke(query)
     print(f"retrieved paragraphs:{len(context_retrieved)}")
     return context_retrieved

auditqa/utils.py CHANGED Viewed

@@ -8,14 +8,9 @@ from langchain.schema import (
 import requests
 from datetime import datetime
 from uuid import uuid4
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
 def save_logs(scheduler, JSON_DATASET_PATH, logs, feedback=None) -> None:
     """ Every interaction with app saves the log of question and answer,
@@ -30,9 +25,7 @@ def save_logs(scheduler, JSON_DATASET_PATH, logs, feedback=None) -> None:
             with open(JSON_DATASET_PATH, 'a') as f:
                 json.dump(logs, f)
                 f.write("\n")
-            logger.info("logging done")
     except Exception as e:
-        logger.error(f"Failed to save logs to {JSON_DATASET_PATH}: {str(e)}")
         raise
@@ -124,12 +117,19 @@ def get_client_location(ip_address) -> dict | None:
         )
         if response.status_code == 200:
             data = response.json()
             return {
                 'city': data.get('city'),
                 'region': data.get('region'),
                 'country': data.get('country_name'),
-                'latitude': data.get('latitude'),
-                'longitude': data.get('longitude')
             }
         elif response.status_code == 429:
             logging.warning(f"Rate limit exceeded. Response: {response.text}")

 import requests
 from datetime import datetime
 from uuid import uuid4
+import random
 def save_logs(scheduler, JSON_DATASET_PATH, logs, feedback=None) -> None:
     """ Every interaction with app saves the log of question and answer,
             with open(JSON_DATASET_PATH, 'a') as f:
                 json.dump(logs, f)
                 f.write("\n")
     except Exception as e:
         raise
         )
         if response.status_code == 200:
             data = response.json()
+            # Add random noise between -0.01 and 0.01 degrees (roughly ±1km)
+            lat = data.get('latitude')
+            lon = data.get('longitude')
+            if lat is not None and lon is not None:
+                lat += random.uniform(-0.01, 0.01)
+                lon += random.uniform(-0.01, 0.01)
             return {
                 'city': data.get('city'),
                 'region': data.get('region'),
                 'country': data.get('country_name'),
+                'latitude': lat,
+                'longitude': lon
             }
         elif response.status_code == 429:
             logging.warning(f"Rate limit exceeded. Response: {response.text}")