mtyrrell committed
Commit 72cb6c4 · 1 Parent(s): 427f510
Files changed (5)
  1. app.py +14 -20
  2. auditqa/process_chunks.py +1 -6
  3. auditqa/reader.py +29 -20
  4. auditqa/retriever.py +0 -7
  5. auditqa/utils.py +10 -10
app.py CHANGED
@@ -14,20 +14,12 @@ from auditqa.retriever import get_context
 from auditqa.reader import nvidia_client, dedicated_endpoint
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template, get_client_location, get_client_ip
 from dotenv import load_dotenv
+load_dotenv()
 from threading import Lock
 from gradio.routes import Request
 from qdrant_client import QdrantClient
 import json
 
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.DEBUG)
-
-load_dotenv()
-
 # # fetch tokens and model config params
 SPACES_LOG = os.environ["SPACES_LOG"]
 SPACES_LOG = os.getenv('SPACES_LOG')
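The two back-to-back SPACES_LOG assignments are redundant: os.environ[...] raises KeyError when the variable is missing, and the os.getenv call on the next line overwrites that result anyway. A minimal sketch of one way to make the intent explicit (a single lookup with a single failure mode):

```python
import os
from dotenv import load_dotenv

load_dotenv()  # pick up variables from a local .env file, if present

# One lookup with an explicit failure mode instead of two overlapping ones.
SPACES_LOG = os.getenv("SPACES_LOG")
if SPACES_LOG is None:
    raise RuntimeError("SPACES_LOG environment variable is not set")
```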
@@ -50,7 +42,16 @@ scheduler = CommitScheduler(
     every=2) # TESTING: every 2 seconds
 
 #####--------------- VECTOR STORE -------------------------------------------------
-# Configure cloud Qdrant client
+# reports contain the already created chunks from the Markdown version of the pdf reports
+# document processing was done using: https://github.com/axa-group/Parsr
+# We need to create the local vectorstore collection once using load_chunks
+# vectorstore collections are stored on persistent storage, so this needs to be run only once
+# hence, comment out the line below after creating them for the first time
+#vectorstores = load_new_chunks()
+# once the vector embeddings are created we will use the qdrant client to access these
+# vectorstores = get_local_qdrant()
+
+# Configure cloud Qdrant client #TESTING
 def get_cloud_qdrant():
     from langchain_community.embeddings import HuggingFaceEmbeddings
     from langchain_community.vectorstores import Qdrant
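get_cloud_qdrant is truncated in this hunk. For orientation, a minimal sketch of how a cloud Qdrant vector store is typically wired with these imports; the QDRANT_URL and QDRANT_API_KEY variable names, the embedding model, and the collection name are assumptions for illustration, not values from this repo:

```python
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient

def get_cloud_qdrant():
    # The embedding model must match the one used when the collection was built.
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

    client = QdrantClient(
        url=os.environ["QDRANT_URL"],          # assumed variable name
        api_key=os.environ["QDRANT_API_KEY"],  # assumed variable name
    )

    # Wrap the existing remote collection in a LangChain vector store.
    return Qdrant(client=client, collection_name="allreports", embeddings=embeddings)
```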
@@ -102,13 +103,11 @@ def submit_feedback(feedback, logs_data):
     """Handle feedback submission"""
     try:
         if logs_data is None:
-            logger.error("No logs data available for feedback")
             return gr.update(visible=False), gr.update(visible=True)
 
         save_logs(scheduler, JSON_DATASET_PATH, logs_data, feedback)
         return gr.update(visible=False), gr.update(visible=True)
     except Exception as e:
-        logger.error(f"Error saving feedback: {e}")
         # Still need to return the expected outputs even on error
         return gr.update(visible=False), gr.update(visible=True)
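submit_feedback returns the same pair of gr.update objects on every path, so the two output components stay in a consistent state even when saving fails. A sketch of the event wiring this implies; the component names are hypothetical:

```python
# Hypothetical component names, for illustration only.
feedback_button.click(
    submit_feedback,
    inputs=[feedback_radio, logs_state],
    outputs=[feedback_row, thanks_row],  # one target per returned gr.update(...)
)
```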
@@ -149,16 +148,13 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
 
     if not session_id: # Session management
         session_id = session_manager.create_session(client_ip)
-        logger.debug(f"Created new session: {session_id}")
     else:
         session_manager.update_session(session_id)
-        logger.debug(f"Updated existing session: {session_id}")
 
     # Get session data
     session_data = session_manager.get_session_data(session_id)
     session_duration = session_manager.get_session_duration(session_id)
-    logger.debug(f"Session duration calculated: {session_duration}")
-
+
     print(f">> NEW QUESTION : {query}")
     print(f"history:{history}")
     print(f"sources:{sources}")
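The session_manager implementation is not part of this commit. A minimal in-memory sketch consistent with the four calls used here (create_session, update_session, get_session_data, get_session_duration); every detail below is assumed:

```python
import uuid
from datetime import datetime, timezone

class SessionManager:
    """Illustrative in-memory session store, not the app's actual class."""

    def __init__(self):
        self._sessions = {}

    def create_session(self, client_ip):
        session_id = str(uuid.uuid4())
        now = datetime.now(timezone.utc)
        self._sessions[session_id] = {"client_ip": client_ip, "created": now, "last_seen": now}
        return session_id

    def update_session(self, session_id):
        self._sessions[session_id]["last_seen"] = datetime.now(timezone.utc)

    def get_session_data(self, session_id):
        return self._sessions.get(session_id)

    def get_session_duration(self, session_id):
        session = self._sessions[session_id]
        return (session["last_seen"] - session["created"]).total_seconds()
```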
@@ -232,7 +228,6 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
         "answer": "",
         "time": timestamp,
     }
-    logger.debug(f"Logs data before save: {json.dumps(logs_data, indent=2)}")
 
     if model_config.get('reader','TYPE') == 'NVIDIA':
         chat_model = nvidia_client()
@@ -291,7 +286,6 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
             await asyncio.sleep(0.05)
 
     except Exception as e:
-        logger.error(f"Error in process_stream: {str(e)}")
         raise
 
     async for update in process_stream():
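process_stream wraps the model call, accumulates tokens, and yields partial answers with a short sleep so the UI can repaint. A sketch of the general pattern with huggingface_hub's InferenceClient; the message content and parameters are placeholders:

```python
import asyncio

async def process_stream(chat_model, messages, logs_data):
    """Illustrative streaming loop, assuming a huggingface_hub InferenceClient."""
    answer = ""
    # chat_completion(..., stream=True) yields chunks carrying incremental deltas.
    for chunk in chat_model.chat_completion(messages=messages, max_tokens=512, stream=True):
        token = chunk.choices[0].delta.content or ""
        answer += token
        logs_data["answer"] = answer  # keep the log record up to date
        yield answer
        await asyncio.sleep(0.05)  # give the event loop a chance to flush updates
```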
@@ -300,9 +294,9 @@ async def chat(query, history, sources, reports, subtype, year, client_ip=None,
     try:
         # Save log after streaming is complete
         save_logs(scheduler, JSON_DATASET_PATH, logs_data)
-        logger.debug(f"Logs saved successfully")
     except Exception as e:
-        logger.error(f"Error saving logs: {str(e)}")
+        raise
+
 
 
auditqa/process_chunks.py CHANGED
@@ -17,11 +17,6 @@ from pathlib import Path
 device = 'cuda' if cuda.is_available() else 'cpu'
 path_to_data = "./reports/"
 
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
 
 
 ##---------------------functions -------------------------------------------##
@@ -125,7 +120,7 @@ def load_new_chunks():
     """
     this method reads through the files and report_list to create the vector database
     """
-    logger.info("Loading new chunks")
+
     # we iterate through the files which contain information about its
     # 'source'=='category', 'subtype', these are used in UI for document selection
     # which will be used later for filtering database
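load_new_chunks itself is not shown beyond the docstring. A sketch of the one-time collection build it describes, using the same LangChain Qdrant integration; the storage path, embedding model, collection name, and chunk layout are assumptions:

```python
from langchain.docstore.document import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant

def build_local_collection(chunks):
    # Each chunk is assumed to carry 'source', 'subtype' and 'year' metadata,
    # which the UI later uses for filtering.
    docs = [Document(page_content=c["content"], metadata=c["metadata"]) for c in chunks]
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
    # path= persists the collection to disk, so this needs to run only once.
    return Qdrant.from_documents(
        docs,
        embeddings,
        path="/data/local_qdrant",
        collection_name="allreports",
    )
```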
auditqa/reader.py CHANGED
@@ -7,48 +7,57 @@ import os
 from dotenv import load_dotenv
 load_dotenv()
 
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
-
 model_config = getconfig("model_params.cfg")
 # NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"] #TESTING
 HF_token = os.environ["LLAMA_3_1"]
 
 def nvidia_client():
-    logger.info("NVIDIA client activated")
     """ returns the nvidia server client """
-    try:
-        NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"]
-        client = InferenceClient(
-            base_url=model_config.get('reader','NVIDIA_ENDPOINT'),
-            api_key=NVIDIA_SERVER)
-        print("getting nvidia client")
-        return client
-    except KeyError:
-        raise KeyError("NVIDIA_SERVERLESS environment variable not set. Required for NVIDIA endpoint.")
+    client = InferenceClient(
+        base_url=model_config.get('reader','NVIDIA_ENDPOINT'),
+        api_key=NVIDIA_SERVER)
+    print("getting nvidia client")
+
+    return client
 
 # TESTING VERSION
 def dedicated_endpoint():
-    logger.info("Serverless endpoint activated")
     try:
         HF_token = os.environ["LLAMA_3_1"]
         if not HF_token:
             raise ValueError("LLAMA_3_1 environment variable is empty")
 
         model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-        logger.info(f"Initializing InferenceClient with model: {model_id}")
 
         client = InferenceClient(
             model=model_id,
             api_key=HF_token,
         )
 
-        logger.info("Serverless InferenceClient initialization successful")
         return client
 
     except Exception as e:
-        logger.error(f"Error initializing dedicated endpoint: {str(e)}")
         raise
+
+
+# def dedicated_endpoint():
+#     """ returns the dedicated server endpoint"""
+
+#     # Set up the streaming callback handler
+#     callback = StreamingStdOutCallbackHandler()
+
+#     # Initialize the HuggingFaceEndpoint with streaming enabled
+#     llm_qa = HuggingFaceEndpoint(
+#         endpoint_url=model_config.get('reader', 'DEDICATED_ENDPOINT'),
+#         max_new_tokens=int(model_config.get('reader','MAX_TOKENS')),
+#         repetition_penalty=1.03,
+#         timeout=70,
+#         huggingfacehub_api_token=HF_token,
+#         streaming=True,  # Enable streaming for real-time token generation
+#         callbacks=[callback]  # Add the streaming callback handler
+#     )
+
+#     # Create a ChatHuggingFace instance with the streaming-enabled endpoint
+#     chat_model = ChatHuggingFace(llm=llm_qa)
+#     print("getting dedicated endpoint wrapped in ChatHuggingFace")
+#     return chat_model
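Note that the rewritten nvidia_client body references NVIDIA_SERVER even though the module-level assignment above is commented out, so that name must be defined elsewhere before the function runs. For orientation, a sketch of how a client returned by either factory is typically consumed; the message and parameters are placeholders:

```python
# Illustrative usage of the returned huggingface_hub InferenceClient.
client = dedicated_endpoint()
messages = [{"role": "user", "content": "Summarise the audit findings on payroll controls."}]

response = client.chat_completion(messages=messages, max_tokens=256)
print(response.choices[0].message.content)
```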
auditqa/retriever.py CHANGED
@@ -4,11 +4,6 @@ from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
 
 model_config = getconfig("model_params.cfg")
@@ -42,7 +37,6 @@ def create_filter(reports:list = [],sources:str =None,
 
 
 def get_context(vectorstore,query,reports,sources,subtype,year):
-    logger.info("Retriever activated")
     # create metadata filter
     # filter = create_filter(reports=reports,sources=sources,subtype=subtype,year=year)
     filter = None #TESTING
@@ -60,7 +54,6 @@ def get_context(vectorstore,query,reports,sources,subtype,year):
     )
 
     context_retrieved = compression_retriever.invoke(query)
-    logger.info(f"retrieved paragraphs:{len(context_retrieved)}")
     print(f"retrieved paragraphs:{len(context_retrieved)}")
 
     return context_retrieved
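The hunk above starts at the closing parenthesis of the compression retriever, so its construction is out of frame. A sketch of the two-stage retrieve-then-rerank pipeline these imports imply; the reranker model, k, and top_n values are assumptions:

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder

def build_compression_retriever(vectorstore, filter=None):
    # Stage 1: fetch a generous candidate set from the vector store.
    base_retriever = vectorstore.as_retriever(
        search_type="similarity", search_kwargs={"k": 20, "filter": filter}
    )
    # Stage 2: a cross-encoder re-scores each (query, passage) pair and keeps the best.
    reranker = CrossEncoderReranker(
        model=HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base"), top_n=5
    )
    return ContextualCompressionRetriever(
        base_compressor=reranker, base_retriever=base_retriever
    )
```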
auditqa/utils.py CHANGED
@@ -8,14 +8,9 @@ from langchain.schema import (
 import requests
 from datetime import datetime
 from uuid import uuid4
+import random
 
 
-# TESTING DEBUG LOG
-from auditqa.logging_config import setup_logging
-setup_logging()
-import logging
-logger = logging.getLogger(__name__)
-
 
 def save_logs(scheduler, JSON_DATASET_PATH, logs, feedback=None) -> None:
     """ Every interaction with app saves the log of question and answer,
@@ -30,9 +25,7 @@ def save_logs(scheduler, JSON_DATASET_PATH, logs, feedback=None) -> None:
         with open(JSON_DATASET_PATH, 'a') as f:
             json.dump(logs, f)
             f.write("\n")
-        logger.info("logging done")
     except Exception as e:
-        logger.error(f"Failed to save logs to {JSON_DATASET_PATH}: {str(e)}")
         raise
 
 
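save_logs appends one JSON object per line to a file that a huggingface_hub CommitScheduler pushes in the background. The documented pattern guards the write with the scheduler's lock so a background commit never snapshots a half-written line; a sketch, with the feedback handling assumed:

```python
import json

def save_logs(scheduler, JSON_DATASET_PATH, logs, feedback=None) -> None:
    """Illustrative append-under-lock pattern for CommitScheduler-backed logs."""
    if feedback is not None:
        logs["feedback"] = feedback  # assumed field name
    with scheduler.lock:  # block background commits while writing
        with open(JSON_DATASET_PATH, "a") as f:
            json.dump(logs, f)
            f.write("\n")
```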
@@ -124,12 +117,19 @@ def get_client_location(ip_address) -> dict | None:
         )
         if response.status_code == 200:
             data = response.json()
+            # Add random noise between -0.01 and 0.01 degrees (roughly ±1km)
+            lat = data.get('latitude')
+            lon = data.get('longitude')
+            if lat is not None and lon is not None:
+                lat += random.uniform(-0.01, 0.01)
+                lon += random.uniform(-0.01, 0.01)
+
             return {
                 'city': data.get('city'),
                 'region': data.get('region'),
                 'country': data.get('country_name'),
-                'latitude': data.get('latitude'),
-                'longitude': data.get('longitude')
+                'latitude': lat,
+                'longitude': lon
             }
         elif response.status_code == 429:
             logging.warning(f"Rate limit exceeded. Response: {response.text}")
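For scale: one degree of latitude is about 111 km, so noise of ±0.01° shifts the point by up to roughly 1.1 km north-south, and the east-west shift is smaller by a factor of cos(latitude). The "roughly ±1km" in the added comment is therefore accurate at low and mid latitudes.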