Update app.py
app.py CHANGED
@@ -12,7 +12,7 @@ from auditqa.sample_questions import QUESTIONS
 from auditqa.reports import files, report_list, new_files, new_report_list
 from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
 from auditqa.retriever import get_context
-from auditqa.reader import nvidia_client, dedicated_endpoint, serverless_api
+from auditqa.reader import nvidia_client, dedicated_endpoint, serverless_api, inf_provider
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template, get_client_location, get_client_ip, get_platform_info
 from dotenv import load_dotenv
 load_dotenv()

@@ -304,6 +304,34 @@ async def chat(query,history, method, sources,reports,subtype, client_ip=None, s
         async for update in process_stream():
             yield update

+    elif model_config.get('reader','TYPE') == 'INF_PROVIDERS':
+        chat_model = inf_provider()
+        start_time = time.time()
+        async def process_stream():
+            nonlocal answer_yet  # Use the outer scope's answer_yet variable
+            # Without nonlocal, Python would create a new local variable answer_yet
+            # inside process_stream(), instead of modifying the one from the outer scope.
+            # Iterate over the streaming response chunks
+            response = chat_model.chat.completions.create(
+                model=model_config.get("reader","INF_PROVIDER_MODEL"),
+                messages=messages,
+                stream=True,
+                max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+            )
+            for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    logs_data["answer"] = parsed_answer
+                    yield [tuple(x) for x in history], docs_html, logs_data, session_id
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update
+
+
     elif model_config.get('reader','TYPE') == 'DEDICATED':
         chat_model = dedicated_endpoint()
         ### adding for assessing computation time
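The new branch only assumes that chat_model exposes an OpenAI-style chat.completions.create(..., stream=True) call yielding chunks with choices[0].delta.content. A minimal sketch of what inf_provider() in auditqa/reader.py might return, assuming it wraps Hugging Face Inference Providers behind the OpenAI-compatible router (the base URL, the HF_TOKEN variable, and the openai dependency are assumptions, not shown in this diff):

import os
from openai import OpenAI

def inf_provider():
    # Hypothetical factory: any OpenAI-compatible client satisfies the interface
    # used in the INF_PROVIDERS branch above (chat.completions.create with
    # stream=True, chunks read via choices[0].delta.content).
    return OpenAI(
        base_url="https://router.huggingface.co/v1",  # assumed provider router URL
        api_key=os.getenv("HF_TOKEN"),                # assumed env var name
    )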
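The branch dispatch rests on the two-argument .get(section, option) lookups, which suggests getconfig() returns a standard configparser.ConfigParser; that return type is an assumption. Under it, a sketch of the [reader] section that would route chat() into the new INF_PROVIDERS path (key names come from the diff; the model and token values are illustrative only):

from configparser import ConfigParser

# Stand-in for the config that getconfig() would load from file.
model_config = ConfigParser()
model_config.read_string("""
[reader]
TYPE = INF_PROVIDERS
INF_PROVIDER_MODEL = meta-llama/Llama-3.1-8B-Instruct
MAX_TOKENS = 512
""")

# Mirrors the lookups in the diff: TYPE selects the branch,
# MAX_TOKENS is cast to int before being passed to the client.
assert model_config.get('reader', 'TYPE') == 'INF_PROVIDERS'
assert int(model_config.get('reader', 'MAX_TOKENS')) == 512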