Spaces:

prithvirajpawar
/

insuranceai

Running

App Files Files Community

prithvirajpawar committed on Jan 20

Commit

527779b

1 Parent(s): 09a6298

changes for voice input

Browse files

Files changed (3) hide show

app.py +50 -21
helpmate_ai.py +2 -2
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, Request, Depends, HTTPException, Header
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import List, Optional
@@ -8,12 +8,20 @@ import os
 from dotenv import load_dotenv
 import re
 # Load environment variables
 load_dotenv()
 gemini_api_key = os.getenv("GEMINI_API_KEY")
 genai.configure(api_key=gemini_api_key)
 # Initialize FastAPI app
 app = FastAPI()
@@ -25,7 +33,6 @@ app = FastAPI()
 #     allow_methods=["*"],
 #     allow_headers=["*"],
 # )
-# Define a secret API key (use environment variables in production)
 # Pydantic models for request/response validation
 class Message(BaseModel):
@@ -44,27 +51,17 @@ conversation_bot = []
 conversation = initialize_conversation()
 model = genai.GenerativeModel("gemini-1.5-flash", system_instruction=conversation)
-def format_rag_response(response_text: str) -> str:
-    # formatted_text = response_text.replace("\n", "<br>")
-    formatted_text=response_text
-    formatted_text = re.sub(r'(\*\*.*?\*\*)', r'<strong>\1</strong>', formatted_text).replace("**", "")
-    formatted_text = re.sub(r'(\d+\.\s)', r'<br><strong>\1</strong>', formatted_text)
-    formatted_text = re.sub(r'(\-\s)', r'<br>&bull; ', formatted_text)
-    formatted_text = re.sub(r'(Citations?:\s)', r'<br><em>\1</em>', formatted_text)
-    formatted_text = re.sub(r'\|\s*', r'</td><td>', formatted_text)
-    formatted_text = re.sub(r'\n\|\s*', r'<tr><td>', formatted_text)
-    return formatted_text
-def get_gemini_completions(conversation: str) -> str:
-    response = model.generate_content(conversation)
-    return response.text
-API_KEY = os.getenv("API_KEY")
 # Dependency to check the API key
 async def verify_api_key(x_api_key: str = Header(...)):
     if x_api_key != API_KEY:
         raise HTTPException(status_code=403, detail="Unauthorized")
 # @app.get("/secure-endpoint", dependencies=[Depends(verify_api_key)])
 # async def secure_endpoint():
@@ -95,17 +92,49 @@ async def chat(request: ChatRequest):
     top_docs = rerank_with_cross_encoder(request.message, results_df)
     messages = generate_response(request.message, top_docs)
     response_assistant = get_gemini_completions(messages)
-    formatted_response = format_rag_response(response_assistant)
     # Add bot response to conversation
-    bot_message = Message(role="bot", content=formatted_response)
     conversation_bot.append(bot_message)
     return ChatResponse(
-        response=formatted_response,
         conversation=conversation_bot
     )
 # Reset conversation endpoint
 @app.post("/reset", dependencies=[Depends(verify_api_key)])
 async def reset_conversation():

+from fastapi import FastAPI, Request, Depends, HTTPException, Header, File, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import List, Optional
 from dotenv import load_dotenv
 import re
+import speech_recognition as sr
+from io import BytesIO
+import wave
+import google.generativeai as genai
 # Load environment variables
 load_dotenv()
 gemini_api_key = os.getenv("GEMINI_API_KEY")
 genai.configure(api_key=gemini_api_key)
+# Define a secret API key (use environment variables in production)
+API_KEY = os.getenv("API_KEY")
 # Initialize FastAPI app
 app = FastAPI()
 #     allow_methods=["*"],
 #     allow_headers=["*"],
 # )
 # Pydantic models for request/response validation
 class Message(BaseModel):
 conversation = initialize_conversation()
 model = genai.GenerativeModel("gemini-1.5-flash", system_instruction=conversation)
+# Initialize speech recognizer
+recognizer = sr.Recognizer()
 # Dependency to check the API key
 async def verify_api_key(x_api_key: str = Header(...)):
     if x_api_key != API_KEY:
         raise HTTPException(status_code=403, detail="Unauthorized")
+def get_gemini_completions(conversation: str) -> str:
+    """Send ``conversation`` to the module-level Gemini ``model`` and return the reply text."""
+    response = model.generate_content(conversation)
+    return response.text
 # @app.get("/secure-endpoint", dependencies=[Depends(verify_api_key)])
 # async def secure_endpoint():
     top_docs = rerank_with_cross_encoder(request.message, results_df)
     messages = generate_response(request.message, top_docs)
     response_assistant = get_gemini_completions(messages)
+    # formatted_response = format_rag_response(response_assistant)
     # Add bot response to conversation
+    bot_message = Message(role="bot", content=response_assistant)
     conversation_bot.append(bot_message)
     return ChatResponse(
+        response=response_assistant,
         conversation=conversation_bot
     )
+# Voice processing endpoint
+@app.post("/process-voice")
+async def process_voice(audio_file: UploadFile = File(...)):
+    """Transcribe an uploaded audio file and answer it via the chat pipeline.
+
+    The upload is read fully into memory, opened with ``sr.AudioFile``,
+    transcribed with ``recognizer.recognize_google``, and the transcript is
+    passed through ``retreive_results`` -> ``rerank_with_cross_encoder`` ->
+    ``generate_response`` -> ``get_gemini_completions``.
+
+    Returns:
+        A dict with ``transcribed_text`` and ``response`` on success, or a
+        dict with a single ``error`` key if any step raises.
+
+    NOTE(review): unlike ``/reset``, this route declares no
+    ``verify_api_key`` dependency -- confirm that is intentional.
+    NOTE(review): the exchange is not appended to ``conversation_bot`` here.
+    """
+# async def process_voice(name: str):
+    try:
+        # Read the whole uploaded file into an in-memory buffer
+        contents = await audio_file.read()
+        audio_data = BytesIO(contents)
+        # Open the uploaded bytes as an audio source. No format conversion
+        # happens here; presumably the upload must already be in a format
+        # sr.AudioFile accepts (e.g. WAV) -- TODO confirm with the client.
+        with sr.AudioFile(audio_data) as source:
+            audio = recognizer.record(source)
+        # Perform speech recognition
+        text = recognizer.recognize_google(audio)
+        # print(text)
+        # Process the transcript through the same retrieval/generation steps
+        # used for typed chat messages
+        results_df = retreive_results(text)
+        top_docs = rerank_with_cross_encoder(text, results_df)
+        messages = generate_response(text, top_docs)
+        response_assistant = get_gemini_completions(messages)
+        return {
+            "transcribed_text": text,
+            "response": response_assistant
+        }
+    # Any failure (audio parsing, recognition, retrieval, generation) is
+    # reported in the response body instead of being raised to the framework.
+    except Exception as e:
+        return {"error": f"Error processing voice input: {str(e)}"}
 # Reset conversation endpoint
 @app.post("/reset", dependencies=[Depends(verify_api_key)])
 async def reset_conversation():

helpmate_ai.py CHANGED Viewed

@@ -40,13 +40,13 @@ def initialize_conversation():
             Guidelines:
             1. Extract information that directly answers the user's query from the document excerpts.
-            3. Provide the final response as a well-formatted and easily readable text along with the citation.
             4. Provide your complete response using the relevant parts in the documents.
             5. The generated response should answer the query directly addressing the user and avoiding additional information.
             6. If the provided excerpts do not fully answer the query, provide partial information and suggest which sections of the policy document the user should review for further details.
             7. If no relevant information is found in the provided excerpts, respond with 'No relevant information found in the provided excerpts.'
-            # Start with a short welcome message with smiley only in the begining of the chat session and not in every response.
             """
     ]

             Guidelines:
             1. Extract information that directly answers the user's query from the document excerpts.
+            3. Provide the final response as a well-formatted HTML and easily readable text along with the citation.
             4. Provide your complete response using the relevant parts in the documents.
             5. The generated response should answer the query directly addressing the user and avoiding additional information.
             6. If the provided excerpts do not fully answer the query, provide partial information and suggest which sections of the policy document the user should review for further details.
             7. If no relevant information is found in the provided excerpts, respond with 'No relevant information found in the provided excerpts.'
+            # Start the session with a short welcome message and a smiley.
             """
     ]

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ uvicorn[standard]
 jinja2
 python-multipart
 sentence-transformers
-python-dotenv

 jinja2
 python-multipart
 sentence-transformers
+python-dotenv
+SpeechRecognition