prithvirajpawar committed on
Commit
527779b
·
1 Parent(s): 09a6298

changes for voice input

Browse files
Files changed (3) hide show
  1. app.py +50 -21
  2. helpmate_ai.py +2 -2
  3. requirements.txt +2 -1
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, Request, Depends, HTTPException, Header
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  from typing import List, Optional
@@ -8,12 +8,20 @@ import os
8
  from dotenv import load_dotenv
9
  import re
10
 
 
 
 
 
 
11
 
12
  # Load environment variables
13
  load_dotenv()
14
  gemini_api_key = os.getenv("GEMINI_API_KEY")
15
  genai.configure(api_key=gemini_api_key)
16
 
 
 
 
17
  # Initialize FastAPI app
18
  app = FastAPI()
19
 
@@ -25,7 +33,6 @@ app = FastAPI()
25
  # allow_methods=["*"],
26
  # allow_headers=["*"],
27
  # )
28
- # Define a secret API key (use environment variables in production)
29
 
30
  # Pydantic models for request/response validation
31
  class Message(BaseModel):
@@ -44,27 +51,17 @@ conversation_bot = []
44
  conversation = initialize_conversation()
45
  model = genai.GenerativeModel("gemini-1.5-flash", system_instruction=conversation)
46
 
47
- def format_rag_response(response_text: str) -> str:
48
- # formatted_text = response_text.replace("\n", "<br>")
49
- formatted_text=response_text
50
- formatted_text = re.sub(r'(\*\*.*?\*\*)', r'<strong>\1</strong>', formatted_text).replace("**", "")
51
- formatted_text = re.sub(r'(\d+\.\s)', r'<br><strong>\1</strong>', formatted_text)
52
- formatted_text = re.sub(r'(\-\s)', r'<br>&bull; ', formatted_text)
53
- formatted_text = re.sub(r'(Citations?:\s)', r'<br><em>\1</em>', formatted_text)
54
- formatted_text = re.sub(r'\|\s*', r'</td><td>', formatted_text)
55
- formatted_text = re.sub(r'\n\|\s*', r'<tr><td>', formatted_text)
56
- return formatted_text
57
-
58
- def get_gemini_completions(conversation: str) -> str:
59
- response = model.generate_content(conversation)
60
- return response.text
61
-
62
- API_KEY = os.getenv("API_KEY")
63
 
64
  # Dependency to check the API key
65
  async def verify_api_key(x_api_key: str = Header(...)):
66
  if x_api_key != API_KEY:
67
  raise HTTPException(status_code=403, detail="Unauthorized")
 
 
 
 
68
 
69
  # @app.get("/secure-endpoint", dependencies=[Depends(verify_api_key)])
70
  # async def secure_endpoint():
@@ -95,17 +92,49 @@ async def chat(request: ChatRequest):
95
  top_docs = rerank_with_cross_encoder(request.message, results_df)
96
  messages = generate_response(request.message, top_docs)
97
  response_assistant = get_gemini_completions(messages)
98
- formatted_response = format_rag_response(response_assistant)
99
 
100
  # Add bot response to conversation
101
- bot_message = Message(role="bot", content=formatted_response)
102
  conversation_bot.append(bot_message)
103
 
104
  return ChatResponse(
105
- response=formatted_response,
106
  conversation=conversation_bot
107
  )
 
 
 
 
 
 
 
 
 
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  # Reset conversation endpoint
110
  @app.post("/reset", dependencies=[Depends(verify_api_key)])
111
  async def reset_conversation():
 
1
+ from fastapi import FastAPI, Request, Depends, HTTPException, Header, File, UploadFile
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  from typing import List, Optional
 
8
  from dotenv import load_dotenv
9
  import re
10
 
11
+ import speech_recognition as sr
12
+ from io import BytesIO
13
+ import wave
14
+ import google.generativeai as genai
15
+
16
 
17
  # Load environment variables
18
  load_dotenv()
19
  gemini_api_key = os.getenv("GEMINI_API_KEY")
20
  genai.configure(api_key=gemini_api_key)
21
 
22
+ # Define a secret API key (use environment variables in production)
23
+ API_KEY = os.getenv("API_KEY")
24
+
25
  # Initialize FastAPI app
26
  app = FastAPI()
27
 
 
33
  # allow_methods=["*"],
34
  # allow_headers=["*"],
35
  # )
 
36
 
37
  # Pydantic models for request/response validation
38
  class Message(BaseModel):
 
51
  conversation = initialize_conversation()
52
  model = genai.GenerativeModel("gemini-1.5-flash", system_instruction=conversation)
53
 
54
+ # Initialize speech recognizer
55
+ recognizer = sr.Recognizer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Dependency to check the API key
58
  async def verify_api_key(x_api_key: str = Header(...)):
59
  if x_api_key != API_KEY:
60
  raise HTTPException(status_code=403, detail="Unauthorized")
61
+
62
+ def get_gemini_completions(conversation: str) -> str:
63
+ response = model.generate_content(conversation)
64
+ return response.text
65
 
66
  # @app.get("/secure-endpoint", dependencies=[Depends(verify_api_key)])
67
  # async def secure_endpoint():
 
92
  top_docs = rerank_with_cross_encoder(request.message, results_df)
93
  messages = generate_response(request.message, top_docs)
94
  response_assistant = get_gemini_completions(messages)
95
+ # formatted_response = format_rag_response(response_assistant)
96
 
97
  # Add bot response to conversation
98
+ bot_message = Message(role="bot", content=response_assistant)
99
  conversation_bot.append(bot_message)
100
 
101
  return ChatResponse(
102
+ response=response_assistant,
103
  conversation=conversation_bot
104
  )
105
+ # Voice processing endpoint
106
+ @app.post("/process-voice")
107
+ async def process_voice(audio_file: UploadFile = File(...)):
108
+ # async def process_voice(name: str):
109
+
110
+ try:
111
+ # Read the audio file
112
+ contents = await audio_file.read()
113
+ audio_data = BytesIO(contents)
114
 
115
+
116
+ # Open the uploaded audio for speech recognition (no format conversion happens here; sr.AudioFile expects WAV/AIFF/FLAC input)
117
+ with sr.AudioFile(audio_data) as source:
118
+ audio = recognizer.record(source)
119
+
120
+ # Perform speech recognition
121
+ text = recognizer.recognize_google(audio)
122
+ # print(text)
123
+
124
+ # Process the text through the chat pipeline
125
+ results_df = retreive_results(text)
126
+ top_docs = rerank_with_cross_encoder(text, results_df)
127
+ messages = generate_response(text, top_docs)
128
+ response_assistant = get_gemini_completions(messages)
129
+
130
+ return {
131
+ "transcribed_text": text,
132
+ "response": response_assistant
133
+ }
134
+
135
+ except Exception as e:
136
+ return {"error": f"Error processing voice input: {str(e)}"}
137
+
138
  # Reset conversation endpoint
139
  @app.post("/reset", dependencies=[Depends(verify_api_key)])
140
  async def reset_conversation():
helpmate_ai.py CHANGED
@@ -40,13 +40,13 @@ def initialize_conversation():
40
 
41
  Guidelines:
42
  1. Extract information that directly answers the user's query from the document excerpts.
43
- 3. Provide the final response as a well-formatted and easily readable text along with the citation.
44
  4. Provide your complete response using the relevant parts in the documents.
45
  5. The generated response should answer the query directly addressing the user and avoiding additional information.
46
  6. If the provided excerpts do not fully answer the query, provide partial information and suggest which sections of the policy document the user should review for further details.
47
  7. If no relevant information is found in the provided excerpts, respond with 'No relevant information found in the provided excerpts.'
48
 
49
- # Start with a short welcome message with smiley only in the begining of the chat session and not in every response.
50
  """
51
  ]
52
 
 
40
 
41
  Guidelines:
42
  1. Extract information that directly answers the user's query from the document excerpts.
43
+ 3. Provide the final response as a well-formatted HTML and easily readable text along with the citation.
44
  4. Provide your complete response using the relevant parts in the documents.
45
  5. The generated response should answer the query directly addressing the user and avoiding additional information.
46
  6. If the provided excerpts do not fully answer the query, provide partial information and suggest which sections of the policy document the user should review for further details.
47
  7. If no relevant information is found in the provided excerpts, respond with 'No relevant information found in the provided excerpts.'
48
 
49
+ # Start the session with a short welcome message and a smiley.
50
  """
51
  ]
52
 
requirements.txt CHANGED
@@ -7,4 +7,5 @@ uvicorn[standard]
7
  jinja2
8
  python-multipart
9
  sentence-transformers
10
- python-dotenv
 
 
7
  jinja2
8
  python-multipart
9
  sentence-transformers
10
+ python-dotenv
11
+ SpeechRecognition