Spaces:
Sleeping
Sleeping
File size: 10,833 Bytes
e36c22a 6df9459 e36c22a 6df9459 189677e 6df9459 189677e 6df9459 189677e 6df9459 189677e 6df9459 189677e 6df9459 e36c22a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 |
# NEW CODE
import os
import json
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from vectorize_documents import embeddings
import speech_recognition as sr
from deep_translator import GoogleTranslator
# Set up working directory and API configuration
working_dir = os.path.dirname(os.path.abspath(__file__))
# Read config.json (expected next to this script) inside a context manager so
# the file handle is closed deterministically instead of being leaked.
with open(f"{working_dir}/config.json", encoding="utf-8") as config_file:
    config_data = json.load(config_file)
# Expose the key through the environment; a missing "GROQ_API_KEY" entry
# raises KeyError here, at startup.
os.environ["GROQ_API_KEY"] = config_data["GROQ_API_KEY"]
# Streamlit session state initialization
def initialize_session_state():
    """Fill in any missing entries of ``st.session_state``.

    Three keys are ensured, in this order: ``"chat_history"`` (an empty
    list), ``"vectorstore"`` (result of ``setup_vectorstore()``) and
    ``"chain"`` (result of ``chat_chain`` applied to the stored vector
    store). Keys that are already present are left untouched.
    """
    state = st.session_state
    # Factories are called lazily and in order, so the "chain" factory can
    # rely on "vectorstore" already being present in the state.
    lazy_defaults = (
        ("chat_history", list),
        ("vectorstore", setup_vectorstore),
        ("chain", lambda: chat_chain(state["vectorstore"])),
    )
    for key, make_value in lazy_defaults:
        if key not in state:
            state[key] = make_value()
# Vectorstore setup
def setup_vectorstore():
    """Open the Chroma store persisted in ``vector_db_dir`` under ``working_dir``.

    A fresh ``HuggingFaceEmbeddings()`` instance (library defaults) is used
    as the embedding function.

    Returns:
        The constructed ``Chroma`` vector store.
    """
    embedding_model = HuggingFaceEmbeddings()
    return Chroma(
        persist_directory=f"{working_dir}/vector_db_dir",
        embedding_function=embedding_model,
    )
# Chat chain setup
def chat_chain(vectorstore):
    """Build the conversational retrieval chain used to answer questions.

    Args:
        vectorstore: Vector store whose ``as_retriever()`` supplies the
            documents handed to the LLM.

    Returns:
        A ``ConversationalRetrievalChain`` whose result contains both
        ``"answer"`` and ``"source_documents"``.
    """
    from langchain_groq import ChatGroq

    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0,
    )
    retriever = vectorstore.as_retriever()
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        # The chain below returns two outputs (answer + source documents);
        # the memory must be told which one to record as the AI turn.
        output_key="answer",
        return_messages=True,
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        # The UI decides whether an answer exists by inspecting
        # response["source_documents"]; without this flag that key is never
        # present and every question is reported as unanswerable.
        return_source_documents=True,
    )
    return chain
# Transcription function
def transcribe_audio(selected_language):
    """Record one utterance from the microphone and transcribe it.

    Status and error messages are written to the Streamlit page rather than
    raised to the caller.

    Args:
        selected_language: Language code forwarded to
            ``recognize_google`` as its ``language`` argument.

    Returns:
        The recognized text, or ``None`` when listening timed out, the audio
        could not be understood, the recognition request failed, or the
        microphone could not be opened.
    """
    try:
        recognizer = sr.Recognizer()
        with sr.Microphone() as source:
            st.write("🎤 Listening... Please ask your question.")
            try:
                # timeout=5 is passed to listen(); WaitTimeoutError is
                # handled below.
                audio = recognizer.listen(source, timeout=5)
                query = recognizer.recognize_google(audio, language=selected_language)
                st.write(f"**🗣️ You said:** {query}")
                return query
            except sr.WaitTimeoutError:
                st.error("⏳ You didn't speak in time. Please try again.")
            except sr.UnknownValueError:
                st.error("❌ Sorry, could not understand the audio. Please try again.")
            except sr.RequestError as e:
                st.error(f"⚠️ Error with speech recognition service: {e}")
    except AttributeError:
        # Reported to the user as a missing microphone / PyAudio install.
        st.error("❌ Microphone or PyAudio not available. Please check installation.")
    except OSError as e:
        st.error(f"⚠️ Audio input error: {e}")
    return None
# Translation functions
def translate_to_english(text, source_lang):
    """Translate ``text`` from ``source_lang`` into English.

    Args:
        text: Text to translate. ``None`` or an empty string is returned
            unchanged without contacting the translation service.
        source_lang: Language code of ``text``; ``"en"`` skips translation.

    Returns:
        The English text, or ``text`` itself when no translation is needed.
    """
    # Nothing to translate: avoid handing None/"" to the translator.
    if not text:
        return text
    if source_lang == "en":  # Skip translation if the language is English
        return text
    return GoogleTranslator(source=source_lang, target="en").translate(text)
def translate_from_english(text, target_lang):
    """Translate English ``text`` into ``target_lang``.

    Args:
        text: English text to translate. ``None`` or an empty string is
            returned unchanged without contacting the translation service.
        target_lang: Language code to translate into; ``"en"`` skips
            translation.

    Returns:
        The translated text, or ``text`` itself when no translation is needed.
    """
    # Nothing to translate: avoid handing None/"" to the translator.
    if not text:
        return text
    if target_lang == "en":  # Skip translation if the language is English
        return text
    return GoogleTranslator(source="en", target=target_lang).translate(text)
# Streamlit UI
# Make sure chat history, vector store and chain exist in st.session_state
# before the page is drawn.
initialize_session_state()
# Inject the CSS classes (.main-title, .sub-title, .icon) used by the header
# markup below; unsafe_allow_html is required for the raw <style> tag.
st.markdown(
"""
<style>
.main-title {
font-size: 36px;
color: #FF8C00;
font-weight: bold;
}
.sub-title {
font-size: 24px;
color: #FF8C00;
}
.icon {
font-size: 50px;
color: #FF8C00;
}
</style>
""",
unsafe_allow_html=True
)
# Page header: icon, title and subtitle, each styled by the classes above.
# NOTE(review): the icon glyph looks like mis-decoded text (probably an emoji
# in the original file) — confirm against the source of truth.
st.markdown('<div class="icon">π</div>', unsafe_allow_html=True)
st.markdown('<div class="main-title">Bhagavad Gita & Yoga Sutras Query Assistant</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-title">Ask questions and explore timeless wisdom</div>', unsafe_allow_html=True)
# Language support
# Maps the display name shown in the language picker to the language code
# handed to speech recognition and translation. Insertion order is the order
# of the picker, so "English" stays first.
# NOTE(review): it is not verified here that every code below is accepted by
# the speech recognizer and by GoogleTranslator — confirm against their
# supported-language lists.
indian_languages = {
    "English": "en",
    "Assamese": "as",
    "Bengali": "bn",
    "Gujarati": "gu",
    "Hindi": "hi",
    "Kannada": "kn",
    "Kashmiri": "ks",
    "Konkani": "kok",
    "Malayalam": "ml",
    "Manipuri": "mni",
    "Marathi": "mr",
    "Nepali": "ne",
    "Odia": "or",
    "Punjabi": "pa",
    "Sanskrit": "sa",
    "Santali": "sat",
    "Sindhi": "sd",
    "Tamil": "ta",
    "Telugu": "te",
    "Urdu": "ur",
    "Bodo": "brx",
    "Dogri": "doi",
    "Maithili": "mai",
    # Alternate spelling of "Santali"; kept as its own picker entry.
    "Santhali": "sat",
    "Tulu": "tcy",
    "Bhili/Bhilodi": "bhi",
    "Khasi": "kha",
    "Garo": "grt",
    "Mizo": "lus",
    "Sora": "srb",
    "Ho": "hoc",
    "Kurukh": "kru",
    "Korwa": "kfa",
    "Gondi": "gon",
}
# Language picker: the chosen display name is mapped to its language code.
selected_language = st.selectbox("Select your language:", options=list(indian_languages.keys()))
language_code = indian_languages[selected_language]

# User-friendly input selection
st.markdown("### How would you like to ask your question?")
input_mode = st.radio("Choose input method:", ("Voice", "Typing"))

# Collect the raw question in the user's language; stays None until the user
# has actually spoken or typed something.
user_query = None
if input_mode == "Voice":
    st.write("Click the button below to speak your question:")
    if st.button("🎤 Use Voice Input"):
        user_query = transcribe_audio(language_code)
else:
    user_query = st.text_input("Type your question here:")

# The chain is queried in English regardless of the input language.
if user_query:
    user_query = translate_to_english(user_query, language_code)

if user_query:
    with st.spinner("Getting answer..."):
        response = st.session_state["chain"]({"question": user_query})
        relevant_content = response.get("source_documents", None)
        answer = response.get("answer", None)
        # No retrieved documents (or no answer text at all) is reported as
        # "no answer" instead of passing None on to translation.
        if not relevant_content or not answer:
            st.markdown("### ❌ **No Answer Available:**")
            st.write("The system does not have sufficient information to answer this question.")
        else:
            translated_answer = translate_from_english(answer, language_code)
            st.markdown("### ✅ **Answer:**")
            st.write(translated_answer)
# Previous implementation, kept for reference only: it lives inside the string literal below and is never executed.
'''import os
import json
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from vectorize_documents import embeddings # Import embeddings from the vectorization script
import speech_recognition as sr # For voice recognition
# Set up working directory and API configuration
working_dir = os.path.dirname(os.path.abspath(__file__))
config_data = json.load(open(f"{working_dir}/config.json"))
os.environ["GROQ_API_KEY"] = config_data["GROQ_API_KEY"]
def setup_vectorstore():
persist_directory = f"{working_dir}/vector_db_dir"
vectorstore = Chroma(
persist_directory=persist_directory,
embedding_function=embeddings
)
return vectorstore
def chat_chain(vectorstore):
from langchain_groq import ChatGroq # Import the LLM class
llm = ChatGroq(
model="llama-3.1-70b-versatile", # Replace with your LLM of choice
temperature=0 # Set low temperature to reduce hallucinations
)
retriever = vectorstore.as_retriever() # Retrieve relevant chunks
memory = ConversationBufferMemory(
llm=llm,
output_key="answer",
memory_key="chat_history",
return_messages=True
)
# Build the conversational retrieval chain
chain = ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=retriever,
chain_type="stuff", # Define how documents are combined
memory=memory,
verbose=True,
return_source_documents=True
)
return chain
def transcribe_audio(selected_language):
"""Function to capture and transcribe audio in the selected language."""
try:
recognizer = sr.Recognizer()
with sr.Microphone() as source:
st.write("π€ Listening... Please ask your question.")
try:
audio = recognizer.listen(source, timeout=5) # 5 seconds to start speaking
query = recognizer.recognize_google(audio, language=selected_language) # Transcribe audio in selected language
st.write(f"**π£οΈ You said:** {query}")
return query
except sr.WaitTimeoutError:
st.error("β³ You didn't speak in time. Please try again.")
except sr.UnknownValueError:
st.error("β Sorry, could not understand the audio. Please try again.")
except sr.RequestError as e:
st.error(f"β οΈ Error with speech recognition service: {e}")
except AttributeError:
st.error("β Microphone or PyAudio not available. Please check installation.")
except OSError as e:
st.error(f"β οΈ Audio input error: {e}")
return None
# Streamlit UI
st.markdown(
"""
<style>
.main-title {
font-size: 36px;
color: #FF8C00;
font-weight: bold;
}
.sub-title {
font-size: 24px;
color: #FF8C00;
}
.icon {
font-size: 50px;
color: #FF8C00;
}
</style>
""",
unsafe_allow_html=True
)
st.markdown('<div class="icon">π</div>', unsafe_allow_html=True)
st.markdown('<div class="main-title">Bhagavad Gita & Yoga Sutras Query Assistant</div>', unsafe_allow_html=True)
st.markdown('<div class="sub-title">Ask questions and explore timeless wisdom</div>', unsafe_allow_html=True)
vectorstore = setup_vectorstore()
chain = chat_chain(vectorstore)
# User input options
st.write("You can either type your question or use voice search:")
st.markdown("### π Type your query or ποΈ Use voice search")
# Multilingual support: Select language for voice input
language_options = {
"English": "en-US",
"Hindi": "hi-IN",
"Spanish": "es-ES",
"French": "fr-FR",
"German": "de-DE"
}
selected_language = st.selectbox("Select your language for voice search:", options=list(language_options.keys()))
language_code = language_options[selected_language]
if st.button("ποΈ Use Voice Search"):
user_query = transcribe_audio(language_code)
else:
user_query = st.text_input("Ask a question about the Bhagavad Gita or Yoga Sutras:")
if user_query:
# Use `__call__` to get all outputs as a dictionary
response = chain({"question": user_query})
answer = response.get("answer", "No answer found.")
source_documents = response.get("source_documents", [])
st.markdown("### β
**Answer:**")
st.write(answer)
st.markdown("### π **Source Documents:**")
for doc in source_documents:
st.write(doc)
''' |