#3 by Krish30 - opened

app.py CHANGED
@@ -1,3 +1,5 @@
+# NEW CODE
+
 import os
 import json
 import streamlit as st
@@ -5,6 +7,213 @@ from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_chroma import Chroma
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
+from vectorize_documents import embeddings
+import speech_recognition as sr
+from deep_translator import GoogleTranslator
+
+# Set up working directory and API configuration
+working_dir = os.path.dirname(os.path.abspath(__file__))
+config_data = json.load(open(f"{working_dir}/config.json"))
+os.environ["GROQ_API_KEY"] = config_data["GROQ_API_KEY"]
+
+# Streamlit session state initialization
+def initialize_session_state():
+    if "chat_history" not in st.session_state:
+        st.session_state["chat_history"] = []
+    if "vectorstore" not in st.session_state:
+        st.session_state["vectorstore"] = setup_vectorstore()
+    if "chain" not in st.session_state:
+        st.session_state["chain"] = chat_chain(st.session_state["vectorstore"])
+
+# Vectorstore setup
+def setup_vectorstore():
+    embeddings = HuggingFaceEmbeddings()
+    vectorstore = Chroma(
+        persist_directory=f"{working_dir}/vector_db_dir",
+        embedding_function=embeddings
+    )
+    return vectorstore
+
+# Chat chain setup
+def chat_chain(vectorstore):
+    from langchain_groq import ChatGroq
+    llm = ChatGroq(
+        model="llama-3.1-70b-versatile",
+        temperature=0
+    )
+    retriever = vectorstore.as_retriever()
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        return_messages=True
+    )
+    chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        retriever=retriever,
+        chain_type="stuff",
+        memory=memory,
+        verbose=True
+    )
+    return chain
+
+# Transcription function
+def transcribe_audio(selected_language):
+    try:
+        recognizer = sr.Recognizer()
+        with sr.Microphone() as source:
+            st.write("🎤 Listening... Please ask your question.")
+            try:
+                audio = recognizer.listen(source, timeout=5)
+                query = recognizer.recognize_google(audio, language=selected_language)
+                st.write(f"**🗣️ You said:** {query}")
+                return query
+            except sr.WaitTimeoutError:
+                st.error("⏳ You didn't speak in time. Please try again.")
+            except sr.UnknownValueError:
+                st.error("❓ Sorry, could not understand the audio. Please try again.")
+            except sr.RequestError as e:
+                st.error(f"⚠️ Error with speech recognition service: {e}")
+    except AttributeError:
+        st.error("❌ Microphone or PyAudio not available. Please check installation.")
+    except OSError as e:
+        st.error(f"⚠️ Audio input error: {e}")
+    return None
+
+# Translation functions
+def translate_to_english(text, source_lang):
+    if source_lang == "en":  # Skip translation if the language is English
+        return text
+    return GoogleTranslator(source=source_lang, target="en").translate(text)
+
+def translate_from_english(text, target_lang):
+    if target_lang == "en":  # Skip translation if the language is English
+        return text
+    return GoogleTranslator(source="en", target=target_lang).translate(text)
+
+# Streamlit UI
+initialize_session_state()
+
+st.markdown(
+    """
+    <style>
+    .main-title {
+        font-size: 36px;
+        color: #FF8C00;
+        font-weight: bold;
+    }
+    .sub-title {
+        font-size: 24px;
+        color: #FF8C00;
+    }
+    .icon {
+        font-size: 50px;
+        color: #FF8C00;
+    }
+    </style>
+    """,
+    unsafe_allow_html=True
+)
+
+st.markdown('<div class="icon">📖</div>', unsafe_allow_html=True)
+st.markdown('<div class="main-title">Bhagavad Gita & Yoga Sutras Query Assistant</div>', unsafe_allow_html=True)
+st.markdown('<div class="sub-title">Ask questions and explore timeless wisdom</div>', unsafe_allow_html=True)
+
+# Language support
+indian_languages = {
+    "English": "en",
+    "Assamese": "as",
+    "Bengali": "bn",
+    "Gujarati": "gu",
+    "Hindi": "hi",
+    "Kannada": "kn",
+    "Kashmiri": "ks",
+    "Konkani": "kok",
+    "Malayalam": "ml",
+    "Manipuri": "mni",
+    "Marathi": "mr",
+    "Nepali": "ne",
+    "Odia": "or",
+    "Punjabi": "pa",
+    "Sanskrit": "sa",
+    "Santali": "sat",
+    "Sindhi": "sd",
+    "Tamil": "ta",
+    "Telugu": "te",
+    "Urdu": "ur",
+    "Bodo": "brx",
+    "Dogri": "doi",
+    "Maithili": "mai",
+    "Santhali": "sat",
+    "Tulu": "tcy",
+    "Bhili/Bhilodi": "bhi",
+    "Khasi": "kha",
+    "Garo": "grt",
+    "Mizo": "lus",
+    "Sora": "srb",
+    "Ho": "hoc",
+    "Kurukh": "kru",
+    "Korwa": "kfa",
+    "Gondi": "gon",
+}
|
158 |
+
selected_language = st.selectbox("Select your language:", options=list(indian_languages.keys()))
|
159 |
+
language_code = indian_languages[selected_language]
|
160 |
+
|
161 |
+
# User-friendly input selection
|
162 |
+
st.markdown("### How would you like to ask your question?")
|
163 |
+
input_mode = st.radio("Choose input method:", ("Voice", "Typing"))
|
164 |
+
|
165 |
+
user_query = None # Initialize the variable to ensure it's always defined
|
166 |
+
|
167 |
+
if input_mode == "Voice":
|
168 |
+
st.write("Click the button below to speak your question:")
|
169 |
+
if st.button("π€ Use Voice Input"):
|
170 |
+
user_query = transcribe_audio(language_code)
|
171 |
+
if user_query:
|
172 |
+
user_query = translate_to_english(user_query, language_code)
|
173 |
+
else:
|
174 |
+
user_query = st.text_input("Type your question here:")
|
175 |
+
if user_query:
|
176 |
+
user_query = translate_to_english(user_query, language_code)
|
177 |
+
|
178 |
+
if user_query: # This check will now always work
|
179 |
+
with st.spinner("Getting answer..."):
|
180 |
+
response = st.session_state["chain"]({"question": user_query})
|
181 |
+
relevant_content = response.get("source_documents", None)
|
182 |
+
|
183 |
+
if not relevant_content:
|
184 |
+
st.markdown("### β **No Answer Available:**")
|
185 |
+
st.write("The system does not have sufficient information to answer this question.")
|
186 |
+
else:
|
187 |
+
answer = response.get("answer", None)
|
188 |
+
translated_answer = translate_from_english(answer, language_code)
|
189 |
+
st.markdown("### β
**Answer:**")
|
190 |
+
st.write(translated_answer)
|
191 |
+
|
192 |
+
|
193 |
+
|
194 |
+
|
195 |
+
|
196 |
+
|
197 |
+
|
198 |
+
|
199 |
+
|
200 |
+
|
201 |
+
|
202 |
+
|
203 |
+
|
204 |
+
|
205 |
+
|
206 |
+
|
207 |
+
|
208 |
+
|
209 |
+
# OLD CODE IS BELOW
|
210 |
+
'''import os
|
211 |
+
import json
|
212 |
+
import streamlit as st
|
213 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
214 |
+
from langchain_chroma import Chroma
|
215 |
+
from langchain.memory import ConversationBufferMemory
|
216 |
+
from langchain.chains import ConversationalRetrievalChain
|
217 |
from vectorize_documents import embeddings # Import embeddings from the vectorization script
|
218 |
import speech_recognition as sr # For voice recognition
|
219 |
|
|
|
339 |
st.markdown("### π **Source Documents:**")
|
340 |
for doc in source_documents:
|
341 |
st.write(doc)
|
342 |
+
'''
|
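
A few notes for anyone trying this revision of the Space locally. `app.py` loads a `config.json` from its own directory and reads a `GROQ_API_KEY` entry; that file is not part of the diff, so here is a minimal sketch of what it presumably looks like (the key value is a placeholder):

```python
# Hypothetical helper to create the config.json that app.py loads at startup;
# GROQ_API_KEY is the only key the diff actually reads.
import json

with open("config.json", "w") as f:
    json.dump({"GROQ_API_KEY": "gsk_your_groq_key_here"}, f, indent=2)
```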
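One behavioral note: the answer path checks `response.get("source_documents")`, but `ConversationalRetrievalChain` only includes that key when it is built with `return_source_documents=True`, so as written the "No Answer Available" branch should always trigger. A sketch of the adjusted setup, assuming the pinned langchain version behaves like current releases (`output_key="answer"` tells the memory which of the multiple outputs to store):

```python
# Sketch: the same chain as in chat_chain above (llm and retriever as defined
# there), but configured to return the retrieved source documents.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",  # required once the chain returns more than one key
)
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    memory=memory,
    return_source_documents=True,
    verbose=True,
)
```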
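The voice path depends on PyAudio at runtime: `sr.Microphone` raises `AttributeError` without it, which is exactly what the outer `except` in `transcribe_audio` catches. Also, `recognize_google` expects BCP-47 style tags, so regional forms like `"hi-IN"` tend to be more reliable than the bare codes in `indian_languages`, and several of the listed codes (e.g. `"brx"`, `"srb"`) likely have no speech model at all. A minimal standalone test, assuming SpeechRecognition and PyAudio are installed:

```python
# Minimal microphone smoke test for the transcription path.
import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.Microphone() as source:
    print("Listening for up to 5 seconds...")
    audio = recognizer.listen(source, timeout=5)
# "hi-IN" is a BCP-47 tag; bare "hi" usually works but is less explicit.
print(recognizer.recognize_google(audio, language="hi-IN"))
```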
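Finally, the translation helpers wrap deep_translator's `GoogleTranslator` directly, so they can be sanity-checked outside Streamlit. Google Translate may not support the whole language list above (codes like `"sat"`, `"brx"`, or `"tcy"` can raise an unsupported-language error), and the dict lists "Santali" and "Santhali" as two entries with the same code. A quick round-trip check:

```python
# Round-trip check for translate_to_english / translate_from_english,
# using the same deep_translator API the diff imports.
from deep_translator import GoogleTranslator

question_hi = GoogleTranslator(source="en", target="hi").translate(
    "What does the Gita say about selfless action?"
)
print(question_hi)  # Hindi rendering of the question
print(GoogleTranslator(source="hi", target="en").translate(question_hi))
```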