Files changed (1)
  1. app.py +210 -0
app.py CHANGED
@@ -1,3 +1,5 @@
+ # NEW CODE
+
  import os
  import json
  import streamlit as st
@@ -5,6 +7,213 @@ from langchain_huggingface import HuggingFaceEmbeddings
  from langchain_chroma import Chroma
  from langchain.memory import ConversationBufferMemory
  from langchain.chains import ConversationalRetrievalChain
+ from vectorize_documents import embeddings
+ import speech_recognition as sr
+ from deep_translator import GoogleTranslator
+
+ # Set up working directory and API configuration
+ working_dir = os.path.dirname(os.path.abspath(__file__))
+ config_data = json.load(open(f"{working_dir}/config.json"))
+ os.environ["GROQ_API_KEY"] = config_data["GROQ_API_KEY"]
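+ # For reference, the loader above assumes a config.json sitting next to app.py,
+ # shaped roughly like this (hypothetical placeholder value):
+ # {
+ #     "GROQ_API_KEY": "gsk_..."
+ # }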
+
+ # Streamlit session state initialization
+ def initialize_session_state():
+     if "chat_history" not in st.session_state:
+         st.session_state["chat_history"] = []
+     if "vectorstore" not in st.session_state:
+         st.session_state["vectorstore"] = setup_vectorstore()
+     if "chain" not in st.session_state:
+         st.session_state["chain"] = chat_chain(st.session_state["vectorstore"])
+
+ # Vectorstore setup
+ def setup_vectorstore():
+     # Must match the embedding model used when the Chroma index was built
+     embeddings = HuggingFaceEmbeddings()
+     vectorstore = Chroma(
+         persist_directory=f"{working_dir}/vector_db_dir",
+         embedding_function=embeddings
+     )
+     return vectorstore
+
+ # Chat chain setup
+ def chat_chain(vectorstore):
+     from langchain_groq import ChatGroq
+     llm = ChatGroq(
+         model="llama-3.1-70b-versatile",
+         temperature=0
+     )
+     retriever = vectorstore.as_retriever()
+     memory = ConversationBufferMemory(
+         memory_key="chat_history",
+         return_messages=True,
+         output_key="answer"  # required once the chain returns more than one output key
+     )
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=retriever,
+         chain_type="stuff",
+         memory=memory,
+         return_source_documents=True,  # populates response["source_documents"] checked below
+         verbose=True
+     )
+     return chain
+
+ # Transcription function
+ def transcribe_audio(selected_language):
+     try:
+         recognizer = sr.Recognizer()
+         with sr.Microphone() as source:
+             st.write("🎀 Listening... Please ask your question.")
+             try:
+                 audio = recognizer.listen(source, timeout=5)
+                 query = recognizer.recognize_google(audio, language=selected_language)
+                 st.write(f"**πŸ—£οΈ You said:** {query}")
+                 return query
+             except sr.WaitTimeoutError:
+                 st.error("⏳ You didn't speak in time. Please try again.")
+             except sr.UnknownValueError:
+                 st.error("❌ Sorry, could not understand the audio. Please try again.")
+             except sr.RequestError as e:
+                 st.error(f"⚠️ Error with speech recognition service: {e}")
+     except AttributeError:
+         st.error("❌ Microphone or PyAudio not available. Please check installation.")
+     except OSError as e:
+         st.error(f"⚠️ Audio input error: {e}")
+     return None
+
+ # Translation functions
+ def translate_to_english(text, source_lang):
+     if source_lang == "en":  # Skip translation if the language is English
+         return text
+     return GoogleTranslator(source=source_lang, target="en").translate(text)
+
+ def translate_from_english(text, target_lang):
+     if target_lang == "en":  # Skip translation if the language is English
+         return text
+     return GoogleTranslator(source="en", target=target_lang).translate(text)
+
+ # Streamlit UI
+ initialize_session_state()
+
+ st.markdown(
+     """
+     <style>
+     .main-title {
+         font-size: 36px;
+         color: #FF8C00;
+         font-weight: bold;
+     }
+     .sub-title {
+         font-size: 24px;
+         color: #FF8C00;
+     }
+     .icon {
+         font-size: 50px;
+         color: #FF8C00;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True
+ )
+
+ st.markdown('<div class="icon">πŸ“š</div>', unsafe_allow_html=True)
+ st.markdown('<div class="main-title">Bhagavad Gita & Yoga Sutras Query Assistant</div>', unsafe_allow_html=True)
+ st.markdown('<div class="sub-title">Ask questions and explore timeless wisdom</div>', unsafe_allow_html=True)
+
+ # Language support
+ indian_languages = {
+     "English": "en",
+     "Assamese": "as",
+     "Bengali": "bn",
+     "Gujarati": "gu",
+     "Hindi": "hi",
+     "Kannada": "kn",
+     "Kashmiri": "ks",
+     "Konkani": "kok",
+     "Malayalam": "ml",
+     "Manipuri": "mni",
+     "Marathi": "mr",
+     "Nepali": "ne",
+     "Odia": "or",
+     "Punjabi": "pa",
+     "Sanskrit": "sa",
+     "Santali": "sat",
+     "Sindhi": "sd",
+     "Tamil": "ta",
+     "Telugu": "te",
+     "Urdu": "ur",
+     "Bodo": "brx",
+     "Dogri": "doi",
+     "Maithili": "mai",
+     "Tulu": "tcy",
+     "Bhili/Bhilodi": "bhi",
+     "Khasi": "kha",
+     "Garo": "grt",
+     "Mizo": "lus",
+     "Sora": "srb",
+     "Ho": "hoc",
+     "Kurukh": "kru",
+     "Korwa": "kfa",
+     "Gondi": "gon"
+ }
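+ # Note: these codes are handed to both recognize_google() and GoogleTranslator
+ # below; several of the scheduled-language codes above (e.g. "brx", "srb", "kfa")
+ # may not be supported by either Google service and can fail at call time.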
+ selected_language = st.selectbox("Select your language:", options=list(indian_languages.keys()))
+ language_code = indian_languages[selected_language]
+
+ # User-friendly input selection
+ st.markdown("### How would you like to ask your question?")
+ input_mode = st.radio("Choose input method:", ("Voice", "Typing"))
+
+ user_query = None  # Initialize the variable to ensure it's always defined
+
+ if input_mode == "Voice":
+     st.write("Click the button below to speak your question:")
+     if st.button("🎀 Use Voice Input"):
+         user_query = transcribe_audio(language_code)
+         if user_query:
+             user_query = translate_to_english(user_query, language_code)
+ else:
+     user_query = st.text_input("Type your question here:")
+     if user_query:
+         user_query = translate_to_english(user_query, language_code)
+
+ if user_query:  # This check will now always work
+     with st.spinner("Getting answer..."):
+         response = st.session_state["chain"]({"question": user_query})
+         relevant_content = response.get("source_documents", None)
+
+         if not relevant_content:
+             st.markdown("### ❌ **No Answer Available:**")
+             st.write("The system does not have sufficient information to answer this question.")
+         else:
+             answer = response.get("answer", None)
+             translated_answer = translate_from_english(answer, language_code)
+             st.markdown("### βœ… **Answer:**")
+             st.write(translated_answer)
+
+ # OLD CODE IS BELOW
+ '''import os
+ import json
+ import streamlit as st
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from langchain_chroma import Chroma
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationalRetrievalChain
  from vectorize_documents import embeddings # Import embeddings from the vectorization script
  import speech_recognition as sr # For voice recognition

@@ -130,3 +339,4 @@ if user_query:
          st.markdown("### πŸ“„ **Source Documents:**")
          for doc in source_documents:
              st.write(doc)
+ '''
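For anyone checking out this branch locally: the new imports suggest roughly the following setup (package names inferred from the import statements, not pinned anywhere in this PR; PyAudio is implied by sr.Microphone):

pip install streamlit langchain langchain-groq langchain-chroma langchain-huggingface deep-translator SpeechRecognition PyAudio
streamlit run app.py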