samlonka commited on
Commit
5e00ed4
·
1 Parent(s): f9012d5

'first_commit'

Browse files
Files changed (10) hide show
  1. .gitignore +1 -0
  2. README.md +4 -4
  3. app.py +157 -0
  4. crag.py +342 -0
  5. database.py +87 -0
  6. function_tools.py +528 -0
  7. ramana_docs_ids.pkl +3 -0
  8. requirements.txt +24 -0
  9. utils.py +147 -0
  10. vector_tool.py +93 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .streamlit/*
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Veda Bot 3.0
3
- emoji: 👀
4
- colorFrom: gray
5
- colorTo: indigo
6
  sdk: streamlit
7
  sdk_version: 1.33.0
8
  app_file: app.py
 
1
  ---
2
+ title: Veda Bot 2.0
3
+ emoji: 🏢
4
+ colorFrom: pink
5
+ colorTo: yellow
6
  sdk: streamlit
7
  sdk_version: 1.33.0
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import uuid
4
+ from streamlit_feedback import streamlit_feedback
5
+ import streamlit as st
6
+ from langchain_openai import ChatOpenAI
7
+ from langchain_core.messages import HumanMessage
8
+ from langchain.memory import ChatMessageHistory
9
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
+ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
11
+ from langchain_core.prompts import HumanMessagePromptTemplate
12
+ import tiktoken
13
+ #from agent import app
14
+ from crag import crag_app
15
+ from datetime import timedelta
16
+ from sqlalchemy import create_engine
17
+ #from cache import (write_to_db,current_time)
18
+
19
+
20
+ #load postgres engine
21
+ #engine = create_engine("postgresql://postgres:sampath@localhost:5432/postgres")
22
+ #load keys
23
+ os.environ['OPENAI_API_KEY'] = st.secrets["OPENAI_API_KEY"]
24
# In-process chat transcript; module-level, so it is shared across Streamlit
# reruns within one server process.
chat_history = ChatMessageHistory()

# System persona for the bot.
# NOTE(review): "queation" in the human template below is a typo for
# "question"; fixing it would change the exact text sent to the LLM, so it is
# only flagged here.
system_message = '''You are an AI assistant for answering questions about vedas and scriptures.
\nYou are given the following extracted documents from Svarupa Knowledge Base (https://svarupa.org/) and other documents and a question.
Provide a conversational answer. If there are any unicode characters in the final answer, please encode and provide readable answer to the user.
\nIf you are not provided with any documents, say \"I did not get any relevant context for this but
I will reply to the best of my knowledge\" and then write your answer\nIf you don't know the answer, just say \"Hmm, I'm not sure. \" Don't try to make up an answer.
\nIf the question is not about vedas and scriptures, politely inform them that you are tuned to only answer questions about that.\n\n'''
# Prompt template: system persona + a human turn carrying {context} and {question}.
generate_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", "Here is the given context {context}, queation: {question} \n\n Formulate an answer."),
    ]
)
38
#@st.cache_resource(show_spinner=False) # Set allow_output_mutation to True for mutable objects like instances
def bot_response(user_input):
    """Run the user's question through the CRAG graph and return its final state dict."""
    return crag_app.invoke({"question": user_input})
42
+
43
+
44
+ ##======
45
+ # Main chatbot function
46
# Main chatbot function
def veda_bot(sidebar: bool = True) -> None:
    """Render the Streamlit chat UI and drive one request/response cycle.

    Args:
        sidebar: accepted for API compatibility; not referenced in the body.
    """
    # Define custom CSS to hide Streamlit chrome and tighten the header.
    custom_css = """
    <style>
    /* Adjust the selector as needed */
    .stHeadingContainer {
        margin-top: -100px; /* Reduce the top margin */
    }
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    header {visibility: hidden;}
    </style>
    """

    # Apply the custom CSS
    st.markdown(custom_css, unsafe_allow_html=True)

    # Streamlit Components Initialization
    st.title("Veda Bot")
    st.write("This bot is developed based on the content from the [Svarupa](https://svarupa.org/home) website.")
    # Module-level history object; re-greeted on every rerun.
    chat_history.add_message(SystemMessage(content="Welcome! I am your Veda Bot. How can I assist you today?"))
    # Initialize session state variables
    if "messages" not in st.session_state.keys():
        st.session_state.messages = [{"role": "assistant", "content": "Hi. I am an AI Assistant. Ask me a question about Vedas!"}]

    #if "session_uuid" not in st.session_state:
    #    st.session_state["session_uuid"] = f"{current_time()}-{str(uuid.uuid4())}"

    if "feedback" not in st.session_state:
        st.session_state["feedback"] = None

    # The callable that produces answers (indirection allows swapping engines).
    if "chat_engine" not in st.session_state.keys():
        st.session_state.chat_engine = bot_response

    # Seed the per-session memory with the prompt template and a greeting pair.
    if "memory" not in st.session_state:
        st.session_state["memory"] = ChatMessageHistory()
        st.session_state["memory"].add_message(generate_prompt)
        st.session_state["memory"].add_message({"role":"user","content":"Hi/Hello or Any Greating"})
        st.session_state["memory"].add_message({"role":"assistant","content":"Hi. Please ask the question about vedas!"})
    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Get user input
    prompt = st.chat_input("Enter your question!")

    if prompt:
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(prompt)

        # Log user message
        st.session_state["messages"].append({"role": "user", "content": prompt})
        st.session_state["memory"].add_message({"role": "user", "content": prompt})

    # Generate bot response when the last turn is the user's.
    # NOTE(review): dump formatting makes the nesting ambiguous here — this
    # block behaves identically whether or not it sits inside `if prompt:`,
    # because on first load the last message is the assistant greeting.
    if st.session_state.messages[-1]["role"] != "assistant":
        with st.spinner("Thinking..."):
            references = []
            message_placeholder = st.empty()
            full_response = ""

            # Get bot response from the CRAG graph.
            response_bot = st.session_state.chat_engine(prompt)
            generation = response_bot['generation']
            full_response += generation
            web_search = response_bot['web_search']

            # Extract references from bot response (document metadata sources).
            if response_bot['messages']:
                try:
                    references.extend([doc.metadata['source'] for doc in response_bot['messages']])
                except Exception as e:
                    print("Error:", e)
                #message_placeholder.markdown(full_response + "▌")

            # Add de-duplicated references to the full response.
            if references:
                unique_references = set(references)
                full_response += "\n\n**References:**\n\n"
                for reference in unique_references:
                    full_response += f"- {reference}\n"

            #message_placeholder.markdown(full_response + "▌")
            # Submit Feedback widget (score is read back on the next rerun).
            streamlit_feedback(
                feedback_type="faces",
                on_submit=None,
                optional_text_label="[Optional] Please provide an explanation",
                key="feedback",
            )
            message_placeholder.markdown(full_response)
            # Only the bare generation (without references) is stored in history.
            st.session_state["messages"].append({"role": "assistant", "content": generation})
            st.session_state["memory"].add_message({"role": "assistant", "content": generation})
            print(f"Response added to memory: {full_response}")

    # Log feedback and messages
    if st.session_state['feedback']:
        user_feedback = {
            "user_message": st.session_state["messages"][-2],
            "assistant_message": st.session_state["messages"][-1],
            "feedback_score": st.session_state["feedback"]["score"],
            "feedback_text": st.session_state["feedback"]["text"],
        }
        #write_to_db(u_message=user_feedback["user_message"],
        #            a_message=user_feedback["assistant_message"],
        #            f_score=user_feedback["feedback_score"],
        #            f_text=user_feedback["feedback_text"])
156
# Script entry point: launch the Streamlit chat UI.
if __name__ == "__main__":
    veda_bot()
crag.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from vector_tool import ensemble_retriever
4
+ from langgraph.prebuilt import ToolInvocation
5
+ from langchain_core.messages import ToolMessage
6
+ import json
7
+ # Set up the tools to execute them from the graph
8
+ from langgraph.prebuilt import ToolExecutor
9
+ # tools retrieval
10
+ from function_tools import tool_chain
11
+ from vector_tool import ensemble_retriever
12
+
13
+ os.environ['OPENAI_API_KEY'] = st.secrets["OPENAI_API_KEY"]
14
+ os.environ['TAVILY_API_KEY'] = st.secrets["TAVILY_API_KEY"]
15
+
16
+ ### Retrieval Grader
17
+
18
+ from langchain_openai import ChatOpenAI
19
+ from langchain_core.prompts import ChatPromptTemplate
20
+ from langchain_core.pydantic_v1 import BaseModel, Field
21
+
22
# LLM used for structured relevance grading (function calling).
llm_AI4 = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)

# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Expected values: the literal strings 'yes' or 'no'.
    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")

# LLM with function call — forces responses into the GradeDocuments schema.
structured_llm_grader = llm_AI4.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# prompt | structured grader — invoking this yields a GradeDocuments instance.
retrieval_grader = grade_prompt | structured_llm_grader
46
+
47
+ ### Generate
48
+ from langchain import hub
49
+ from langchain.prompts import MessagesPlaceholder
50
+ from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
51
+ from langchain.prompts import MessagesPlaceholder
52
+ from langchain.agents.format_scratchpad.openai_tools import (
53
+ format_to_openai_tool_messages
54
+ )
55
+ from langchain_core.messages import AIMessage, FunctionMessage, HumanMessage
56
+ from langchain_core.output_parsers import StrOutputParser
57
+ from typing import Any, List, Union
58
# Prompt
#prompt = hub.pull("rlm/rag-prompt")
# System persona for answer generation (mirrors app.py's prompt, minus the
# unicode-encoding instruction).
system_message = '''You are an AI assistant for answering questions about vedas and scriptures.
\nYou are given the following extracted documents from Svarupa Knowledge Base (https://svarupa.org/) and other documents and a question.
Provide a conversational answer.\nIf you are not provided with any documents, say \"I did not get any relevant context for this but
I will reply to the best of my knowledge\" and then write your answer\nIf you don't know the answer, just say \"Hmm, I'm not sure. \" Don't try to make up an answer.
\nIf the question is not about vedas and scriptures, politely inform them that you are tuned to only answer questions about that.\n\n'''

# NOTE(review): "queation" in the human turn is a typo for "question" — it is
# literal prompt text, so only flagged here.
generate_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message),
        ("human", "Here is the given context {context}, queation: {question} \n\n Formulate an answer."),
    ]
)
# LLM
# NOTE(review): rag_chain below uses llm_AI4, not this llm_AI — looks unused
# in this file; confirm before removing.
llm_AI = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
75
# Post-processing
def format_docs(docs):
    """Concatenate each document's page_content, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
78
+
79
# Chain
# generate_prompt -> GPT-4 -> plain string answer.
rag_chain = generate_prompt | llm_AI4 | StrOutputParser() #OpenAIToolsAgentOutputParser()

####-----------------TESTING
# NOTE(review): this prompt is built here but not wired into any chain in this
# file — confirm whether it is dead code.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.memory import ChatMessageHistory

# One shared history object for every session — the session_id passed to the
# factory lambda below is ignored.
chat_history_for_chain = ChatMessageHistory()

# Wrap rag_chain so each invocation records/injects message history.
# NOTE(review): generate_prompt has no "chat_history" placeholder, so the
# injected history is never rendered into the prompt — confirm this is intended.
chain_with_message_history = RunnableWithMessageHistory(
    rag_chain,
    lambda session_id: chat_history_for_chain,
    input_messages_key="question",
    history_messages_key="chat_history",
)

### Question Re-writer

# LLM
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# Prompt
system = """You a question re-writer that converts an input question to a better version that is optimized \n
    for a search. Look at the input and try to reason about the underlying sematic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Here is the initial question: \n\n {question} \n Formulate an improved question."),
    ]
)

# prompt -> LLM -> improved-question string.
question_rewriter = re_write_prompt | llm | StrOutputParser()
121
+
122
+
123
+
124
+ ### Search
125
+
126
+ from langchain_community.tools.tavily_search import TavilySearchResults
127
+ web_search_tool = TavilySearchResults(k=2)
128
+
129
+ from typing_extensions import TypedDict
130
+ from typing import List
131
+ from typing import TypedDict, Annotated, Sequence
132
+ import operator
133
+ from langchain_core.messages import BaseMessage
134
+
135
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: the user question (may be re-written during the run)
        generation: LLM generation (final answer text)
        web_search: "Yes"/"No" flag — whether to fall back to web search
        messages: working list of retrieved/graded documents
    """
    question : str
    generation : str
    web_search : str
    # Declared List[str], but at runtime this carries Document objects
    # (see retrieve/grade_documents) — annotation looks stale; TODO confirm.
    messages: List[str] #Union[dict[str, Any]]
149
+
150
+ from langchain.schema import Document
151
+
152
+
153
+
154
def retrieve(state):
    """
    Retrieve documents for the current question via the ensemble retriever.

    Args:
        state (dict): The current graph state.

    Returns:
        dict: state with "messages" holding the retrieved documents, each with
        its metadata 'source' reduced to a bare file name (or 'unavailable').
    """
    print("---VECTOR RETRIEVE---")
    question = state["question"]

    # Retrieval
    docs = ensemble_retriever.get_relevant_documents(question)

    # Normalize each document's 'source' metadata to just the file name.
    for doc in docs:
        try:
            full_path = doc.metadata['source']
            doc.metadata['source'] = os.path.split(full_path)[1]
        except KeyError:
            # No 'source' field present in the metadata.
            doc.metadata['source'] = 'unavailable'
        except Exception as e:
            # Any other failure: report and leave the document untouched.
            print(f"An error occurred while processing document: {e}")

    return {"messages": docs, "question": question}
183
+
184
+
185
def generate(state):
    """
    Generate the final answer from the graded context via the RAG chain.

    Args:
        state (dict): The current graph state.

    Returns:
        dict: state extended with "generation" holding the LLM answer.
    """
    print("---GENERATE---")
    question = state["question"]
    context_docs = state["messages"]
    print(context_docs)

    # RAG generation; the session_id is ignored by the history factory.
    answer = chain_with_message_history.invoke(
        {"context": context_docs, "question": question},
        {"configurable": {"session_id": "unused"}},
    )
    return {"messages": context_docs, "question": question, "generation": answer}
202
+
203
def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question,
    augments them with structured-tool results, and decides whether a web
    search fallback is needed.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): "messages" replaced with only the relevant documents
        (plus tool-derived documents); "web_search" set to "Yes" when nothing
        relevant survived, "No" otherwise.
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    messages = state["messages"]

    # Score each doc with the LLM grader; keep only 'yes'-graded documents.
    filtered_docs = []
    web_search = "No"
    for d in messages:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)
        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue
    # Always also query the structured function tools and wrap their outputs
    # as Documents attributed to the svarupa.org source.
    print("---TOOLS RETRIEVE---")
    tool_documents = tool_chain.invoke(question)
    #print(tool_documents)
    if tool_documents:
        for item in tool_documents:
            filtered_docs.append(Document(page_content=str(item['output']),metadata={"source": 'https://svarupa.org/home',"name":item['name']}))
    # If filtered_docs is empty, perform a web search
    if not filtered_docs:
        print("--PERFORMING WEB SEARCH--")
        web_search = "Yes"

    return {"messages": filtered_docs, "question": question, "web_search": web_search}
242
+
243
+
244
+
245
def transform_query(state):
    """
    Re-phrase the user question into a form better suited for search.

    Args:
        state (dict): The current graph state.

    Returns:
        dict: state with "question" replaced by the re-written question.
    """
    print("---TRANSFORM QUERY---")
    original_question = state["question"]
    docs = state["messages"]

    # Re-write question
    improved_question = question_rewriter.invoke({"question": original_question})
    return {"messages": docs, "question": improved_question}
263
+
264
def web_search(state):
    """
    Run a Tavily web search on the (re-phrased) question and append the hits.

    Args:
        state (dict): The current graph state.

    Returns:
        dict: state with web results appended to "messages" as Documents
        whose 'source' metadata is the result URL.
    """
    print("---WEB SEARCH---")
    question = state["question"]
    messages = state["messages"]

    # Web search
    hits = web_search_tool.invoke({"query": question})
    print(hits)
    #web_results = "\n".join([d["content"] for d in docs])
    web_results = [
        Document(page_content=hit["content"], metadata={"source": hit["url"]})
        for hit in hits
    ]
    print(f"Web Results: {web_results}")
    messages.extend(web_results)
    return {"messages": messages, "question": question}
287
+
288
+ ### Edges
289
+
290
def decide_to_generate(state):
    """
    Route after grading: re-write the query when a web search is required,
    otherwise proceed straight to answer generation.

    Args:
        state (dict): The current graph state.

    Returns:
        str: next node name — "transform_query" or "generate".
    """
    print("---ASSESS GRADED DOCUMENTS---")
    question = state["question"]
    needs_search = state["web_search"]
    graded_docs = state["messages"]

    if needs_search != "Yes":
        # Relevant documents survived grading — answer directly.
        print("---DECISION: GENERATE---")
        return "generate"
    # Nothing relevant was found — re-phrase the query for a web search.
    print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---")
    return "transform_query"
315
from langgraph.graph import END, StateGraph

# CRAG state machine:
#   retrieve -> grade_documents -> (generate | transform_query -> web_search_node -> generate) -> END
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve) # vector + metadata retrieval
workflow.add_node("grade_documents", grade_documents) # grade documents
workflow.add_node("generate", generate) # generate the answer
workflow.add_node("transform_query", transform_query) # re-write the question
workflow.add_node("web_search_node", web_search) # web search fallback

# Build graph
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
# decide_to_generate returns one of the mapping keys below.
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query": "transform_query",
        "generate": "generate",
    },
)
workflow.add_edge("transform_query", "web_search_node")
workflow.add_edge("web_search_node", "generate")
workflow.add_edge("generate", END)

# Compile into the runnable imported by app.py.
crag_app = workflow.compile()
database.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pymysql
2
+ import streamlit as st
3
+ import logging
4
+ import json
5
+ import pandas as pd
6
+ import re
7
+ import os
8
+ from langchain_community.utilities.sql_database import SQLDatabase
9
+
10
+
11
# MySQL connection settings; credentials are read from the environment
# (DB_HOST / DB_USER / DB_PASSWORD / DB). Port is the MySQL default.
db_params = {"host": os.getenv("DB_HOST"),
             "user": os.getenv("DB_USER"),
             "password": os.getenv("DB_PASSWORD"),
             "port": 3306,
             "database": os.getenv("DB")
             }
17
+
18
+
19
+
20
def initialize_database():
    """Open a raw PyMySQL connection using db_params.

    Shows a Streamlit success/error banner and returns the connection,
    or None when the connection attempt fails.
    """
    try:
        connection = pymysql.connect(**db_params)
    except Exception as e:
        st.error(f"Database connection failed: {e}")
        return None
    st.success("Database connection successful!")
    return connection
29
+
30
+
31
def get_db():
    """Build a LangChain SQLDatabase over the three content tables.

    Returns the SQLDatabase instance, or None (with a Streamlit error
    banner) when the connection fails.
    """
    uri = (
        f"mysql+pymysql://{db_params['user']}:{db_params['password']}"
        f"@{db_params['host']}/{db_params['database']}"
    )
    tables = ['term_details_modified', 'veda_content_details', 'veda_content_modified']
    try:
        #st.success("Database connection successful!")
        return SQLDatabase.from_uri(uri, include_tables=tables)
    except Exception as e:
        st.error(f"Database connection failed: {e}")
        return None
42
+
43
+
44
def execute_query(query):
    """Execute *query* on a fresh connection and fetch all rows.

    Args:
        query: SQL string to execute.

    Returns:
        (cursor.description, rows) tuple on success, or None on failure
        (connection error or execution error).
    """
    db = initialize_database()
    if db is None:
        # Bug fix: previously a failed connection fell through to
        # db.cursor(), raising AttributeError — and then raised again in
        # the finally clause via db.close().
        return None
    cursor = db.cursor()
    try:
        cursor.execute(query)
        description = cursor.description
        result = cursor.fetchall()  # Fetch all rows from the result set
        db.commit()
        return description, result
    except Exception as e:
        print("Error executing query:", e)
        db.rollback()
        return None  # Return None if an error occurs
    finally:
        db.close()
59
+
60
+
61
def execute_sql_query(query, parameters=None):
    """Execute a (optionally parameterized) SQL query and return dict rows.

    Args:
        query: SQL string, may contain %s placeholders.
        parameters: optional sequence of values bound to the placeholders.

    Returns:
        List of row dicts on success, or None on failure (connection error
        or execution error).
    """
    # Establish database connection and execute SQL query
    db = initialize_database()
    if db is None:
        # Bug fix: previously a failed connection fell through to
        # db.cursor(), raising AttributeError — and again in finally.
        return None
    # Use dictionary cursor to retrieve data as dictionaries
    cursor = db.cursor(pymysql.cursors.DictCursor)
    try:
        if parameters:
            cursor.execute(query, parameters)
        else:
            cursor.execute(query)
        results = cursor.fetchall()
        return results
    except Exception as e:
        logging.error(f"Error executing SQL query: {e}")
        return None
    finally:
        db.close()
77
+
78
+
79
def get_details_mantra_json(query):
    """Fetch the first row's 'mantra_json' column, strip HTML tags, parse JSON.

    Assumes *query* selects at least one row containing a 'mantra_json'
    column; raises if execute_query fails (returns None) or no row matches.
    """
    description, data = execute_query(query)
    frame = pd.DataFrame(data)
    frame.columns = [col[0] for col in description]
    raw_json = frame['mantra_json'].values[0]
    # Remove embedded HTML markup before JSON parsing.
    without_tags = re.sub('<[^<]+?>', '', raw_json)
    return json.loads(without_tags)
86
+
87
+
function_tools.py ADDED
@@ -0,0 +1,528 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+ import json
3
+ import logging
4
+ from utils import iast_process, get_list_meaning_word, get_details_mantra_json, word_sentence_similarity, extract_meaning_by_language
5
+ import ast
6
+ from langchain_core.tools import tool
7
+ from database import execute_sql_query, get_details_mantra_json
8
+ from langchain.pydantic_v1 import BaseModel, Field
9
+ from langchain.tools import StructuredTool
10
+ from typing import Optional
11
+ import streamlit as st
12
+ from langchain_core.utils.function_calling import convert_to_openai_function
13
+ from langchain_core.messages import AIMessage
14
+ from langchain_core.runnables import Runnable
15
+ import os
16
+
17
+ os.environ['OPENAI_API_KEY'] = st.secrets["OPENAI_API_KEY"]
18
+ from langchain_openai import ChatOpenAI
19
+
20
+ #LLM
21
+ llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
22
+ llm_AI4 = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
23
+
24
+
25
+
26
#Classes
# Pydantic schemas for the structured tools. The Field descriptions are
# surfaced to the LLM via convert_to_openai_function — keep them precise.
class MantraInput(BaseModel):
    # Either the dotted mantra id, or the scripture name plus location numbers.
    mantraid: Optional[str] = Field(None, description="The mantra id. For example, 1.1.1.1, 2.1.1,3.1.1.2,4.2.3.1, and 5.0.1.1.2")
    scripture_name: Optional[str] = Field(None, description="Name of the scripture like RigVeda, SamaVeda, AtharvaVeda, KrishnaYajurVeda, and ShuklaYajurVeda")
    KandahNumber: Optional[int] = Field(None, description="Kandah Number of Vedamantra")
    MandalaNumber: Optional[int] = Field(None, description="Mandala Number of Vedamantra")
    ArchikahNumber: Optional[int] = Field(None, description="Archikah Number of Vedamantra")
    ShuktaNumber: Optional[int] = Field(None, description="Shukta Number of Vedamantra")
    PrapatakNumber: Optional[int] = Field(None, description="Prapatak Number of Vedamantra")
    MantraNumber: Optional[int] = Field(None, description="Mantra Number of Vedamantra")
    AnuvakNumber: Optional[int] = Field(None, description="Anuvak Number of Vedamantra")
    AdhyayaNumber: Optional[int] = Field(None, description="Adhyaya Number of Vedamantra")

class PadaMeaningInput(BaseModel):
    pada: str = Field(description="The pada or word that is being meaning checked")

# Same location fields as MantraInput, plus the pada (word) to look up.
class PadaAAAInput(BaseModel):
    pada: str = Field(description="The pada or word that is being meaning checked")
    mantraid: Optional[str] = Field(None, description="The mantra id. For example, 1.1.1.1, 2.1.1,3.1.1.2,4.2.3.1, and 5.0.1.1.2")
    scripture_name: Optional[str] = Field(None, description="Name of the scripture like RigVeda, SamaVeda, AtharvaVeda, KrishnaYajurVeda, and ShuklaYajurVeda")
    KandahNumber: Optional[int] = Field(None, description="Kandah Number of Vedamantra")
    MandalaNumber: Optional[int] = Field(None, description="Mandala Number of Vedamantra")
    ArchikahNumber: Optional[int] = Field(None, description="Archikah Number of Vedamantra")
    ShuktaNumber: Optional[int] = Field(None, description="Shukta Number of Vedamantra")
    PrapatakNumber: Optional[int] = Field(None, description="Prapatak Number of Vedamantra")
    MantraNumber: Optional[int] = Field(None, description="Mantra Number of Vedamantra")
    AnuvakNumber: Optional[int] = Field(None, description="Anuvak Number of Vedamantra")
    AdhyayaNumber: Optional[int] = Field(None, description="Adhyaya Number of Vedamantra")

class NLSQLResponse(BaseModel):
    user_query: str = Field(description="user query")

class VectorResponse(BaseModel):
    query: str = Field(description="User query")

# Schema for the final structured answer returned to the agent.
class Response(BaseModel):
    result: str = Field(description="The result based on the context. Provide the text in a readable format if there are unicode characters. Use only available context. If there is no context, return as 'unknown'. Do not use prior knowledge.")
    explanation: str = Field(description="Explanation of the steps taken to get the result")
+
65
#function tools for mantra level
def _get_mantra_details(query):
    """Run *query* and return the second ('language'[1]) mantra-header entry.

    Raises:
        ValueError: when the query or JSON extraction fails for any reason.
    """
    try:
        payload = get_details_mantra_json(query)
        return payload['mantraHeader']['language'][1]
    except Exception as e:
        raise ValueError(f"Failed to get mantra details: {e}")
72
+
73
+
74
def _get_mantra_details_by_scripture(scripture_name=None, KandahNumber=None, MandalaNumber=None, ArchikahNumber=None,
                                     ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None, AnuvakNumber=None,
                                     AdhyayaNumber=None):
    """Look up veda_content_details rows matching the provided location filters.

    Only truthy parameters contribute WHERE clauses (so 0 / None / "" are
    skipped, matching the original behavior). scripture_name is compared
    lower-cased.

    Returns:
        List of row dicts, or None when nothing matched or an error occurred
        (the error is logged).
    """
    try:
        # (column, value) pairs in the original clause order; replaces the
        # nine duplicated if-blocks of the first version.
        filters = [
            ("scripture_name", scripture_name.lower() if scripture_name else None),
            ("KandahNumber", KandahNumber),
            ("MandalaNumber", MandalaNumber),
            ("ArchikahNumber", ArchikahNumber),
            ("ShuktaNumber", ShuktaNumber),
            ("PrapatakNumber", PrapatakNumber),
            ("MantraNumber", MantraNumber),
            ("AnuvakNumber", AnuvakNumber),
            ("AdhyayaNumber", AdhyayaNumber),
        ]

        # Construct the parameterized SQL query from the truthy filters.
        query = "SELECT * FROM veda_content_details WHERE 1 = 1"
        parameters = []
        for column, value in filters:
            if value:
                query += f" AND {column} = %s"
                parameters.append(value)

        # Execute the SQL query
        results = execute_sql_query(query, parameters)
        return results if results else None
    except Exception as e:
        logging.error(f"Error in _get_mantra_details_by_scripture: {e}")
        return None
120
+
121
+
122
+
123
def get_vedamantra_details(mantraid=None, scripture_name=None, KandahNumber=None,MandalaNumber=None, ArchikahNumber=None,
                           ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                           AnuvakNumber=None, AdhyayaNumber=None):
    '''
    This function is used to get the vedamantra such as vedamantra, padapatha, devata, chandah,
    and rishi, from all Vedas (RigVeda, AtharvaVeda, SamaVeda, KrishnaYajurVeda, and ShuklaYajurVeda).
    The Vedic scriptures has the following structure: \
    RigVeda->Mandala->Shukta->Mantra\
    SamaVeda->Archikah->Shukta->Mantra\
    AtharvaVeda->Kandah->Shukta->Mantra\
    ShuklaYajurVeda->Adhyaya->Mantra\
    KrishnaYajurVeda->Kandah->Prapatak->Anuvak->Mantra\
    Sample Questions:
    1. Obtain the vedamantra of the mantra whose id is 1.1.1.1?
    2. Retrieve the devata of the vedamantra from Rigveda, first mandala, first shukta, and first mantra.
    3. Provide the meaning of the vedamantra from Rigveda, first mandala, first shukta, and first mantra written by Tulsi Ram.
    4. Explain the adhibautic meaning of the first mantra from RigVeda, first mandala, and first shukta.
    5. Identify the mantraVishaya of the vedamantra from RigVeda, first mandala, first shukta, and first mantra.
    6. What is the adibhautic meaning of the mantra 1.1.1.9?
    7. What is the adhyatmic meaning of the mantra 1.1.1.7?
    8. What is the adhidyvic meaning of the 6th mantra from RigVeda, first mandala, and first shukta?
    '''
    try:
        if mantraid:
            query = f'''SELECT mantra_json FROM veda_content WHERE mantra_number = "{mantraid}"
            '''
        else:
            # Resolve the mantra id from the scripture + location numbers.
            filter_df = _get_mantra_details_by_scripture(scripture_name=scripture_name, KandahNumber=KandahNumber,MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                         ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber, MantraNumber=MantraNumber,
                                                         AnuvakNumber=AnuvakNumber, AdhyayaNumber=AdhyayaNumber)
            if filter_df is None:
                # Bug fix: previously this path left query == "" and executed
                # an empty SQL statement, surfacing only a cryptic DB error.
                return json.dumps({"error": "No matching mantra found for the given scripture details."})
            mantra_id = filter_df[0]['mantra_id']
            query = f'''SELECT mantra_json FROM veda_content WHERE mantra_number = "{mantra_id}"
            '''
        return _get_mantra_details(query)
    except Exception as e:
        # Errors are returned as a JSON payload so the calling tool chain can
        # surface them instead of crashing.
        return json.dumps({"error": str(e)})
161
+
162
def get_vedamantra_summary(mantraid=None, scripture_name=None, KandahNumber=None, MandalaNumber=None, ArchikahNumber=None,
                           ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                           AnuvakNumber=None, AdhyayaNumber=None):
    '''
    Use the function `get_vedamantra_summary` to access the information such as adibhautic meaning of the mantra, anvaya of the mantra, mantraVishaya of the mantra,
    adhibautic (or adhyatmic or adhidyvic) meaning (or bhavarth) of the mantra, purpose of the mantra, usage of the mantra, and tippani of the mantra.
    Sample Query:
    1. Obtain the anvaya of the mantra whose id (mantraid) is 1.1.1.1?
    2. Retrieve tha adibhautic meaning of the first mantra from RigVeda, first mandala, and first shukta.
    '''
    try:
        # Resolve the SQL query either from an explicit mantra id or from
        # scripture coordinates via the lookup helper.
        if mantraid:
            query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{mantraid}'"
        else:
            filtered_df = _get_mantra_details_by_scripture(scripture_name=scripture_name, KandahNumber=KandahNumber, MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                           ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber, MantraNumber=MantraNumber,
                                                           AnuvakNumber=AnuvakNumber, AdhyayaNumber=AdhyayaNumber)
            if filtered_df is not None:
                mantra_id = filtered_df[0]['mantra_id']
                query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{mantra_id}'"
            else:
                # No matching mantra: signal "not found" with None.
                return None
        json_dict = get_details_mantra_json(query)
        mantra_summary = json_dict['mantraSummary']['language']
        # NOTE(review): index [1] presumably is the Roman-IAST entry of the
        # language list — confirm against the mantra_json schema.
        summary_dict = {"Roman-IAST summary of vedamantra": json_dict['mantraSummary']['language'][1]}
        # Collect one English summary entry per mahatma (commentator).
        for item in mantra_summary:
            if item['languageName'] == 'English':
                mahatma = item['mahatma']['mahatmaName']
                summary_dict[f"English summary of vedamantra by {mahatma}"] = item
        return summary_dict
    except Exception as e:
        # Error contract: a plain dict (not JSON text) with an "error" key.
        return {"error": str(e)}
194
+
195
+
196
+
197
def get_pada_meaning(pada):
    '''
    Purpose: For given sanskrit word, you have collection of meanings for available roots and stems of it.\
    You need to process this information as context and provide possible meanings for given word.
    Sample query:
    1. What is the meaning of the word apratidhṛṣṭa-śavasam?
    '''
    #pada=iast_process(pada)
    try:
        query = f'''
        SELECT * FROM term_details_modified WHERE Pada = "{pada}"
        '''
        # Execute the query to get details from the database
        details = execute_sql_query(query)
        #print(details)
        # Only the first matching row is used; raises IndexError (caught
        # below) when the pada is not present in the table.
        pada_details = details[0]
        #print(pada_details['Morphology'])
        meanings_list = []
        # Morphology is stored as the repr of a Python list of dicts;
        # literal_eval turns it back into data.
        for morphs in ast.literal_eval(pada_details['Morphology']):
            # Look up dictionary meanings for both the stem and the root
            # form of each morphological analysis, when present.
            for field in ['stem', 'root']:
                word = morphs.get(field)
                if word:
                    meanings_list.append(get_list_meaning_word(word))
        return meanings_list
    except Exception as e:
        logging.error(f"Error in get_pada_meaning: {e}")
        return {"error": f"Required meaning associated with pada is not available. {e}"}
224
+
225
+
226
+
227
def _get_pada_details_by_scripture(pada, scripture_name=None, KandahNumber=None, MandalaNumber=None, ArchikahNumber=None,
                                   ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None, AnuvakNumber=None,
                                   AdhyayaNumber=None):
    """Fetch rows from term_details_modified for *pada*, optionally narrowed by scripture coordinates.

    Every non-None coordinate adds one AND clause to the parameterized query.
    Returns the result rows, or None when nothing matches or the query fails.
    """
    try:
        # (column, value) pairs in a fixed order; each provided value
        # contributes one "AND column = %s" clause.
        optional_filters = (
            ("scripture_name", scripture_name),
            ("KandahNumber", KandahNumber),
            ("MandalaNumber", MandalaNumber),
            ("ArchikahNumber", ArchikahNumber),
            ("ShuktaNumber", ShuktaNumber),
            ("PrapatakNumber", PrapatakNumber),
            ("MantraNumber", MantraNumber),
            ("AnuvakNumber", AnuvakNumber),
            ("AdhyayaNumber", AdhyayaNumber),
        )
        sql_parts = ["SELECT * FROM term_details_modified WHERE Pada = %s"]
        parameters = [pada]
        for column, value in optional_filters:
            if value:
                sql_parts.append(f" AND {column} = %s")
                parameters.append(value)

        # Placeholders keep the query safe from injection via the inputs.
        results = execute_sql_query("".join(sql_parts), parameters)
        return results if results else None

    except Exception as e:
        logging.error(f"Error in _get_pada_details_by_scripture: {e}")
        return None
275
+
276
def _get_vedamantra_meaning(mantraID, MahatmaName=None):
    """Return the most complete English {adibhautic, adidaivic, adhyatmic} meaning for a mantra.

    When MahatmaName is given and present, only that commentator's summaries
    are considered. Returns a dict of the three meaning categories, or a
    JSON-encoded error string when nothing usable is found or an error occurs.
    """
    try:
        query = f"SELECT mantra_json FROM veda_content WHERE mantra_number = '{mantraID}'"
        jsonDict = get_details_mantra_json(query)
        mantraSummary = jsonDict['mantraSummary']['language']
        if MahatmaName is not None:
            # Restrict to the requested commentator; fall back to all
            # summaries when the name does not match any entry.
            filtered_summary = [data_dict for data_dict in mantraSummary if data_dict.get('mahatma', {}).get('mahatmaName') == MahatmaName]
            if filtered_summary:
                mantraSummary = filtered_summary
        best_meaning = None
        best_count = 0
        # Pick the English summary that fills the most of the three
        # meaning categories; >= keeps the LAST best on ties.
        for data_dict in mantraSummary:
            if data_dict.get('languageName') == "English":
                meanings = data_dict['mahatma']['bhavartha']
                count = sum(bool(meanings.get(cat, None)) for cat in ['adibhautic', 'adidaivic', 'adhyatmic'])
                if count >= best_count:
                    best_meaning = {cat: meanings.get(cat, None) for cat in ['adibhautic', 'adidaivic', 'adhyatmic']}
                    best_count = count
        return best_meaning if best_meaning else json.dumps({"error": "Required meaning associated with vedamantra is not available."})
    except Exception as e:
        logging.error(f"Error in _get_vedamantra_meaning: {e}")
        return json.dumps({"error": f"An error occurred: {e}"})
298
+
299
def _get_pada_morphology(term_details, meanings):
    """Score each stem/root in a pada's morphology against a reference meaning text.

    term_details: a DB row whose 'Morphology' column holds the repr of a
    list of morph dicts. meanings: the reference meaning string to compare
    against. Returns a list of per-morph dicts with the best-matching
    meaning and similarity score, or [] on failure.
    """
    try:
        # Morphology is stored as a stringified Python list; rebuild it.
        morphology_list = ast.literal_eval(term_details['Morphology'])
        term_morph_list = []
        for morphs in morphology_list:
            term_info = {}
            for field in ['stem', 'root']:
                morph_word = morphs.get(field)
                if morph_word:
                    # word_sentence_similarity returns (meaning, score)
                    # pairs sorted best-first; take the top pair.
                    meaning = word_sentence_similarity(meanings, morph_word)
                    term_info[f'{field}_word'] = morph_word
                    term_info[f'{field}_meaning'] = meaning[0][0] if meaning else None
                    term_info[f'{field}_score'] = meaning[0][1] if meaning else None
            term_info['grammar'] = morphs['grammar']
            term_morph_list.append(term_info)
        return term_morph_list
    except Exception as e:
        logging.error(f"Error in _get_pada_morphology: {e}")
        return []
318
+
319
def get_morphological_info_of_pada(pada, mantraid=None, scripture_name=None, KandahNumber=None, MandalaNumber=None,
                                   ArchikahNumber=None, ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                                   AnuvakNumber=None, AdhyayaNumber=None):
    '''
    This help to get segmentation and morphological information about the word.

    Looks the pada up in term_details_modified (directly, or via scripture
    coordinates when pada is falsy); optionally narrows to one mantra by
    mantraid. Returns {"morphology_info": {...}} or {"error": ...}.
    '''
    try:
        if pada:
            query = f'''SELECT * FROM term_details_modified WHERE Pada = "{pada}"
            '''
            details = execute_sql_query(query)
        else:
            # Placeholder for _get_pada_details_by_scripture function call
            # Replace with your actual implementation
            # NOTE(review): reached only when pada is falsy, yet pada is
            # still passed as the filter — confirm this branch is intended.
            details = _get_pada_details_by_scripture(pada, scripture_name=scripture_name, KandahNumber=KandahNumber,
                                                     MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                     ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber,
                                                     MantraNumber=MantraNumber, AnuvakNumber=AnuvakNumber,
                                                     AdhyayaNumber=AdhyayaNumber)

        if details:
            if mantraid is not None:
                # Scan for the row belonging to the requested mantra.
                for record in details:
                    if record["mantra_id"] == mantraid:
                        segmentation = record["Segmentation"]
                        morphological_info = record["Morphology"]
                        return {"morphology_info": {"segmentation": segmentation, "morphology": morphological_info}}
                return {"error": f"No details found for mantraid '{mantraid}'"}
            else:
                # No mantra filter: use the first matching row.
                pada_details = details[0]
                segmentation = pada_details["Segmentation"]
                morphological_info = pada_details["Morphology"]
                return {"morphology_info": {"segmentation": segmentation, "morphology": morphological_info}}
        else:
            return {"error": "No details found for pada."}

    except Exception as e:
        logging.error(f"Error in get_morphological_info_of_pada: {e}")
        return {"error": f"Failed to get meaning of the word {pada}. {e}"}
358
+
359
+
360
def get_adibauatic_adidaivic_adhyatmic_meaning_of_pada(pada, mantraid=None, scripture_name=None,
                                                       KandahNumber=None, MandalaNumber=None, ArchikahNumber=None,
                                                       ShuktaNumber=None, PrapatakNumber=None, MantraNumber=None,
                                                       AnuvakNumber=None, AdhyayaNumber=None, MahatmaName=None):
    '''
    Contextual adibhautic/adidaivic/adhyatmic meaning of a pada within its mantra.

    Sample query:
    1. What is the adibhautic meaning of pada 'agnim' from RigVeda, first mandala, first shukta and first mantra?
    2. What is the adhyatmic meaning of the pada agnim in the context of the mantra whose id is '1.1.1.1?'
    '''
    try:
        if mantraid:
            query = f'''
            SELECT * FROM term_details_modified WHERE mantra_id = '{mantraid}' AND Pada = "{pada}"
            '''
            # Execute the query to get details from the database
            details = execute_sql_query(query)
        else:
            # Call the function to get details by scripture
            details = _get_pada_details_by_scripture(pada, scripture_name=scripture_name, KandahNumber=KandahNumber, MandalaNumber=MandalaNumber, ArchikahNumber=ArchikahNumber,
                                                     ShuktaNumber=ShuktaNumber, PrapatakNumber=PrapatakNumber, MantraNumber=MantraNumber,
                                                     AnuvakNumber=AnuvakNumber, AdhyayaNumber=AdhyayaNumber)

        if details:
            pada_details = details[0]  # Assuming details is a list of dictionaries, select the first item
            mantraID = pada_details['mantra_id']
            # Whole-mantra meanings serve as reference texts for scoring
            # the pada's stems/roots in each category.
            meanings = _get_vedamantra_meaning(mantraID, MahatmaName=MahatmaName)
            # _get_vedamantra_meaning returns a JSON error *string* on
            # failure; the substring check covers that case too.
            if 'error' in meanings:
                return json.dumps(meanings)
            ab_term_morph_list = _get_pada_morphology(pada_details, meanings['adibhautic'])
            ad_term_morph_list = _get_pada_morphology(pada_details, meanings['adidaivic'])
            at_term_morph_list = _get_pada_morphology(pada_details, meanings['adhyatmic'])
            return json.dumps({
                f'adibhautic_info_{pada}': ab_term_morph_list,
                'vedamantra_adibhautic_meaning': meanings['adibhautic'],
                f'adidavic_info_{pada}': ad_term_morph_list,
                'vedamantra_adidavic_meaning': meanings['adidaivic'],
                f'adhyatmic_info_{pada}': at_term_morph_list,
                'vedamantra_adhyatmic_meaning': meanings['adhyatmic']
            })
        else:
            return json.dumps({"error": f"No details found for pada '{pada}'"})
    except Exception as e:
        logging.error(f"Error in get_adibauatic_adidaivic_adhyatmic_meaning_of_pada: {e}")
        return json.dumps({"error": f"Failed to get meaning of the word {pada}. {e}"})
404
+
405
+ # sql agent
406
+ from langchain_community.utilities.sql_database import SQLDatabase
407
+ from database import get_db
408
+ from langchain_community.agent_toolkits import create_sql_agent
409
+ from langchain_openai import ChatOpenAI
410
+ from langchain.pydantic_v1 import BaseModel, Field
411
+ from langchain.tools import StructuredTool
412
+ from typing import Optional
413
+ import json
414
+
415
+
416
class NLSQLResponse(BaseModel):
    # Argument schema for the nl_sql_query StructuredTool: the raw
    # natural-language question to be translated into SQL.
    user_query: str = Field(description="user query")
419
# Build the LangChain SQL agent once at import time: project DB handle plus
# an openai-tools agent over it (llm_AI4 is defined earlier in this file).
db = get_db()
agent_executor = create_sql_agent(llm_AI4, db=db, agent_type="openai-tools", verbose=False)
421
+
422
def get_response(user_query):
    """Run *user_query* through the SQL agent and return its raw response."""
    return agent_executor.invoke(user_query)
425
+
426
# --- StructuredTool wrappers exposed to the agent -------------------------
# Each tool pairs one of the functions above with a description the LLM uses
# for tool selection. Only sql_tool returns directly to the user.

sql_tool = StructuredTool.from_function(
    func = get_response,
    name = "nl_sql_query",
    description="""To obtains a information using natural language query to sql query and then exceting sql query to get natural response.
    Sample Query:
    1. How many mantras are there in RigVeda?
    2. What is the segmentation of the word 'prathasva' from KrishnaYajurVeda?""",
    args_schema=NLSQLResponse,
    return_direct=True,
)

pada_morphological_tool = StructuredTool.from_function(
    func=get_morphological_info_of_pada,
    name="pada_morphology",
    description="""Purpose: To obtain morphological information such as segmentation, morphology, and grammar of a word.\
    Sample query:
    1. What is the segmentation and morphology of the word 'apratidhṛṣṭa-śavasam' from RigVeda?
    2. What is the grammar of the word 'prathasva' from KrishnaYajurVeda?
    """,
    args_schema=PadaAAAInput,
    return_direct=False
)

pada_meaning_tool = StructuredTool.from_function(
    func=get_pada_meaning,
    name="pada_meaning",
    description="""Purpose: For given sanskrit word, you have collection of meanings for available roots and stems of it.\
    You need to process this information as context and provide possible meanings for given word.
    Sample query:
    1. What is the meaning of the word apratidhṛṣṭa-śavasam?
    """,
    args_schema=PadaMeaningInput,
    return_direct=False
)

pada_word_sense_tool = StructuredTool.from_function(
    func=get_adibauatic_adidaivic_adhyatmic_meaning_of_pada,
    name="pada_AAA_meaning",
    description="""To obtain a complete or meaningful adibauatic/adhidaivic/adhyatmic meaning of a word or pada based on context information.\n
    Sample query:
    1. What is the adibhautic meaning of pada 'agnim' from RigVeda, first mandala, first shukta and first mantra?
    2. What is the adhyatmic meaning of the pada agnim in the context of the mantra whose id is '1.1.1.1'?
    """,
    args_schema=PadaAAAInput,
    return_direct=False
)

vedamantra_tool = StructuredTool.from_function(
    func=get_vedamantra_details,
    name="vedamantra_details",
    description='''This function is used to get the vedamantra such as vedamantra, padapatha, devata, chandah,
    and rishi, from all Vedas (RigVeda, AtharvaVeda, SamaVeda, KrishnaYajurVeda, and ShuklaYajurVeda).
    Sample Questions:
    1. Obtain the vedamantra of the mantra whose id is 1.1.1.1?
    2. Retrieve the devata of the vedamantra from Rigveda, first mandala, first shukta, and first mantra.
    3. Provide the meaning of the vedamantra from Rigveda, first mandala, first shukta, and first mantra written by Tulsi Ram.
    4. Explain the adhibautic meaning of the first mantra from RigVeda, first mandala, and first shukta.
    ''',
    args_schema=MantraInput,
    return_direct=False
)
vedamantra_summary_tool = StructuredTool.from_function(
    func=get_vedamantra_summary,
    name="vedamantra_summary",
    description="""Use the function `get_vedamantra_summary` to access the information such as adibhautic meaning of the mantra, anvaya of the mantra, mantraVishaya of the mantra,
    adhibautic (or adhyatmic or adhidyvic) meaning (or bhavarth) of the mantra, purpose of the mantra, usage of the mantra, and tippani of the mantra.
    Sample Query:
    1. Obtain the anvaya of the mantra whose id (mantraid) is 1.1.1.1?
    2. Retrieve tha adibhautic meaning of the first mantra from RigVeda, first mandala, and first shukta.
    3. Provide the adhyatmic meaning of the mantra 1.1.1.9?
    4. What is the tippani of the mantra 1.1.1.7?
    5. What is the adhyatmic meaning of the mantra 1.1.1.7?
    6. What is the mantravishaya of the 6th mantra from RigVeda, first mandala, and first shukta?""",
    args_schema=MantraInput,
    return_direct=False
)
502
+
503
+
504
+ tools_list = [pada_morphological_tool, sql_tool, pada_meaning_tool, pada_word_sense_tool, vedamantra_tool, vedamantra_summary_tool]
505
+ #vector_tool,
506
+
507
+ # Convert tools to OpenAI functions
508
+ tools_all = [convert_to_openai_function(tool) for tool in tools_list]
509
+ # Set up the tools to execute them from the graph
510
+ from langgraph.prebuilt import ToolExecutor
511
+
512
+ tool_executor = ToolExecutor(tools_list)
513
+ #tools_response = tools_all.append(convert_to_openai_function(Response))
514
+ llm_with_tools = llm_AI4.bind_tools(tools_all)
515
+ #tool_map = {tool.name: tool for tool in tools_list}
516
+
517
+
518
+ def call_tools(msg: AIMessage) -> Runnable:
519
+ """Simple sequential tool calling helper."""
520
+ tool_map = {tool.name: tool for tool in tools_list}
521
+ tool_calls = msg.tool_calls.copy()
522
+ for tool_call in tool_calls:
523
+ tool_call["output"] = tool_map[tool_call["name"]].invoke(tool_call["args"])
524
+ return tool_calls
525
+
526
+ #print("Invoking the chain")
527
+ tool_chain = llm_with_tools | call_tools
528
+
ramana_docs_ids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b132a67107aed2df316c947a72bda5bcad9eae4917d0a062e82e00733eba31ed
3
+ size 13480212
requirements.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.1.12
2
+ langchain-core>=0.1.31
3
+ langsmith==0.1.29
4
+ langgraph==0.0.28
5
+ streamlit==1.31.1
6
+ langchain_openai==0.0.8
7
+ openai==1.14.1
8
+ chardet==5.2.0
9
+ aksharamukha==2.1.2
10
+ sentence_transformers==2.4.0
11
+ langchain-nomic==0.0.2
12
+ beautifulsoup4==4.12.3
13
+ pymysql==1.1.0
14
+ mysql-connector-python==8.3.0
15
+ pinecone-text==0.9.0
16
+ rank-bm25==0.2.2
17
+ momento==1.20.1
18
+ streamlit_feedback==0.1.3
19
+ pinecone-client==3.2.2
20
+ nomic==3.0.15
21
+ langchain-pinecone==0.1.0
22
+ tabulate==0.9.0
23
+ langchainhub==0.1.15
24
+ psycopg2==2.9.9
utils.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import requests
4
+ import json
5
+ import pickle
6
+ import numpy as np
7
+ import pandas as pd
8
+ from typing import List
9
+ from typing import Optional
10
+ from typing import Union
11
+ import streamlit as st
12
+ from database import execute_sql_query
13
+ from bs4 import BeautifulSoup
14
+ from aksharamukha import transliterate
15
+ from sentence_transformers import util
16
+ from langchain_nomic.embeddings import NomicEmbeddings
17
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
18
+
19
+
20
+
21
+ def load_pickle(path):
22
+ with open(path,'rb') as f:
23
+ docs = pickle.load(f)
24
+ return docs
25
+
26
+
27
+
28
+ def initialize_embedding_model(model_name, device="cpu", normalize_embeddings=True):
29
+ model_kwargs = {"device": device}
30
+ encode_kwargs = {"normalize_embeddings": normalize_embeddings}
31
+ return HuggingFaceBgeEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)
32
+
33
+
34
+
35
+ # embedding model for quick calculations
36
+ os.environ['NOMIC_API_KEY'] = os.getenv('NOMIC_API_KEY')
37
+ #nomic embed model used for similarity scores
38
+ nomic_embed_model = NomicEmbeddings(
39
+ dimensionality=128,
40
+ model="nomic-embed-text-v1.5",
41
+ )
42
+
43
+
44
+
45
+ def get_list_meaning_word(word):
46
+ pada_meanings = {'pada': word,
47
+ 'Monier-Williams Sanskrit-English Dictionary (1899)': [],
48
+ 'Shabda-Sagara (1900)': [],
49
+ 'Apte-Practical Sanskrit-English Dictionary (1890)': [],
50
+ }
51
+ url = f"https://ambuda.org/tools/dictionaries/mw,shabdasagara,apte/{word}"
52
+
53
+ try:
54
+ # Fetch HTML content
55
+ response = requests.get(url)
56
+ response.raise_for_status()
57
+
58
+ # Parse HTML with BeautifulSoup
59
+ soup = BeautifulSoup(response.text, 'html.parser')
60
+
61
+ # Extracting text from different tags
62
+ divs = soup.find_all('div', class_='my-4', attrs={'x-show': 'show'})
63
+
64
+ try:
65
+ # Find all list items <li> within the specified <ul> tag
66
+ div_items_0 = divs[0].find('ul').find_all('li', class_='dict-entry mw-entry')
67
+ # Print the text content of each list item
68
+ dive_text_0 = [li_tag.get_text(strip=True) for li_tag in div_items_0]
69
+ text_0_trans = [transliterate.process(src='Devanagari', tgt='IAST', txt=text) for text in dive_text_0]
70
+ pada_meanings['Monier-Williams Sanskrit-English Dictionary (1899)'] = text_0_trans
71
+ except :
72
+ print("Error: Unable to find Monier-Williams Sanskrit-English Dictionary (1899) data.")
73
+
74
+ try:
75
+ div_items_1 = divs[1].find_all('div')
76
+ dive_text_1 = [item.get_text(strip=True) for item in div_items_1]
77
+ text_1_trans = [transliterate.process(src='Devanagari', tgt='IAST', txt=text) for text in dive_text_1]
78
+ pada_meanings['Shabda-Sagara (1900)'] = text_1_trans
79
+ except :
80
+ print("Error: Unable to find Shabda-Sagara (1900) data.")
81
+
82
+ try:
83
+ apte_meanings = []
84
+ for tag in divs[2].find_all('b'):
85
+ if tag.text.strip() != '—':
86
+ text1 = tag.text.strip() # English text within <b> tag
87
+ sibling = tag.find_next_sibling() # Text following <b> tag
88
+ text2 = tag.next_sibling.strip() + ' ' # English text following <b> tag
89
+ while sibling.name != 'div':
90
+ if sibling.name is None: # Handling non-tag text
91
+ text2 += " "
92
+ elif sibling.name == 'span': # Handling <b> tag
93
+ IAST_text = transliterate.process(src='Devanagari', tgt='IAST', txt=sibling.text.strip())
94
+ text2 += IAST_text + ' ' + sibling.next_sibling.strip()
95
+ else:
96
+ text2 += sibling.text.strip() + ' ' + sibling.next_sibling.strip()
97
+ sibling = sibling.find_next_sibling()
98
+ apte_meanings.append(text2)
99
+ pada_meanings['Apte-Practical Sanskrit-English Dictionary (1890)'] = apte_meanings[:-1]
100
+ except:
101
+ print("Error: Unable to find Apte-Practical Sanskrit-English Dictionary (1890) data.")
102
+
103
+ except requests.exceptions.RequestException as e:
104
+ print(f"Error: Failed to fetch data from {url}. {e}")
105
+
106
+ return pada_meanings
107
+
108
+ #get similarity scores
109
+ def word_sentence_similarity(meanings, root_stem_word):
110
+ # Check if the word embeddings are not empty
111
+ if not meanings or not root_stem_word:
112
+ return None
113
+
114
+ meaning_embedding = np.array(nomic_embed_model.embed_query(meanings))
115
+ all_meanings = []
116
+ word_score_pair = []
117
+ all_meanings.extend(get_list_meaning_word(root_stem_word)['Monier-Williams Sanskrit-English Dictionary (1899)'])
118
+ all_meanings.extend(get_list_meaning_word(root_stem_word)['Shabda-Sagara (1900)'])
119
+ for word_meaning in all_meanings:
120
+ root_stem_word_meaning_embedding = np.array(nomic_embed_model.embed_query(word_meaning))
121
+ # Calculate cosine similarity
122
+ similarity_score = util.pytorch_cos_sim(meaning_embedding, root_stem_word_meaning_embedding).item()
123
+ word_score_pair.append((word_meaning,similarity_score))
124
+ # Sort the list in descending order based on similarity scores
125
+ sorted_word_score_pairs = sorted(word_score_pair, key=lambda x: x[1], reverse=True)
126
+ return sorted_word_score_pairs
127
+
128
+ #extract the adhibautic meaning of the mantra from the vedamantra
129
+ def extract_meaning_by_language(data_list, target_language='English'):
130
+ for data_dict in data_list:
131
+ if data_dict.get('languageName') == target_language:
132
+ return data_dict.get('mahatma', {})
133
+ return None
134
+
135
+ #mantra_json_details
136
+ def get_details_mantra_json(query):
137
+ description, data = execute_sql_query(query)
138
+ df = pd.DataFrame(data)
139
+ df.columns = [x[0] for x in description]
140
+ mantra_json = df['mantra_json'].values[0]
141
+ cleaned_data = re.sub('<[^<]+?>', '', mantra_json)
142
+ return json.loads(cleaned_data)
143
+
144
+ def iast_process(input_text):
145
+ output_text = re.sub('[\u0951-\u0954,\u200d,\u0331]', '', input_text)
146
+ return output_text
147
+
vector_tool.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import pickle
4
+ import streamlit as st
5
+ from dotenv import load_dotenv
6
+ from pinecone import Pinecone, ServerlessSpec
7
+ from utils import load_pickle, initialize_embedding_model
8
+ from langchain_community.retrievers import BM25Retriever
9
+ from langchain_pinecone import PineconeVectorStore
10
+ from langchain.retrievers import EnsembleRetriever
11
+ from langchain.tools.retriever import create_retriever_tool
12
+
13
+
14
+
15
+ # Load .env file
16
+ load_dotenv()
17
+
18
+ # Constants
19
+ INDEX_NAME = "veda-index-v2"
20
+ MODEL_NAME = "BAAI/bge-large-en-v1.5"
21
+
22
+
23
+
24
+ # Initialize Pinecone client
25
+ os.environ['PINECONE_API_KEY'] = os.getenv("PINECONE_API_KEY")
26
+ PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
27
+ pc = Pinecone(api_key=PINECONE_API_KEY)
28
+
29
+ #@st.cache_resource
30
+ def create_or_load_index():
31
+ # Check if index already exists
32
+ if INDEX_NAME not in pc.list_indexes().names():
33
+ # Create index if it does not exist
34
+ pc.create_index(
35
+ INDEX_NAME,
36
+ dimension=1024,
37
+ metric='dotproduct',
38
+ spec=ServerlessSpec(
39
+ cloud="aws",
40
+ region="us-east-1"
41
+ )
42
+ )
43
+ # Wait for index to be initialized
44
+ while not pc.describe_index(INDEX_NAME).status['ready']:
45
+ time.sleep(1)
46
+ # Connect to index
47
+ return pc.Index(INDEX_NAME)
48
+
49
+ # Load documents
50
+ docs = load_pickle("ramana_docs_ids.pkl")
51
+ # Initialize embedding model
52
+ embedding = initialize_embedding_model(MODEL_NAME)
53
+ # Create or load index
54
+ index = create_or_load_index()
55
+
56
+ # Initialize BM25 retriever
57
+ bm25_retriever = BM25Retriever.from_texts(
58
+ [text['document'].page_content for text in docs],
59
+ metadatas=[text['document'].metadata for text in docs]
60
+ )
61
+ bm25_retriever.k = 2
62
+
63
+ # Switch back to normal index for LangChain
64
+ vector_store = PineconeVectorStore(index, embedding)
65
+ retriever = vector_store.as_retriever(search_type="mmr")
66
+
67
+ # Initialize the ensemble retriever
68
+ ensemble_retriever = EnsembleRetriever(
69
+ retrievers=[bm25_retriever, retriever], weights=[0.2, 0.8]
70
+ )
71
+
72
+
73
+ vector_tools = create_retriever_tool(
74
+ retriever = ensemble_retriever,
75
+ name = "vector_retrieve",
76
+ description="Search and return documents related user query from the vector index.",
77
+ )
78
+
79
+ from langchain import hub
80
+
81
+ prompt = hub.pull("hwchase17/openai-tools-agent")
82
+ prompt.messages
83
+
84
+ from langchain.agents import AgentExecutor, create_openai_tools_agent
85
+ from langchain_openai import ChatOpenAI
86
+ import streamlit as st
87
+
88
+ os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
89
+ #load llm model
90
+ llm_AI4 = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
91
+
92
+ agent = create_openai_tools_agent(llm_AI4, [vector_tools], prompt)
93
+ agent_executor = AgentExecutor(agent=agent, tools=[vector_tools])