euracle committed on
Commit c8d3120 · verified · 1 Parent(s): 2e052d5

Update app.py

Files changed (1)
  1. app.py +145 -143
app.py CHANGED
@@ -1,144 +1,146 @@
- import streamlit as st
- import os
- from dotenv import load_dotenv
- import time
- from langchain.vectorstores import Chroma
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_groq import ChatGroq
- from langchain.chains import RetrievalQA
- from langchain.document_loaders import PyPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
-
- start_time = time.time()
- # Set page title and icon
- st.set_page_config(page_title="Dr. Radha: The Agro-Homeopath", page_icon="🚀", layout="wide")
-
- # Center the title
- st.markdown("""
- <style>
- #the-title {
- text-align: center;
- }
- </style>
- """, unsafe_allow_html=True)
-
- # Display the title
- st.title("📚 Ask Dr. Radha - World's First AI based Agro-Homeopathy Doctor")
-
- # Load images
- human_image = "human.png"
- robot_image = "bot.png"
-
- # Load environment variables
- load_dotenv()
- end_time = time.time()
- print(f"Loading environment variables took {end_time - start_time:.4f} seconds")
-
- start_time = time.time()
- # Set up Groq API
- llm = ChatGroq(api_key=os.getenv("GROQ_API_KEY"), max_tokens=None, timeout=None, max_retries=2, temperature=0.5, model="llama-3.1-70b-versatile")
-
- # Set up embeddings
- embeddings = HuggingFaceEmbeddings()
- end_time = time.time()
- print(f"Setting up Groq LLM & Embeddings took {end_time - start_time:.4f} seconds")
-
- # Initialize session state
- if "documents" not in st.session_state:
-     st.session_state["documents"] = None
- if "vector_db" not in st.session_state:
-     st.session_state["vector_db"] = None
- if "query" not in st.session_state:
-     st.session_state["query"] = ""
-
- def load_data():
-     pdf_folder = "docs"
-     loaders = [PyPDFLoader(os.path.join(pdf_folder, fn)) for fn in os.listdir(pdf_folder)]
-     documents = []
-     for loader in loaders:
-         documents.extend(loader.load())
-
-     #text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=30)
-     text_splitter = RecursiveCharacterTextSplitter(
-         chunk_size=1000,
-         chunk_overlap=200,
-         length_function=len,
-         separators=["\n\n", "\n", " ", ""]
-     )
-     texts = text_splitter.split_documents(documents)
-     # Set up vector database
-     persist_directory = "db"
-     vector_db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
-
-     return documents, vector_db
-
- # Load and process PDFs
- start_time = time.time()
- # Load data if not already loaded
- if st.session_state["documents"] is None or st.session_state["vector_db"] is None:
-     with st.spinner("Loading data..."):
-         documents, vector_db = load_data()
-         st.session_state["documents"] = documents
-         st.session_state["vector_db"] = vector_db
- else:
-     documents = st.session_state["documents"]
-     vector_db = st.session_state["vector_db"]
-
- end_time = time.time()
- print(f"Loading and processing PDFs & vector database took {end_time - start_time:.4f} seconds")
-
- # Set up retrieval chain
- start_time = time.time()
- retriever = vector_db.as_retriever()
- qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
-
- # Chat interface
- chat_container = st.container()
-
- # Create a form for the query input and submit button
- with st.form(key='query_form'):
-     query = st.text_input("Ask your question:", value="")#st.session_state["query"])
-     submit_button = st.form_submit_button(label='Submit')
-
- end_time = time.time()
- print(f"Setting up retrieval chain took {end_time - start_time:.4f} seconds")
- start_time = time.time()
-
- if submit_button and query:
-     with st.spinner("Generating response..."):
-         result = qa({"query": query})
-         if result['result'].strip() == "":
-             response = "I apologize, but I don't have enough information in the provided PDFs to answer your question."
-         else:
-             response = result['result']
-
-     # Display human image and question
-     col1, col2 = st.columns([1, 10])
-     with col1:
-         st.image(human_image, width=80)
-     with col2:
-         st.markdown(f"{query}")
-     # Display robot image and answer
-     col1, col2 = st.columns([1, 10])
-     with col1:
-         st.image(robot_image, width=80)
-     with col2:
-         st.markdown(f"{response}")
-
-     st.markdown("---")
-
-     # Clear the query input
-     st.session_state["query"] = ""
-     #st.rerun()
-
- end_time = time.time()
- print(f"Actual query took {end_time - start_time:.4f} seconds")
-
- # Reload data button
- # if st.button("Reload Data"):
- # with st.spinner("Reloading data..."):
- # documents, vector_db = load_data()
- # st.session_state["documents"] = documents
- # st.session_state["vector_db"] = vector_db
+ import streamlit as st
+ import os
+ from dotenv import load_dotenv
+ import time
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_groq import ChatGroq
+ from langchain.chains import RetrievalQA
+ from langchain.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ # Access the secret
+ api_key1 = os.getenv("api_key")
+ start_time = time.time()
+ # Set page title and icon
+ st.set_page_config(page_title="Dr. Radha: The Agro-Homeopath", page_icon="🚀", layout="wide")
+
+ # Center the title
+ st.markdown("""
+ <style>
+ #the-title {
+ text-align: center;
+ }
+ </style>
+ """, unsafe_allow_html=True)
+
+ # Display the title
+ st.title("📚 Ask Dr. Radha - World's First AI based Agro-Homeopathy Doctor")
+
+ # Load images
+ human_image = "human.png"
+ robot_image = "bot.png"
+
+ # Load environment variables
+ load_dotenv()
+ end_time = time.time()
+ print(f"Loading environment variables took {end_time - start_time:.4f} seconds")
+
+ start_time = time.time()
+ # Set up Groq API
+ llm = ChatGroq(api_key=api_key1, max_tokens=None, timeout=None, max_retries=2, temperature=0.5, model="llama-3.1-70b-versatile")
+
+ # Set up embeddings
+ embeddings = HuggingFaceEmbeddings()
+ end_time = time.time()
+ print(f"Setting up Groq LLM & Embeddings took {end_time - start_time:.4f} seconds")
+
+ # Initialize session state
+ if "documents" not in st.session_state:
+     st.session_state["documents"] = None
+ if "vector_db" not in st.session_state:
+     st.session_state["vector_db"] = None
+ if "query" not in st.session_state:
+     st.session_state["query"] = ""
+
+ def load_data():
+     pdf_folder = "docs"
+     loaders = [PyPDFLoader(os.path.join(pdf_folder, fn)) for fn in os.listdir(pdf_folder)]
+     documents = []
+     for loader in loaders:
+         documents.extend(loader.load())
+
+     #text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=30)
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=1000,
+         chunk_overlap=200,
+         length_function=len,
+         separators=["\n\n", "\n", " ", ""]
+     )
+     texts = text_splitter.split_documents(documents)
+     # Set up vector database
+     persist_directory = "db"
+     vector_db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
+
+     return documents, vector_db
+
+ # Load and process PDFs
+ start_time = time.time()
+ # Load data if not already loaded
+ if st.session_state["documents"] is None or st.session_state["vector_db"] is None:
+     with st.spinner("Loading data..."):
+         documents, vector_db = load_data()
+         st.session_state["documents"] = documents
+         st.session_state["vector_db"] = vector_db
+ else:
+     documents = st.session_state["documents"]
+     vector_db = st.session_state["vector_db"]
+
+ end_time = time.time()
+ print(f"Loading and processing PDFs & vector database took {end_time - start_time:.4f} seconds")
+
+ # Set up retrieval chain
+ start_time = time.time()
+ retriever = vector_db.as_retriever()
+ qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
+
+ # Chat interface
+ chat_container = st.container()
+
+ # Create a form for the query input and submit button
+ with st.form(key='query_form'):
+     query = st.text_input("Ask your question:", value="")#st.session_state["query"])
+     submit_button = st.form_submit_button(label='Submit')
+
+ end_time = time.time()
+ print(f"Setting up retrieval chain took {end_time - start_time:.4f} seconds")
+ start_time = time.time()
+
+ if submit_button and query:
+     with st.spinner("Generating response..."):
+         result = qa({"query": query})
+         if result['result'].strip() == "":
+             response = "I apologize, but I don't have enough information in the provided PDFs to answer your question."
+         else:
+             response = result['result']
+
+     # Display human image and question
+     col1, col2 = st.columns([1, 10])
+     with col1:
+         st.image(human_image, width=80)
+     with col2:
+         st.markdown(f"{query}")
+     # Display robot image and answer
+     col1, col2 = st.columns([1, 10])
+     with col1:
+         st.image(robot_image, width=80)
+     with col2:
+         st.markdown(f"{response}")
+
+     st.markdown("---")
+
+     # Clear the query input
+     st.session_state["query"] = ""
+     #st.rerun()
+
+ end_time = time.time()
+ print(f"Actual query took {end_time - start_time:.4f} seconds")
+
+ # Reload data button
+ # if st.button("Reload Data"):
+ # with st.spinner("Reloading data..."):
+ # documents, vector_db = load_data()
+ # st.session_state["documents"] = documents
+ # st.session_state["vector_db"] = vector_db
  # st.success("Data reloaded successfully!")