Update app.py
app.py
CHANGED
```diff
@@ -1,8 +1,7 @@
 import streamlit as st
 import os
 import requests
-import
-import chromadb
+import chromadb
 from langchain.document_loaders import PDFPlumberLoader
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_experimental.text_splitter import SemanticChunker
```
```diff
@@ -14,13 +13,16 @@ from prompts import rag_prompt, relevancy_prompt, relevant_context_picker_prompt
 
 # ----------------- Streamlit UI Setup -----------------
 st.set_page_config(page_title="Blah", layout="wide")
-st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=150)
+st.image("https://huggingface.co/front/assets/huggingface_logo-noborder.svg", width=150)
 st.title("Blah-1")
 
-
 # ----------------- API Keys -----------------
 os.environ["GROQ_API_KEY"] = st.secrets.get("GROQ_API_KEY", "")
 
+# ----------------- Ensure Vector Store Directory Exists -----------------
+if not os.path.exists("./chroma_langchain_db"):
+    os.makedirs("./chroma_langchain_db")
+
 # ----------------- Clear ChromaDB Cache -----------------
 chromadb.api.client.SharedSystemClient.clear_system_cache()
 
```
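The added existence check avoids a missing-directory error on first launch, but a check-then-create pair can race if two processes boot at once. A minimal sketch of the same idea using `exist_ok`, with the same path as in the diff:

```python
import os

# Idempotent: creates ./chroma_langchain_db if missing and silently
# succeeds if it already exists (or was just created by another process).
os.makedirs("./chroma_langchain_db", exist_ok=True)
```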
```diff
@@ -36,10 +38,14 @@ if "processed_chunks" not in st.session_state:
 if "vector_store" not in st.session_state:
     st.session_state.vector_store = None
 
-# ----------------- Load Models
+# ----------------- Load Models -------------------
 llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b")
 rag_llm = ChatGroq(model="mixtral-8x7b-32768")
 
+# Enable verbose logging for debugging
+llm_judge.verbose = True
+rag_llm.verbose = True
+
 # ----------------- PDF Selection (Upload or URL) -----------------
 st.sidebar.subheader("📄 PDF Selection")
 pdf_source = st.radio("Choose a PDF source:", ["Upload a PDF file", "Enter a PDF URL"], index=0, horizontal=True)
```
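`verbose` is a standard field on LangChain chat models, so it can also be passed at construction time rather than mutated afterwards. A sketch, assuming the same model names as the diff and that `ChatGroq` comes from `langchain_groq`:

```python
from langchain_groq import ChatGroq

# Same effect as setting .verbose = True after construction.
llm_judge = ChatGroq(model="deepseek-r1-distill-llama-70b", verbose=True)
rag_llm = ChatGroq(model="mixtral-8x7b-32768", verbose=True)
```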
```diff
@@ -79,7 +85,7 @@ if not st.session_state.pdf_loaded and "pdf_path" in st.session_state:
     loader = PDFPlumberLoader(st.session_state.pdf_path)
     docs = loader.load()
 
-    # Embedding Model
+    # Embedding Model (HF on CPU)
     model_name = "nomic-ai/modernbert-embed-base"
     embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": "cpu"})
 
```
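For context, this embedding model typically feeds the persisted Chroma store whose directory the commit now creates. A hypothetical wiring sketch; the `Chroma.from_documents` call and the stand-in `chunks` list are assumptions, not part of this diff:

```python
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
    model_name="nomic-ai/modernbert-embed-base",
    model_kwargs={"device": "cpu"},
)

# Stand-in for the SemanticChunker output in the real app.
chunks = [Document(page_content="example chunk of PDF text")]

vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory="./chroma_langchain_db",  # directory ensured above
)
```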
```diff
@@ -137,15 +143,15 @@ if query:
     final_output = context_management_chain.invoke({"context": context, "retriever_query": query, "query": query})
     st.success("✅ Full pipeline executed successfully!")
 
-    # ----------------- Display All Outputs -----------------
-    st.
+    # ----------------- Display All Outputs (Formatted) -----------------
+    st.markdown("### 🟥 Context Relevancy Evaluation")
     st.json(final_output["relevancy_response"])
 
-    st.
+    st.markdown("### 🟦 Picked Relevant Contexts")
     st.json(final_output["context_number"])
 
-    st.
+    st.markdown("### 🟥 Extracted Relevant Contexts")
     st.json(final_output["relevant_contexts"])
 
-    st.
+    st.markdown("## 🟥 RAG Final Response")
     st.write(final_output["final_response"])
```
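The new display block indexes `final_output` directly, so any missing key raises a `KeyError` in the UI. A hedged variant of two of the lines using `dict.get` fallbacks (same keys as the diff; `st` and `final_output` as in the app):

```python
# Fall back to placeholders if a pipeline step produced no output.
st.markdown("### 🟥 Context Relevancy Evaluation")
st.json(final_output.get("relevancy_response", {}))

st.markdown("## 🟥 RAG Final Response")
st.write(final_output.get("final_response", "No response generated."))
```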