# SQLite compatibility check for ChromaDB
import sqlite3
print(f"SQLite version: {sqlite3.sqlite_version}")

# Try the alternative vector store backend if the SQLite version is too old
import os
os.environ["LANGCHAIN_CHROMA_ALLOW_DEPRECATED_BACKEND"] = "true"
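
# A common workaround when the bundled SQLite predates Chroma's requirement
# (sqlite3 >= 3.35) is to swap in pysqlite3 before chromadb is imported.
# Kept commented out as an optional alternative; it assumes the
# pysqlite3-binary package is installed.
# import sys
# __import__("pysqlite3")
# sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")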

import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from cloud_storage import download_vectorstore

# Page config
st.set_page_config(
    page_title="StructureGPT - UK Building Regulations Assistant",
    page_icon="πŸ—οΈ",
    layout="wide"
)

# Load environment variables
load_dotenv()

# Initialize RAG components
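# st.cache_resource builds the embeddings, vector store, and LLM clients once
# per process and reuses them across Streamlit reruns and sessions.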
@st.cache_resource
def init_rag():
    """Initialize RAG components with caching."""
    try:
        # Download the vector store if it is not already present locally
        if not os.path.exists("./main_chroma_data"):
            download_vectorstore()
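            # Assumption: cloud_storage.download_vectorstore() pulls the
            # persisted Chroma directory from remote storage into ./main_chroma_data.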

        # Initialize embeddings
        try:
            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2",
                encode_kwargs={'normalize_embeddings': True}  # Added for stability
            )
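            # Normalized (unit-length) vectors make cosine similarity equivalent
            # to a dot product, keeping retrieval scores on a consistent scale.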
        except Exception as e:
            st.error(f"Error initializing embeddings: {str(e)}")
            return None, None, None

        # Initialize vector store
        try:
            vectorstore = Chroma(
                collection_name="main_construction_rag",
                embedding_function=embeddings,
                persist_directory="./main_chroma_data"
            )
        except Exception:
            st.warning("Using deprecated backend due to SQLite version constraints")
            # Use alternative initialization if needed
            from langchain_community.vectorstores import Chroma as ChromaDeprecated
            vectorstore = ChromaDeprecated(
                collection_name="main_construction_rag",
                embedding_function=embeddings,
                persist_directory="./main_chroma_data"
            )
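        # Either path yields a store exposing the same similarity_search API,
        # so the code below is unaffected by which backend loaded.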

        # Check if GROQ API key is set
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key:
            st.error("Error: GROQ_API_KEY not found in environment variables")
            return None, None, None

        # Initialize LLMs - both models
        try:
            llm_70b = ChatGroq(
                api_key=groq_api_key,
                model_name="llama-3.3-70b-versatile",
                temperature=0.1
            )
            llm_8b = ChatGroq(
                api_key=groq_api_key,
                model_name="llama3-8b-8192",
                temperature=0.1
            )
        except Exception as e:
            st.error(f"Error initializing LLMs: {str(e)}")
            return None, None, None

        return vectorstore, llm_70b, llm_8b
    except Exception as e:
        st.error(f"Error initializing RAG system: {str(e)}")
        return None, None, None

# Initialize
vectorstore, llm_70b, llm_8b = init_rag()
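# On failure init_rag() returns (None, None, None); the UI checks for this below.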

# Sidebar for model selection and feedback
with st.sidebar:
    st.title("πŸ”§ Model Settings")

    # Model selection toggle
    model_option = st.radio(
        "Select Model:",
        ["Llama-3.3-70B (More accurate, slower)", "Llama3-8B (Faster, less accurate)"],
        index=0,  # Default to the 70B model
        help="Choose between the more accurate 70B model and the faster 8B model"
    )

    # Display selected model details
    if model_option == "Llama-3.3-70B (More accurate, slower)":
        st.info("Using Llama-3.3-70B: Higher accuracy but slightly slower responses")
        selected_llm = llm_70b
    else:
        st.info("Using Llama3-8B: Faster responses with good accuracy")
        selected_llm = llm_8b

    st.divider()

    # Feedback section
    st.title("πŸ“ Feedback")
    feedback = st.text_area("Share your feedback on the answers:", height=100)
    if st.button("Submit Feedback"):
        st.success("Thank you for your feedback!")
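
    # Note: feedback text is not persisted anywhere yet; wiring it to a
    # datastore or logging service is left for future work.
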
# Main interface
st.title("πŸ—οΈ StructureGPT - UK Building Regulations AI Assistant")
st.markdown("""
This AI assistant helps answer questions about UK building regulations using:
- Official Building Regulations documents
- Expert YouTube content from LABC, RICS, and other authorities
- Technical documentation and guidance
""")
# Add testing phase notice with warning styling
st.warning("""
⚠️ **TESTING PHASE** - StructureGPT is currently in beta testing, focusing only on UK Building Regulations Parts A (Structure), B (Fire Safety), and C (Site Preparation and Resistance to Contaminants and Moisture). Additional regulation parts will be added soon.
""")

# User input
question = st.text_input("Enter your question about UK building regulations:")

if st.button("Get Answer"):
    if not question:
        st.warning("Please enter a question.")
    elif vectorstore is None or selected_llm is None:
        st.error("RAG system not properly initialized. Please check the errors above.")
    else:
        with st.spinner(f"Searching regulations and generating answer using {model_option.split(' ')[0]}..."):
            try:
                # Get relevant documents
                docs = vectorstore.similarity_search(question, k=4)
                contexts = [doc.page_content for doc in docs]
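                # similarity_search returns the k chunks whose embeddings sit
                # closest to the question embedding in the Chroma index.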

                # Generate answer
                context_text = "\n\n".join(contexts)
                prompt = f"""Based on the following context from UK Building Regulations, provide a clear and detailed answer to the question.
Include specific references to regulations where available.
Question: {question}
Context: {context_text}
Answer:"""
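                # Assumption: with k=4 retrieved chunks the assembled prompt
                # stays well within the model context window for typical chunk sizes.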
                response = selected_llm.invoke(prompt)

                # Display answer
                st.markdown("### Answer")
                st.write(response.content)

                # Display model used
                st.caption(f"Answer generated using {model_option.split(' ')[0]}")

                # Display sources
                with st.expander("View Source Documents"):
                    for i, context in enumerate(contexts, 1):
                        st.markdown(f"**Source {i}:**")
                        st.markdown(context)
                        st.divider()

                # Add feedback section
                st.subheader("Was this answer helpful?")
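                # Note: clicking either button below triggers a Streamlit rerun
                # that clears this branch; the answer would need to be kept in
                # st.session_state for the acknowledgement messages to show.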
                col1, col2 = st.columns(2)
                with col1:
                    if st.button("πŸ‘ Helpful"):
                        st.success("Thank you for your feedback!")
                with col2:
                    if st.button("πŸ‘Ž Not Helpful"):
                        st.info("Thank you for your feedback. Please let us know how we can improve in the sidebar.")
            except Exception as e:
                st.error(f"Error generating answer: {str(e)}")

# Footer
st.markdown("---")
st.markdown("*StructureGPT is a research project in testing phase. Currently supporting Parts A (Structure), B (Fire Safety), and C (Site Preparation) of UK Building Regulations. Always verify information with official sources.*")