# SQLite compatibility check for ChromaDB
import sqlite3
print(f"SQLite version: {sqlite3.sqlite_version}")

# Try the alternative vector store backend if the SQLite version is too old
import os
os.environ["LANGCHAIN_CHROMA_ALLOW_DEPRECATED_BACKEND"] = "true"
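
# A common workaround when the bundled SQLite predates Chroma's requirement
# (sqlite3 >= 3.35) is to swap in pysqlite3 before chromadb is imported.
# Kept commented out as an optional alternative; it assumes the
# pysqlite3-binary package is installed.
# import sys
# __import__("pysqlite3")
# sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")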

import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from dotenv import load_dotenv
from cloud_storage import download_vectorstore

# Page config
st.set_page_config(
    page_title="StructureGPT - UK Building Regulations Assistant",
    page_icon="πŸ—οΈ",
    layout="wide"
)

# Load environment variables
load_dotenv()

# Initialize RAG components
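# st.cache_resource builds the embeddings, vector store, and LLM clients once
# per process and reuses them across Streamlit reruns and sessions.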
@st.cache_resource
def init_rag():
    """Initialize RAG components with caching."""
    try:
        # Download the vector store if it is not already present locally
        if not os.path.exists("./main_chroma_data"):
            download_vectorstore()
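            # Assumption: cloud_storage.download_vectorstore() pulls the
            # persisted Chroma directory from remote storage into ./main_chroma_data.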

        # Initialize embeddings
        try:
            embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2",
                encode_kwargs={'normalize_embeddings': True}  # Added for stability
            )
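            # Normalized (unit-length) vectors make cosine similarity equivalent
            # to a dot product, keeping retrieval scores on a consistent scale.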
        except Exception as e:
            st.error(f"Error initializing embeddings: {str(e)}")
            return None, None, None

        # Initialize vector store
        try:
            vectorstore = Chroma(
                collection_name="main_construction_rag",
                embedding_function=embeddings,
                persist_directory="./main_chroma_data"
            )
        except Exception:
            st.warning("Using deprecated backend due to SQLite version constraints")
            # Use alternative initialization if needed
            from langchain_community.vectorstores import Chroma as ChromaDeprecated
            vectorstore = ChromaDeprecated(
                collection_name="main_construction_rag",
                embedding_function=embeddings,
                persist_directory="./main_chroma_data"
            )
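        # Either path yields a store exposing the same similarity_search API,
        # so the code below is unaffected by which backend loaded.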

        # Check if GROQ API key is set
        groq_api_key = os.getenv("GROQ_API_KEY")
        if not groq_api_key:
            st.error("Error: GROQ_API_KEY not found in environment variables")
            return None, None, None

        # Initialize LLMs - both models
        try:
            llm_70b = ChatGroq(
                api_key=groq_api_key,
                model_name="llama-3.3-70b-versatile",
                temperature=0.1
            )
            llm_8b = ChatGroq(
                api_key=groq_api_key,
                model_name="llama3-8b-8192",
                temperature=0.1
            )
        except Exception as e:
            st.error(f"Error initializing LLMs: {str(e)}")
            return None, None, None

        return vectorstore, llm_70b, llm_8b
    except Exception as e:
        st.error(f"Error initializing RAG system: {str(e)}")
        return None, None, None

# Initialize
vectorstore, llm_70b, llm_8b = init_rag()
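# On failure init_rag() returns (None, None, None); the UI checks for this below.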

# Sidebar for model selection and feedback
with st.sidebar:
    st.title("πŸ”§ Model Settings")

    # Model selection toggle
    model_option = st.radio(
        "Select Model:",
        ["Llama-3.3-70B (More accurate, slower)", "Llama3-8B (Faster, less accurate)"],
        index=0,  # Default to the 70B model
        help="Choose between the more accurate 70B model and the faster 8B model"
    )

    # Display selected model details
    if model_option == "Llama-3.3-70B (More accurate, slower)":
        st.info("Using Llama-3.3-70B: Higher accuracy but slightly slower responses")
        selected_llm = llm_70b
    else:
        st.info("Using Llama3-8B: Faster responses with good accuracy")
        selected_llm = llm_8b

    st.divider()

    # Feedback section
    st.title("πŸ“ Feedback")
    feedback = st.text_area("Share your feedback on the answers:", height=100)
    if st.button("Submit Feedback"):
        st.success("Thank you for your feedback!")
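
    # Note: feedback text is not persisted anywhere yet; wiring it to a
    # datastore or logging service is left for future work.
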
# Main interface
st.title("πŸ—οΈ StructureGPT - UK Building Regulations AI Assistant")
st.markdown("""
This AI assistant helps answer questions about UK building regulations using:
- Official Building Regulations documents
- Expert YouTube content from LABC, RICS, and other authorities
- Technical documentation and guidance
""")
# Add testing phase notice with warning styling
st.warning("""
⚠️ **TESTING PHASE** - StructureGPT is currently in beta testing, focusing only on UK Building Regulations Parts A (Structure), B (Fire Safety), and C (Site Preparation and Resistance to Contaminants and Moisture). Additional regulation parts will be added soon.
""")

# User input
question = st.text_input("Enter your question about UK building regulations:")

if st.button("Get Answer"):
    if not question:
        st.warning("Please enter a question.")
    elif vectorstore is None or selected_llm is None:
        st.error("RAG system not properly initialized. Please check the errors above.")
    else:
        with st.spinner(f"Searching regulations and generating answer using {model_option.split(' ')[0]}..."):
            try:
                # Get relevant documents
                docs = vectorstore.similarity_search(question, k=4)
                contexts = [doc.page_content for doc in docs]
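                # similarity_search returns the k chunks whose embeddings sit
                # closest to the question embedding in the Chroma index.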

                # Generate answer
                context_text = "\n\n".join(contexts)
                prompt = f"""Based on the following context from UK Building Regulations, provide a clear and detailed answer to the question.
Include specific references to regulations where available.
Question: {question}
Context: {context_text}
Answer:"""
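                # Assumption: with k=4 retrieved chunks the assembled prompt
                # stays well within the model context window for typical chunk sizes.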
                response = selected_llm.invoke(prompt)

                # Display answer
                st.markdown("### Answer")
                st.write(response.content)

                # Display model used
                st.caption(f"Answer generated using {model_option.split(' ')[0]}")

                # Display sources
                with st.expander("View Source Documents"):
                    for i, context in enumerate(contexts, 1):
                        st.markdown(f"**Source {i}:**")
                        st.markdown(context)
                        st.divider()

                # Add feedback section
                st.subheader("Was this answer helpful?")
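                # Note: clicking either button below triggers a Streamlit rerun
                # that clears this branch; the answer would need to be kept in
                # st.session_state for the acknowledgement messages to show.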
                col1, col2 = st.columns(2)
                with col1:
                    if st.button("πŸ‘ Helpful"):
                        st.success("Thank you for your feedback!")
                with col2:
                    if st.button("πŸ‘Ž Not Helpful"):
                        st.info("Thank you for your feedback. Please let us know how we can improve in the sidebar.")
            except Exception as e:
                st.error(f"Error generating answer: {str(e)}")

# Footer
st.markdown("---")
st.markdown("*StructureGPT is a research project in testing phase. Currently supporting Parts A (Structure), B (Fire Safety), and C (Site Preparation) of UK Building Regulations. Always verify information with official sources.*")