# uk-building-regulations-bot / cloud_storage.py
import os
import zipfile

import boto3
import requests
import streamlit as st
from botocore.exceptions import ClientError
# Legacy implementation: authenticated download via boto3 using Streamlit
# secrets. Kept for reference; superseded by the public-URL version below.
# def download_vectorstore():
#     """
#     Download vector store from S3 and extract it
#     """
#     try:
#         # Get AWS credentials from Streamlit secrets
#         aws_access_key = st.secrets["AWS_ACCESS_KEY_ID"]
#         aws_secret_key = st.secrets["AWS_SECRET_ACCESS_KEY"]
#         bucket_name = st.secrets["AWS_S3_BUCKET"]
#
#         # Create S3 client
#         s3 = boto3.client(
#             's3',
#             region_name='eu-west-2',
#             aws_access_key_id=aws_access_key,
#             aws_secret_access_key=aws_secret_key
#         )
#
#         # Download zip file
#         zip_path = "main_chroma_data.zip"
#         st.info("Downloading vector store from S3...")
#         s3.download_file(bucket_name, 'main_chroma_data.zip', zip_path)
#
#         # Extract zip file
#         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
#             zip_ref.extractall("./")
#
#         # Remove zip file
#         os.remove(zip_path)
#         st.success("Vector store downloaded and extracted successfully!")
#     except ClientError as e:
#         st.error(f"Error downloading vector store: {str(e)}")
#         raise
#     except Exception as e:
#         st.error(f"Unexpected error: {str(e)}")
#         raise
def download_vectorstore():
    """Download the vector store zip from S3 via its public URL and extract it."""
    try:
        # Public object URL (the bucket allows unauthenticated GET)
        url = "https://uk-building-regulations-vectorstore.s3.eu-west-2.amazonaws.com/main_chroma_data.zip"
        zip_path = "main_chroma_data.zip"

        st.info("Downloading vector store from S3...")
        # Stream to disk in chunks; the timeout is a defensive addition
        r = requests.get(url, stream=True, timeout=60)
        r.raise_for_status()  # fail fast on 403/404 instead of unzipping an error page
        with open(zip_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)

        # Extract into the working directory, then remove the archive
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall("./")
        os.remove(zip_path)

        st.success("Vector store downloaded and extracted successfully!")
    except Exception as e:
        st.error(f"Detailed error: {str(e)}")
        raise
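
# A minimal sketch of how the app side might call this on startup, assuming
# the extracted Chroma data lives in ./main_chroma_data and re-downloading on
# every Streamlit rerun is undesirable. The helper name ensure_vectorstore()
# and the directory check are hypothetical, not part of the original app.
def ensure_vectorstore(path: str = "main_chroma_data") -> None:
    """Download the vector store only if it is not already on disk."""
    if not os.path.isdir(path):  # skip the download on warm restarts
        download_vectorstore()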
def upload_vectorstore():
    """
    Zip and upload the vector store to S3.
    Note: this is for local use when publishing the index, not needed in the app.
    """
    # Zip main_chroma_data; os.walk paths keep the 'main_chroma_data/'
    # prefix, so extraction recreates the directory in place
    with zipfile.ZipFile('main_chroma_data.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk('main_chroma_data'):
            for file in files:
                zipf.write(os.path.join(root, file))

    # Upload to S3 using credentials from the environment;
    # the region matches the bucket endpoint used in download_vectorstore()
    aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
    aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    bucket_name = os.getenv("AWS_S3_BUCKET")
    s3 = boto3.client(
        's3',
        region_name='eu-west-2',
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key
    )
    s3.upload_file('main_chroma_data.zip', bucket_name, 'main_chroma_data.zip')
    print("Vector store uploaded successfully!")
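
# A hedged variant with explicit error handling, using the ClientError import
# already at the top of this file. This wrapper is a sketch, not original
# behaviour: the source version lets boto3 exceptions propagate unformatted.
def upload_vectorstore_safe():
    """upload_vectorstore() with a friendlier failure message (hypothetical)."""
    try:
        upload_vectorstore()
    except ClientError as e:
        # Surface the S3 error code (e.g. AccessDenied, NoSuchBucket) for triage
        code = e.response.get("Error", {}).get("Code", "Unknown")
        print(f"Upload failed with S3 error {code}: {e}")
        raise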
# CLI entry point: run `python cloud_storage.py upload` to upload the vector store
if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1 and sys.argv[1] == "upload":
        upload_vectorstore()