FauziIsyrinApridal committed on
Commit 22ea197 · 1 Parent(s): 1c19c94

update vector_store storage to Supabase

Files changed (2)
  1. app.py +17 -9
  2. app/document_processor.py +51 -34
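
Background for this change: FAISS's save_local() writes two files, index.faiss and index.pkl, so the store is now uploaded as two prefixed objects instead of one zip archive. A minimal sketch of that behaviour, assuming a LangChain FAISS store (the embedding model name here is only an example, not necessarily the one the app uses):

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

# Any embedding model works for the demonstration; the app's own model may differ.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
store = FAISS.from_documents([Document(page_content="contoh dokumen PNP")], embeddings)

# save_local() produces the two files the new upload code sends to Supabase:
#   vector_store/index.faiss  and  vector_store/index.pkl
store.save_local("vector_store")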
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import streamlit as st
 import os
 import tempfile
@@ -20,7 +19,7 @@ load_dotenv()
 
 # Supabase configuration
 BUCKET_NAME = "pnp-bot-storage-archive"
-VECTOR_STORE_FILE = "vector_store.zip"
+VECTOR_STORE_PREFIX = "vector_store"  # Changed from file name to prefix
 DATA_DIR = "data"
 
 @traceable(name="Create RAG Conversational Chain")
@@ -38,7 +37,7 @@ def create_conversational_chain(vector_store):
 
     chain = ConversationalRetrievalChain.from_llm(
         llm,
-        retriever=vector_store.as_retriever(search_kwargs={"k": 10}),
+        retriever=vector_store.as_retriever(search_kwargs={"k": 6}),
         combine_docs_chain_kwargs={"prompt": sahabat_prompt},
         return_source_documents=True,
         memory=memory
@@ -60,13 +59,22 @@ def get_latest_data_timestamp(folder):
     return latest_time
 
 def get_supabase_vector_store_timestamp():
-    """Get the timestamp of vector store in Supabase storage"""
+    """Get the timestamp of vector store files in Supabase storage"""
     try:
         response = supabase.storage.from_(BUCKET_NAME).list()
+        timestamps = []
+
         for file in response:
-            if file['name'] == VECTOR_STORE_FILE:
-                return file['updated_at']
+            if file['name'].startswith(VECTOR_STORE_PREFIX) and (
+                file['name'].endswith('.faiss') or file['name'].endswith('.pkl')
+            ):
+                timestamps.append(file['updated_at'])
+
+        # Return the latest timestamp if both files exist
+        if len(timestamps) >= 2:
+            return max(timestamps)
         return None
+
     except Exception as e:
         print(f"Error getting Supabase timestamp: {e}")
         return None
@@ -101,7 +109,7 @@ def main():
 
         # Save to Supabase instead of local storage
         with st.spinner("Uploading vector store to Supabase..."):
-            success = save_vector_store_to_supabase(vector_store, supabase, BUCKET_NAME, VECTOR_STORE_FILE)
+            success = save_vector_store_to_supabase(vector_store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
         if success:
             st.success("Vector store uploaded to Supabase successfully!")
         else:
@@ -112,7 +120,7 @@
     else:
         # Load vector store from Supabase
        with st.spinner("Loading vector store from Supabase..."):
-            vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_FILE)
+            vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
        if vector_store:
            st.success("Vector store loaded from Supabase successfully!")
        else:
@@ -122,7 +130,7 @@
     vector_store = st.session_state.get('vector_store')
     if vector_store is None:
         # Fallback: load from Supabase if not in session
-        vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_FILE)
+        vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
 
     st.session_state['vector_store'] = vector_store
 
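
Not visible in these hunks is how main() decides when to rebuild the index. A hedged sketch of one way to compare the two timestamps, assuming get_supabase_vector_store_timestamp() returns Supabase's ISO-8601 updated_at string and get_latest_data_timestamp() returns a POSIX epoch; the helper vector_store_is_stale below is illustrative, not code from this repository:

from datetime import datetime, timezone

def vector_store_is_stale(local_epoch, supabase_iso):
    # No uploaded index yet: treat as stale so it gets rebuilt and uploaded.
    if supabase_iso is None:
        return True
    # Supabase typically returns e.g. "2025-06-01T10:00:00.000Z"; normalize the
    # Z suffix so datetime.fromisoformat() accepts it on older Python versions.
    remote = datetime.fromisoformat(supabase_iso.replace("Z", "+00:00"))
    local = datetime.fromtimestamp(local_epoch, tz=timezone.utc)
    return local > remote

# rebuild = vector_store_is_stale(get_latest_data_timestamp(DATA_DIR),
#                                 get_supabase_vector_store_timestamp())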
 
app/document_processor.py CHANGED
@@ -6,28 +6,37 @@ import tempfile
 import zipfile
 import streamlit as st
 
-def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_name):
-    """Save vector store to Supabase storage as a zip file."""
+def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
+    """Save vector store to Supabase storage as separate files."""
     try:
         with tempfile.TemporaryDirectory() as temp_dir:
             # Save vector store locally first
             local_path = os.path.join(temp_dir, "vector_store")
             vector_store.save_local(local_path)
 
-            # Create zip file
-            zip_path = os.path.join(temp_dir, "vector_store.zip")
-            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-                for root, dirs, files in os.walk(local_path):
-                    for file in files:
-                        file_path = os.path.join(root, file)
-                        arc_name = os.path.relpath(file_path, local_path)
-                        zipf.write(file_path, arc_name)
+            # Upload index.faiss
+            faiss_file = os.path.join(local_path, "index.faiss")
+            if os.path.exists(faiss_file):
+                with open(faiss_file, 'rb') as f:
+                    supabase.storage.from_(bucket_name).upload(
+                        f"{file_prefix}_index.faiss",
+                        f,
+                        {"upsert": "true"}
+                    )
+                print(f"Uploaded: {file_prefix}_index.faiss")
 
-            # Upload to Supabase
-            with open(zip_path, 'rb') as f:
-                response = supabase.storage.from_(bucket_name).upload(file_name, f, {"upsert": "true"})
-
-            print(f"Vector store uploaded to Supabase: {bucket_name}/{file_name}")
+            # Upload index.pkl
+            pkl_file = os.path.join(local_path, "index.pkl")
+            if os.path.exists(pkl_file):
+                with open(pkl_file, 'rb') as f:
+                    supabase.storage.from_(bucket_name).upload(
+                        f"{file_prefix}_index.pkl",
+                        f,
+                        {"upsert": "true"}
+                    )
+                print(f"Uploaded: {file_prefix}_index.pkl")
+
+            print(f"Vector store uploaded to Supabase bucket: {bucket_name}")
         return True
 
     except Exception as e:
@@ -35,26 +44,34 @@ def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_name
         st.error(f"Error uploading to Supabase: {e}")
         return False
 
-def load_vector_store_from_supabase(supabase, bucket_name, file_name):
-    """Load vector store from Supabase storage."""
+def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_store"):
+    """Load vector store from Supabase storage from separate files."""
     try:
-        # Download from Supabase
-        response = supabase.storage.from_(bucket_name).download(file_name)
-
-        if not response:
-            print("Vector store file not found in Supabase.")
-            return None
-
         with tempfile.TemporaryDirectory() as temp_dir:
-            # Save downloaded zip file
-            zip_path = os.path.join(temp_dir, "vector_store.zip")
-            with open(zip_path, 'wb') as f:
-                f.write(response)
+            local_path = os.path.join(temp_dir, "vector_store")
+            os.makedirs(local_path, exist_ok=True)
+
+            # Download index.faiss
+            try:
+                faiss_response = supabase.storage.from_(bucket_name).download(f"{file_prefix}_index.faiss")
+                faiss_file = os.path.join(local_path, "index.faiss")
+                with open(faiss_file, 'wb') as f:
+                    f.write(faiss_response)
+                print(f"Downloaded: {file_prefix}_index.faiss")
+            except Exception as e:
+                print(f"Error downloading index.faiss: {e}")
+                return None
 
-            # Extract zip file
-            extract_path = os.path.join(temp_dir, "vector_store")
-            with zipfile.ZipFile(zip_path, 'r') as zipf:
-                zipf.extractall(extract_path)
+            # Download index.pkl
+            try:
+                pkl_response = supabase.storage.from_(bucket_name).download(f"{file_prefix}_index.pkl")
+                pkl_file = os.path.join(local_path, "index.pkl")
+                with open(pkl_file, 'wb') as f:
+                    f.write(pkl_response)
+                print(f"Downloaded: {file_prefix}_index.pkl")
+            except Exception as e:
+                print(f"Error downloading index.pkl: {e}")
+                return None
 
             # Load vector store
             embeddings = HuggingFaceEmbeddings(
@@ -64,12 +81,12 @@ def load_vector_store_from_supabase(supabase, bucket_name, file_name):
             )
 
             vector_store = FAISS.load_local(
-                extract_path,
+                local_path,
                 embeddings,
                 allow_dangerous_deserialization=True
            )
 
-            print(f"Vector store loaded from Supabase: {bucket_name}/{file_name}")
+            print(f"Vector store loaded from Supabase bucket: {bucket_name}")
             return vector_store
 
     except Exception as e:
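
For reference, a short usage sketch of the new prefix-based helpers (illustrative only; the bucket name matches app.py, while the SUPABASE_URL / SUPABASE_KEY variable names and the example query are assumptions):

import os
from supabase import create_client
from app.document_processor import (
    save_vector_store_to_supabase,
    load_vector_store_from_supabase,
)

# Client setup; SUPABASE_URL / SUPABASE_KEY are assumed environment variables.
supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])

# Upload a freshly built FAISS store as vector_store_index.faiss / vector_store_index.pkl:
# save_vector_store_to_supabase(vector_store, supabase, "pnp-bot-storage-archive", "vector_store")

# Download both files and rebuild the store locally.
store = load_vector_store_from_supabase(supabase, "pnp-bot-storage-archive", "vector_store")
if store is not None:
    results = store.similarity_search("jadwal pendaftaran", k=6)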