Commit cc9c554 · big-query
Parent(s): 48b7a28
Files changed:
- app.py +112 -4
- bigquery_uploader.py +112 -0
- knowledge_base.py +215 -44
- local_database.py +57 -0
- requirements.txt +2 -1
app.py
CHANGED
@@ -19,11 +19,13 @@ os.environ["TORCH_COMPILE_DISABLE"] = "1" # Ensure torch compile is off

 # --- Step 1: Import Core Components from Modules ---
 from vision_model import load_vision_model
-from knowledge_base import
+from knowledge_base import KnowledgeBase
 from agent_setup import initialize_adk
 from google.genai import types
 from story_generator import create_story_prompt_from_pdf, generate_video_from_prompt
 from langchain_huggingface import HuggingFaceEndpoint
+from bigquery_uploader import upload_to_bigquery
+import local_database

 print("✅ All libraries imported successfully.")

@@ -32,7 +34,8 @@ print("✅ All libraries imported successfully.")

 print("Performing initial setup...")
 VISION_MODEL, PROCESSOR = load_vision_model()
+KB = KnowledgeBase()
+RETRIEVER = KB  # The retriever is now the KB itself

 # Initialize ADK components for Connected Mode
 adk_components = initialize_adk(VISION_MODEL, PROCESSOR, RETRIEVER)
@@ -62,7 +65,6 @@ else:

 def create_field_mode_ui():
     """Creates the Gradio UI for the offline Field Mode."""
-    # ... (This function remains unchanged) ...
     def get_diagnosis_and_remedy(uploaded_image: Image.Image) -> str:
         if uploaded_image is None:
             return "Please upload an image of a maize plant first."
@@ -81,7 +83,23 @@ def create_field_mode_ui():
         if "Could not parse" in diagnosis:
             return f"Sorry, I couldn't identify the condition from the image. Raw output: {diagnosis}"

+        if "Healthy" in diagnosis:
+            return """## Diagnosis Report
+
+**Condition Identified:**
+### Healthy Maize Plant
+
+---
+
+## Suggested Remedy
+
+The plant appears to be healthy. No remedy is required. Continue with good farming practices. You can find recipes for enjoying your healthy maize in the 'Knowledge Base' tab."""
+
+        results = KB.search(diagnosis)
+        if not results:
+            remedy = "No remedy found in the local knowledge base."
+        else:
+            remedy = results[0]['content']

         final_response = f"""
 ## Diagnosis Report
@@ -286,6 +304,79 @@ def create_story_mode_ui():
     )
     return demo

+def create_settings_ui():
+    """Creates the Gradio UI for the Settings tab."""
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="gray", secondary_hue="blue")) as demo:
+        gr.Markdown("# ⚙️ Settings & Data Management")
+        gr.Markdown("Manage application settings and data synchronization.")
+
+        with gr.Row():
+            with gr.Column():
+                sync_btn = gr.Button("☁️ Sync Local Data to BigQuery Cloud")
+                status_output = gr.Textbox(label="Sync Status", interactive=False, lines=5)
+
+        def sync_data_to_cloud():
+            yield "Attempting to sync local diagnosis data to BigQuery..."
+            try:
+                # Assuming your bigquery_uploader has a function that returns a summary
+                result_message = upload_to_bigquery()
+                yield f"Sync successful!\n{result_message}"
+            except Exception as e:
+                yield f"Sync failed!\nError: {e}"
+
+        sync_btn.click(
+            sync_data_to_cloud,
+            inputs=[],
+            outputs=[status_output]
+        )
+    return demo
+
+def create_kb_management_ui():
+    """Creates the Gradio UI for managing the knowledge base."""
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")) as demo:
+        gr.Markdown("# 📚 Knowledge Base Management")
+        gr.Markdown("Manage the local, encrypted knowledge base.")
+
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown("### Rebuild Knowledge Base")
+                rebuild_btn = gr.Button("Rebuild from Source Files")
+                rebuild_status = gr.Textbox(label="Status", interactive=False)
+
+            with gr.Column():
+                gr.Markdown("### Add PDF to Knowledge Base")
+                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+                ingest_btn = gr.Button("Ingest PDF")
+                ingest_status = gr.Textbox(label="Status", interactive=False)
+
+        def rebuild_kb():
+            yield "Rebuilding knowledge base..."
+            try:
+                docs = {}
+                for filename in os.listdir("knowledge_base_data"):
+                    if filename.endswith(".txt"):
+                        with open(os.path.join("knowledge_base_data", filename)) as f:
+                            docs[filename] = f.read()
+                KB.create_initial_index(docs)
+                yield "Knowledge base rebuilt successfully."
+            except Exception as e:
+                yield f"Error rebuilding knowledge base: {e}"
+
+        def ingest_pdf(pdf):
+            if pdf is None:
+                return "Please upload a PDF file."
+            yield "Ingesting PDF..."
+            try:
+                KB.ingest_pdf(pdf.name, os.path.basename(pdf.name))
+                yield f"Successfully ingested {os.path.basename(pdf.name)}."
+            except Exception as e:
+                yield f"Error ingesting PDF: {e}"
+
+        rebuild_btn.click(rebuild_kb, outputs=[rebuild_status])
+        ingest_btn.click(ingest_pdf, inputs=[pdf_input], outputs=[ingest_status])
+
+    return demo
+
 # --- Step 4: App Launcher ---

 def check_internet_connection(host="8.8.8.8", port=53, timeout=3):
@@ -299,6 +390,13 @@ def check_internet_connection(host="8.8.8.8", port=53, timeout=3):


 if __name__ == "__main__":
+    # Initialize local database
+    conn = local_database.get_db_connection()
+    if conn is not None:
+        local_database.init_db()
+        conn.close()
+    else:
+        print("❌ Could not create a connection to the local database.")
     field_mode_ui = create_field_mode_ui()
     interface_list = [field_mode_ui]
     tab_titles = ["Field Mode (Offline)"]
@@ -323,6 +421,16 @@ if __name__ == "__main__":
         document_analysis_ui = create_document_analysis_ui()
         interface_list.append(document_analysis_ui)
         tab_titles.append("Document Analysis")
+
+        # Add the Settings UI
+        settings_ui = create_settings_ui()
+        interface_list.append(settings_ui)
+        tab_titles.append("Settings")
+
+        # Add the Knowledge Base Management UI
+        kb_management_ui = create_kb_management_ui()
+        interface_list.append(kb_management_ui)
+        tab_titles.append("Knowledge Base")

     else:
         print("❌ No internet connection. Launching in Offline Mode only.")
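The launch step is outside these hunks, but `interface_list` and `tab_titles` suggest the tabs are assembled with Gradio's `TabbedInterface`. A minimal sketch of that assumption (not shown in the diff; the launch call is illustrative):

```python
# Sketch only: how the collected UIs and titles are presumably combined.
# gr.TabbedInterface is standard Gradio; launch options are placeholders.
demo = gr.TabbedInterface(interface_list, tab_names=tab_titles)
demo.launch()
```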
bigquery_uploader.py
ADDED
@@ -0,0 +1,112 @@
+# This file will contain the logic for uploading data to BigQuery.
+
+from google.cloud import bigquery
+from google.cloud.exceptions import NotFound
+import local_database
+
+PROJECT_ID = "gem-creation"
+DATASET_ID = "aura_mind_glow_data"
+TABLE_ID = "farm_analysis"
+
+def get_bigquery_client():
+    """Returns an authenticated BigQuery client."""
+    try:
+        client = bigquery.Client(project=PROJECT_ID)
+        print("Successfully authenticated with BigQuery.")
+        return client
+    except Exception as e:
+        print(f"Error authenticating with BigQuery: {e}")
+        return None
+
+def create_dataset_if_not_exists(client):
+    """Creates the BigQuery dataset if it doesn't exist."""
+    dataset_id = f"{PROJECT_ID}.{DATASET_ID}"
+    try:
+        client.get_dataset(dataset_id)  # Make an API request.
+        print(f"Dataset {dataset_id} already exists.")
+    except NotFound:
+        print(f"Dataset {dataset_id} is not found. Creating dataset...")
+        dataset = bigquery.Dataset(dataset_id)
+        dataset.location = "US"
+        dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
+        print(f"Created dataset {client.project}.{dataset.dataset_id}")
+
+
+def create_table_if_not_exists(client):
+    """Creates the BigQuery table if it doesn't exist."""
+    table_id = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
+    try:
+        client.get_table(table_id)  # Make an API request.
+        print(f"Table {table_id} already exists.")
+    except NotFound:
+        print(f"Table {table_id} is not found. Creating table...")
+        schema = [
+            bigquery.SchemaField("analysis_id", "STRING", mode="REQUIRED"),
+            bigquery.SchemaField("timestamp", "TIMESTAMP", mode="REQUIRED"),
+            bigquery.SchemaField("farmer_id", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("gps_latitude", "FLOAT", mode="NULLABLE"),
+            bigquery.SchemaField("gps_longitude", "FLOAT", mode="NULLABLE"),
+            bigquery.SchemaField("crop_type", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("crop_variety", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("ai_diagnosis", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("confidence_score", "FLOAT", mode="NULLABLE"),
+            bigquery.SchemaField("recommended_action", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("farmer_feedback", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("treatment_applied", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("outcome_image_id", "STRING", mode="NULLABLE"),
+        ]
+        table = bigquery.Table(table_id, schema=schema)
+        table = client.create_table(table)  # Make an API request.
+        print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")
+
+
+def upload_data_from_local_db():
+    """Uploads data from the local SQLite database to BigQuery."""
+    conn = local_database.create_connection()
+    if conn is None:
+        print("Could not connect to the local database.")
+        return
+
+    rows = local_database.get_all_analysis(conn)
+    if not rows:
+        print("No data to upload from the local database.")
+        conn.close()
+        return
+
+    client = get_bigquery_client()
+    if client is None:
+        conn.close()
+        return
+
+    create_dataset_if_not_exists(client)
+    create_table_if_not_exists(client)
+
+    table_id = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
+    # Convert rows to list of dictionaries
+    rows_to_insert = []
+    for row in rows:
+        rows_to_insert.append({
+            "analysis_id": row[0],
+            "timestamp": row[1],
+            "farmer_id": row[2],
+            "gps_latitude": row[3],
+            "gps_longitude": row[4],
+            "crop_type": row[5],
+            "crop_variety": row[6],
+            "ai_diagnosis": row[7],
+            "confidence_score": row[8],
+            "recommended_action": row[9],
+            "farmer_feedback": row[10],
+            "treatment_applied": row[11],
+            "outcome_image_id": row[12],
+        })
+
+    errors = client.insert_rows_json(table_id, rows_to_insert)  # Make an API request.
+    if errors == []:
+        print("New rows have been added.")
+        local_database.clear_all_analysis(conn)
+        print("Local database cleared.")
+    else:
+        print(f"Encountered errors while inserting rows: {errors}")
+
+    conn.close()
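Note that app.py imports `upload_to_bigquery` from this module, while the function defined here is `upload_data_from_local_db`; the `local_database.create_connection`, `get_all_analysis`, and `clear_all_analysis` calls also do not appear in the local_database.py added below. A thin wrapper in the spirit of what app.py's sync button expects might look like this sketch (hypothetical, not part of the commit):

```python
# Hypothetical bridge for the name app.py imports; returns a summary string
# for the Gradio "Sync Status" box instead of only printing.
def upload_to_bigquery() -> str:
    upload_data_from_local_db()
    return f"Upload routine finished for {PROJECT_ID}.{DATASET_ID}.{TABLE_ID} (see logs for details)."
```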
knowledge_base.py
CHANGED
@@ -1,48 +1,219 @@
 import os
+import sqlite3
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import fitz  # PyMuPDF
+from PIL import Image
+import io
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
+from cryptography.hazmat.primitives import padding
+from cryptography.hazmat.backends import default_backend
 import config

+# --- Security ---
+SECRET_KEY = os.environ.get("AURA_MIND_SECRET_KEY", "a_default_secret_key_32_bytes_!!").encode()
+if len(SECRET_KEY) != 32:
+    raise ValueError("SECRET_KEY must be 32 bytes long for AES-256.")
+
+def encrypt_data(data: bytes) -> bytes:
+    iv = os.urandom(16)
+    padder = padding.PKCS7(algorithms.AES.block_size).padder()
+    padded_data = padder.update(data) + padder.finalize()
+    cipher = Cipher(algorithms.AES(SECRET_KEY), modes.CBC(iv), backend=default_backend())
+    encryptor = cipher.encryptor()
+    encrypted_data = encryptor.update(padded_data) + encryptor.finalize()
+    return iv + encrypted_data
+
+def decrypt_data(encrypted_data_with_iv: bytes) -> bytes:
+    iv = encrypted_data_with_iv[:16]
+    encrypted_data = encrypted_data_with_iv[16:]
+    cipher = Cipher(algorithms.AES(SECRET_KEY), modes.CBC(iv), backend=default_backend())
+    decryptor = cipher.decryptor()
+    padded_data = decryptor.update(encrypted_data) + decryptor.finalize()
+    unpadder = padding.PKCS7(algorithms.AES.block_size).unpadder()
+    data = unpadder.update(padded_data) + unpadder.finalize()
+    return data
+
+# --- KnowledgeBase Class ---
+class KnowledgeBase:
+    def __init__(self, db_file="auramind_local.db", index_file="auramind_faiss.index", model_name='clip-ViT-B-32'):
+        self.db_file = db_file
+        self.index_file = index_file
+        self.model = SentenceTransformer(model_name)
+        self.init_db()
+
+    def init_db(self):
+        conn = sqlite3.connect(self.db_file)
+        cursor = conn.cursor()
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS documents (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                name TEXT NOT NULL UNIQUE
+            )
+        ''')
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS chunks (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                doc_id INTEGER,
+                content_type TEXT NOT NULL, -- 'text' or 'image'
+                encrypted_content BLOB NOT NULL,
+                page_num INTEGER,
+                FOREIGN KEY (doc_id) REFERENCES documents (id)
+            )
+        ''')
+        conn.commit()
+        conn.close()
+
+    def get_db_connection(self):
+        conn = sqlite3.connect(self.db_file)
+        conn.row_factory = sqlite3.Row
+        return conn
+
+    def delete_database_and_index(self):
+        if os.path.exists(self.db_file):
+            os.remove(self.db_file)
+            print(f"Removed old database: {self.db_file}")
+        if os.path.exists(self.index_file):
+            os.remove(self.index_file)
+            print(f"Removed old index: {self.index_file}")
+
+    def create_initial_index(self, documents_dict):
+        print("Performing a clean rebuild of the knowledge base...")
+        self.delete_database_and_index()
+        self.init_db()
+
+        conn = self.get_db_connection()
+        cursor = conn.cursor()
+
+        all_chunks = []
+        all_embeddings = []
+
+        for name, content in documents_dict.items():
+            cursor.execute("INSERT INTO documents (name) VALUES (?)", (name,))
+            doc_id = cursor.lastrowid
+            chunk_text = content
+            all_chunks.append((doc_id, 'text', encrypt_data(chunk_text.encode('utf-8')), 1))
+            text_embedding = self.model.encode([chunk_text])
+            all_embeddings.append(text_embedding)
+
+        cursor.executemany(
+            "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
+            all_chunks
+        )
+        conn.commit()
+        conn.close()
+
+        if not all_embeddings:
+            print("No content to index.")
+            return
+
+        embeddings_np = np.vstack(all_embeddings).astype('float32')
+        dimension = embeddings_np.shape[1]
+        index = faiss.IndexFlatL2(dimension)
+        index.add(embeddings_np)
+        faiss.write_index(index, self.index_file)
+        print(f"Initial encrypted index created with {len(all_chunks)} chunks.")
+
+    def ingest_pdf(self, file_path, file_name):
+        print(f"Starting ingestion for: {file_name}")
+        conn = self.get_db_connection()
+        cursor = conn.cursor()
+
+        try:
+            cursor.execute("INSERT INTO documents (name) VALUES (?)", (file_name,))
+            doc_id = cursor.lastrowid
+        except conn.IntegrityError:
+            print("Document already exists in DB. Skipping doc table insert.")
+            doc_id = cursor.execute("SELECT id FROM documents WHERE name=?", (file_name,)).fetchone()['id']
+
+        doc = fitz.open(file_path)
+        new_embeddings = []
+
+        if os.path.exists(self.index_file):
+            index = faiss.read_index(self.index_file)
+        else:
+            dimension = self.model.encode(["test"]).shape[1]
+            index = faiss.IndexFlatL2(dimension)
+
+        for page_num, page in enumerate(doc):
+            text = page.get_text()
+            if text.strip():
+                encrypted_text = encrypt_data(text.encode('utf-8'))
+                cursor.execute(
+                    "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
+                    (doc_id, 'text', encrypted_text, page_num + 1)
+                )
+                text_embedding = self.model.encode([text])
+                new_embeddings.append(text_embedding)
+
+            image_list = page.get_images(full=True)
+            for img_index, img in enumerate(image_list):
+                xref = img[0]
+                base_image = doc.extract_image(xref)
+                image_bytes = base_image["image"]
+                encrypted_image = encrypt_data(image_bytes)
+                cursor.execute(
+                    "INSERT INTO chunks (doc_id, content_type, encrypted_content, page_num) VALUES (?, ?, ?, ?)",
+                    (doc_id, 'image', encrypted_image, page_num + 1)
+                )
+                pil_image = Image.open(io.BytesIO(image_bytes))
+                image_embedding = self.model.encode(pil_image)
+                new_embeddings.append(image_embedding.reshape(1, -1))
+
+        conn.commit()
+        conn.close()
+
+        if new_embeddings:
+            embeddings_np = np.vstack(new_embeddings).astype('float32')
+            index.add(embeddings_np)
+            faiss.write_index(index, self.index_file)
+            print(f"Successfully ingested {file_name} and added {len(new_embeddings)} new chunks.")
+        else:
+            print(f"No new content found to ingest in {file_name}.")
+
+    def search(self, query, k=1):
+        if not os.path.exists(self.index_file):
+            return []
+
+        index = faiss.read_index(self.index_file)
+        query_embedding = self.model.encode([query]).astype('float32')
+        distances, indices = index.search(query_embedding, k)
+
+        results = []
+        conn = self.get_db_connection()
+        for i, faiss_id in enumerate(indices[0]):
+            if faiss_id != -1:
+                sql_id = int(faiss_id) + 1
+                chunk_record = conn.execute('SELECT * FROM chunks WHERE id = ?', (sql_id,)).fetchone()
+                if chunk_record:
+                    content_type = chunk_record['content_type']
+                    decrypted_content_bytes = decrypt_data(chunk_record['encrypted_content'])
+                    if content_type == 'text':
+                        content = decrypted_content_bytes.decode('utf-8')
+                    elif content_type == 'image':
+                        content = Image.open(io.BytesIO(decrypted_content_bytes))
+                    results.append({
+                        'distance': distances[0][i],
+                        'content': content,
+                        'type': content_type,
+                        'page': chunk_record['page_num']
+                    })
+        conn.close()
+        return results
+
 def get_retriever():
-        print(f"⚠️ Building a new FAISS index from all files in {config.KNOWLEDGE_BASE_PATH}...")
-
-        documents = []
-        data_path = config.KNOWLEDGE_BASE_PATH
-        for file_name in os.listdir(data_path):
-            file_path = os.path.join(data_path, file_name)
-            if os.path.isfile(file_path) and file_name.endswith('.txt'):
-                print(f" - Loading {file_name}...")
-                loader = TextLoader(file_path)
-                documents.extend(loader.load())
-
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-        docs = text_splitter.split_documents(documents)
-
-        print("\n✨ Creating new FAISS index...")
-        db = FAISS.from_documents(docs, embeddings)
-        db.save_local(config.FAISS_INDEX_PATH)
-        print(f"✅ New FAISS index built and saved to {config.FAISS_INDEX_PATH}.")
-
-        retriever = db.as_retriever(search_kwargs={"k": 1})
-        print("✅ RAG knowledge base and retriever created successfully!")
-        return retriever
-    except Exception as e:
-        print(f"❌ CRITICAL ERROR during RAG setup: {e}")
-        return None
+    kb = KnowledgeBase()
+    # This is a placeholder to maintain compatibility with the existing code.
+    # The actual search will be done using kb.search()
+    class Retriever:
+        def __init__(self, kb):
+            self.kb = kb
+        def get_relevant_documents(self, query):
+            results = self.kb.search(query)
+            # Langchain retrievers expect a list of Document objects.
+            # We will return the content of the documents for now.
+            from langchain.schema import Document
+            return [Document(page_content=r['content']) if r['type'] == 'text' else r['content'] for r in results]
+
+    return Retriever(kb)
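A minimal usage sketch of the new class (default file names as above; the sample document text is illustrative only):

```python
from knowledge_base import KnowledgeBase

kb = KnowledgeBase()  # loads clip-ViT-B-32 and creates auramind_local.db with its tables if missing
kb.create_initial_index({
    "northern_leaf_blight.txt": "Northern leaf blight: rotate crops and apply a fungicide early."
})
hits = kb.search("Northern Leaf Blight", k=1)
if hits:
    print(hits[0]["type"], hits[0]["page"], hits[0]["content"])
```

`search()` maps a FAISS result position to a SQLite row with `sql_id = faiss_id + 1`, so it relies on chunks being inserted in the same order as their embeddings and on the autoincrement ids staying contiguous.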
local_database.py
ADDED
@@ -0,0 +1,57 @@
+import sqlite3
+import os
+
+DB_FILE = "auramind_local.db"
+INDEX_FILE = "auramind_faiss.index"
+
+def init_db():
+    """
+    Initializes a more robust database schema for multimodal data.
+    - 'documents' table tracks the source files.
+    - 'chunks' table stores the individual encrypted text/image chunks.
+    """
+    conn = sqlite3.connect(DB_FILE)
+    cursor = conn.cursor()
+
+    # Table to track the source documents (e.g., 'healthy_maize.txt', 'user_guide.pdf')
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS documents (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            name TEXT NOT NULL UNIQUE
+        )
+    ''')
+
+    # Table to store each chunk of content (text or image)
+    # The faiss_id will correspond to the row number in the FAISS index
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS chunks (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            doc_id INTEGER,
+            content_type TEXT NOT NULL, -- 'text' or 'image'
+            encrypted_content BLOB NOT NULL,
+            page_num INTEGER,
+            FOREIGN KEY (doc_id) REFERENCES documents (id)
+        )
+    ''')
+    conn.commit()
+    conn.close()
+
+def get_db_connection():
+    """Establishes a connection to the database."""
+    conn = sqlite3.connect(DB_FILE)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+def check_if_indexed():
+    """Checks if the initial database and index file exist."""
+    # A basic check. A more robust check might query the db for content.
+    return os.path.exists(DB_FILE) and os.path.exists(INDEX_FILE)
+
+def delete_database_and_index():
+    """Deletes existing db and index files for a clean rebuild."""
+    if os.path.exists(DB_FILE):
+        os.remove(DB_FILE)
+        print(f"Removed old database: {DB_FILE}")
+    if os.path.exists(INDEX_FILE):
+        os.remove(INDEX_FILE)
+        print(f"Removed old index: {INDEX_FILE}")
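bigquery_uploader.py expects `create_connection`, `get_all_analysis`, and `clear_all_analysis` from this module, plus a local `farm_analysis` table; none of these appear in the file as added. A hedged sketch of what those helpers could look like, assuming a `farm_analysis` table whose column order mirrors the BigQuery schema in bigquery_uploader.py:

```python
# Hypothetical helpers expected by bigquery_uploader.py; not part of this commit.
def create_connection():
    """Alias for get_db_connection(), under the name bigquery_uploader.py uses."""
    return get_db_connection()

def get_all_analysis(conn):
    """Returns all locally stored analysis rows, ordered like the BigQuery schema."""
    return conn.execute("SELECT * FROM farm_analysis").fetchall()

def clear_all_analysis(conn):
    """Removes local rows after a successful cloud upload."""
    conn.execute("DELETE FROM farm_analysis")
    conn.commit()
```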
requirements.txt
CHANGED
@@ -17,4 +17,5 @@ duckduckgo-search
 langgraph
 google-genai
 google-adk
-pypdf
+pypdf
+google-cloud-bigquery