dataset_interface / database.py
Danielrahmai1991's picture
Update database.py
2aef697 verified
import sqlite3
import os
from datetime import datetime
def initialize_database():
    """
    Initialize the SQLite database and create the 'documents' table if it doesn't exist.

    This function performs the following steps:
    1. Connects to the SQLite database file 'dataset.db' (SQLite creates the
       file if it doesn't exist).
    2. Creates the 'documents' table, unless it already exists, with the
       following columns:
       - `id`: An auto-incrementing integer primary key.
       - `text`: The main text content of the document (required, non-nullable).
       - `topics`: A string representing associated topics (optional).
       - `date`: A timestamp that defaults to CURRENT_TIMESTAMP when a row is
         inserted without an explicit value.
    3. Commits the change and closes the connection.

    The connection is closed even if creating the table fails, so a failed call
    does not leave an open handle on the database file.

    Note that an existing 'documents' table is left untouched: the statement is
    `CREATE TABLE IF NOT EXISTS`, so this function never alters the schema of a
    table that is already present.

    Example:
    --------
    >>> initialize_database()
    # Ensures 'dataset.db' exists and contains the 'documents' table.
    """
    # Connect to the SQLite database (or create it if it doesn't exist)
    conn = sqlite3.connect('dataset.db')
    try:
        # Create the 'documents' table if it doesn't exist
        conn.execute('''
            CREATE TABLE IF NOT EXISTS documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                text TEXT NOT NULL,
                topics TEXT,
                date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        conn.commit()
    finally:
        # Always release the connection, including when execute/commit raises.
        conn.close()
from huggingface_hub import HfApi
def commit_to_huggingface():
    """
    Upload the local 'dataset.db' file to the Hugging Face Space repository.

    The access token is read from the `hf_key` environment variable and passed
    to `HfApi`. The file is uploaded to the same relative path ('dataset.db')
    in the Space repository named below.
    """
    # Target Space; replace with your own Space's repository name.
    space_repo = "Danielrahmai1991/dataset_interface"

    client = HfApi(token=os.getenv("hf_key"))

    # Upload and commit the database file to the Space.
    upload_args = {
        "path_or_fileobj": "dataset.db",
        "path_in_repo": "dataset.db",
        "repo_id": space_repo,
        "repo_type": "space",
    }
    client.upload_file(**upload_args)
def save_to_db(chunks, topics=None):
    """
    Save chunks of text to the SQLite database.

    This function performs the following steps:
    1. Ensures the database and 'documents' table are initialized by calling
       `initialize_database`.
    2. Connects to the SQLite database file 'dataset.db'.
    3. Inserts each chunk of text into the 'documents' table along with the
       associated topics, as a single transaction.
       - The `text` column stores the chunk of text.
       - The `topics` column stores the associated topics (optional).
       - The `date` column is not supplied, so the table's default applies.
    4. Commits the transaction if every insert succeeds; rolls it back if any
       insert raises. The connection is closed in both cases.
    5. Calls `commit_to_huggingface` to upload the database file. This step is
       only reached when the inserts were committed successfully.

    Parameters:
    ----------
    chunks : iterable of str
        The text chunks to be saved to the database, one row per chunk.
    topics : str or None, optional
        A string representing the topics associated with the chunks; the same
        value is stored on every inserted row. Defaults to None.

    Example:
    --------
    >>> save_to_db(["This is the first chunk.", "This is the second chunk."], "Example Topics")
    # Saves two rows to the 'documents' table with the provided text and topics.
    """
    # Ensure the database and table are initialized
    initialize_database()

    conn = sqlite3.connect('dataset.db')
    try:
        # Used as a context manager, the connection commits on success and
        # rolls back on error; it does NOT close itself, hence the finally.
        with conn:
            conn.executemany(
                'INSERT INTO documents (text, topics) VALUES (?, ?)',
                ((chunk, topics) for chunk in chunks),
            )
    finally:
        conn.close()

    commit_to_huggingface()