Spaces:
Sleeping
Sleeping
File size: 3,676 Bytes
06a53dc d90bac0 2aef697 06a53dc 2aef697 06a53dc 2aef697 06a53dc b97fe69 d90bac0 b97fe69 e0683ca b97fe69 e0683ca 06a53dc 2aef697 06a53dc b97fe69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import sqlite3
import os
from datetime import datetime
def initialize_database(db_path: str = 'dataset.db') -> None:
    """
    Initialize the SQLite database and create the 'documents' table if it doesn't exist.

    This function performs the following steps:
    1. Connects to the SQLite database at `db_path` (SQLite creates the file if it
       doesn't exist).
    2. Creates the 'documents' table with the following columns:
       - `id`: An auto-incrementing primary key.
       - `text`: The main text content of the document (required, non-nullable).
       - `topics`: A string representing associated topics (optional).
       - `date`: A timestamp indicating when the row was inserted
         (default: current timestamp).
    3. Commits the changes and closes the connection.

    The connection is closed even when table creation fails, so a failed call
    does not leave an open handle on the database file.

    Parameters:
    ----------
    db_path : str, optional
        Path of the SQLite database file. Defaults to 'dataset.db'.

    Raises:
    ------
    sqlite3.Error
        If the database cannot be opened or the schema statement fails.

    Example:
    --------
    >>> initialize_database()
    # Creates or updates the 'dataset.db' file with the 'documents' table schema.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Used as a context manager, the connection commits on success and
        # rolls back if the statement raises; it does NOT close the connection.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    text TEXT NOT NULL,
                    topics TEXT,
                    date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            ''')
    finally:
        # Always release the database file handle, including on failure.
        conn.close()
from huggingface_hub import HfApi
def commit_to_huggingface():
    """
    Upload the local 'dataset.db' file to the Hugging Face Space repository.

    The access token is read from the `hf_key` environment variable and handed
    to `HfApi`. The file is stored in the repository under the same name it has
    locally. Returns None.
    """
    # Target Space; replace with your own Space's repository name.
    space_repo = "Danielrahmai1991/dataset_interface"
    # The database keeps the same name locally and in the repository.
    db_file = "dataset.db"

    client = HfApi(token=os.getenv("hf_key"))
    client.upload_file(
        path_or_fileobj=db_file,
        path_in_repo=db_file,
        repo_id=space_repo,
        repo_type="space",
    )
def save_to_db(chunks, topics=None):
    """
    Save chunks of text to the SQLite database.

    This function performs the following steps:
    1. Ensures the database and 'documents' table are initialized by calling
       `initialize_database`.
    2. Connects to the SQLite database ('dataset.db').
    3. Inserts each chunk of text into the 'documents' table along with the
       associated topics.
       - The `text` column stores the chunk of text.
       - The `topics` column stores the associated topics (optional).
       - The `date` column is left to its column default.
    4. Commits the changes and closes the connection. If any insert fails, the
       whole batch is rolled back and the connection is still closed.
    5. Calls `commit_to_huggingface` after the rows have been committed. This
       call is unconditional and is skipped only when an earlier step raises.

    Parameters:
    ----------
    chunks : list of str
        A list of text chunks to be saved to the database.
    topics : str or None, optional
        A string representing the topics associated with the chunks.
        Defaults to None.

    Raises:
    ------
    sqlite3.Error
        If an insert fails (for example, a chunk that is None violates the
        NOT NULL constraint on `text`).

    Example:
    --------
    >>> save_to_db(["This is the first chunk.", "This is the second chunk."], "Example Topics")
    # Saves two rows to the 'documents' table with the provided text and topics.
    """
    # Ensure the database and table are initialized
    initialize_database()

    conn = sqlite3.connect('dataset.db')
    try:
        # The connection context manager commits the batch on success and
        # rolls it back on error; it does not close the connection.
        with conn:
            conn.executemany(
                'INSERT INTO documents (text, topics) VALUES (?, ?)',
                ((chunk, topics) for chunk in chunks),
            )
    finally:
        # Release the file handle before the database file is uploaded,
        # and also when an insert fails.
        conn.close()

    commit_to_huggingface()
|