File size: 2,948 Bytes
7e1f5f6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import os
import sys
from pathlib import Path
from typing import Dict, Any, List

from loguru import logger
from omegaconf import OmegaConf
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker


def process_json_files(
        raw_data_path: str) -> tuple[List[Dict[str, str]], List[Dict[str, Any]]]:
    """
    Process all JSON files in the raw data folder.

    Every regular file whose name ends in ``.json`` is parsed. The file name
    without that extension is used as the ``public_vk`` value for the public
    and for each of its memes. Entries are visited in sorted name order so
    repeated runs yield the same output order, and directories that happen to
    be named ``*.json`` are skipped instead of crashing ``open``.

    Args:
        raw_data_path (str): Path to the folder containing JSON files.

    Returns:
        tuple: Lists of public and meme data to be added to the database.
            Each public is a dict with ``public_vk`` and ``public_name``;
            each meme is a dict with ``public_vk``, ``text`` and
            ``image_url``.

    Raises:
        KeyError: If a file lacks ``name`` or ``posts``, or a post lacks
            ``text`` or ``image_url``.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
        OSError: If the folder or one of its files cannot be read.
    """
    extension = '.json'
    publics_to_add: List[Dict[str, str]] = []
    memes_to_add: List[Dict[str, Any]] = []

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(raw_data_path)):
        if not filename.endswith(extension):
            continue

        file_path = os.path.join(raw_data_path, filename)
        # A directory named "something.json" would make open() raise.
        if not os.path.isfile(file_path):
            continue

        public_vk = filename[:-len(extension)]  # Remove .json extension

        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        publics_to_add.append({
            "public_vk": public_vk,
            "public_name": data['name']
        })

        posts = data['posts']
        memes_to_add.extend(
            {
                "public_vk": public_vk,
                "text": post['text'],
                "image_url": post['image_url']
            }
            for post in posts
        )

        logger.info(
            f"Processed file: {filename}, found {len(posts)} memes")

    return publics_to_add, memes_to_add


def main():
    """
    Recreate the database schema and populate it from the raw JSON data.

    Steps:
        1. Load ``config.yaml`` and create an engine for
           ``config['database']['url']``.
        2. Drop every table known to ``Base.metadata`` and create them anew
           (destructive: existing data in those tables is lost).
        3. Parse the JSON files found in ``config['data_folders']['raw_data']``.
        4. Insert the publics, then the memes with ``public_vk`` replaced by
           the id of the matching inserted public.

    The session is always closed, even when parsing or inserting fails.
    """
    # Imported here rather than at module level; presumably `src` is only
    # importable once the entry-point guard has put the project root on
    # sys.path - confirm before moving these to the top of the file.
    from src.db.models import Base
    from src.db import crud

    logger.add("logs/make_db.log", rotation="10 MB")

    # Load configuration
    config = OmegaConf.load('config.yaml')
    config = OmegaConf.to_container(config)

    engine = create_engine(config['database']['url'])

    # Drop all existing tables and create new ones
    Base.metadata.drop_all(bind=engine)
    Base.metadata.create_all(bind=engine)

    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    db = SessionLocal()

    try:
        raw_data_path = config['data_folders']['raw_data']

        publics_to_add, memes_to_add = process_json_files(raw_data_path)

        # Add all publics to the database
        added_publics = crud.add_publics(db, publics_to_add)

        # Create a mapping of public_vk to public_id
        public_vk_to_id = {
            public.public_vk: public.id for public in added_publics}

        # Update memes with correct public_id
        for meme in memes_to_add:
            meme['public_id'] = public_vk_to_id[meme.pop('public_vk')]

        # Add all memes to the database
        crud.add_memes(db, memes_to_add)

        logger.info(
            f"Added {len(added_publics)} publics and {len(memes_to_add)} memes to the database")
    finally:
        # Release the session on failure as well as on success.
        db.close()

    logger.info("Database population completed")


if __name__ == "__main__":
    # Put the directory two levels above this file at the front of sys.path
    # before running main().
    script_path = Path(__file__).resolve()
    project_root = script_path.parent.parent
    sys.path.insert(0, str(project_root))
    main()