File size: 3,573 Bytes
4537bf6
 
 
 
aef4add
4537bf6
 
 
 
 
 
aef4add
 
 
 
 
 
 
 
 
2ec069e
 
 
 
aef4add
2ec069e
 
 
 
aef4add
2ec069e
 
aef4add
 
2ec069e
 
 
aef4add
 
2ec069e
 
 
 
 
 
aef4add
2ec069e
 
 
 
aef4add
2ec069e
 
 
aef4add
 
2ec069e
aef4add
 
2ec069e
 
 
aef4add
 
 
 
 
 
 
 
 
 
 
 
2ec069e
aef4add
2ec069e
 
 
 
aef4add
2ec069e
aef4add
 
 
 
 
2ec069e
aef4add
2ec069e
 
 
aef4add
 
2ec069e
aef4add
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import contextlib
import hashlib
import os
import shutil
import sqlite3
import uuid
from datetime import datetime

import gradio as gr
import huggingface_hub
import pandas as pd
import pytz
from apscheduler.schedulers.background import BackgroundScheduler


class TrafficDataHandler:
    """Record traffic events in a local SQLite file and mirror them to a Hub dataset repo.

    Every event is stored as one row of the ``ip_data`` table (timestamp, SHA-256
    of the client host, session uuid, action). After each insert the database
    file and a CSV dump of the table are copied into the local clone of the
    dataset repo and pushed.

    All state lives on the class and all methods are class/static methods, so
    the class is used without being instantiated.
    """

    # Working copy of the database; every read and write goes through this file.
    _DB_FILE_PATH = "./traffic_data.db"
    # Copy of the database inside the local clone of the dataset repo (``data``).
    _DB_TEMP_PATH = "./data/traffic_data.db"
    # CSV dump of the ``ip_data`` table, also written inside the repo clone.
    _CSV_EXPORT_PATH = "./data/ip_data.csv"
    # Hub access token, read once from the environment when the class is defined.
    _TOKEN = os.environ.get("HUB_TOKEN")
    # Time zone used for the timestamps stored with each event.
    _TZ = "Europe/Stockholm"
    # Minutes between runs of the job added by `_initialize_and_schedule_backup`.
    _INTERVAL_MIN_UPDATE = 30
    # Local clone (directory ``data``) of the dataset repo. It is constructed
    # while the class body executes, i.e. when this module is imported.
    _repo = huggingface_hub.Repository(
        local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=_TOKEN
    )
    # NOTE(review): stored on the class, so the value is shared by every caller
    # in the process; each `onload_store_metric_data` call overwrites it.
    _session_uuid = None

    @classmethod
    def _pull_repo_data(cls) -> None:
        """Pull the dataset repo and replace the working DB with the repo's copy."""
        cls._repo.git_pull()
        # The clone may not contain a database file yet (e.g. a new dataset
        # repo); in that case keep whatever working DB exists instead of
        # failing with FileNotFoundError.
        if os.path.exists(cls._DB_TEMP_PATH):
            shutil.copyfile(cls._DB_TEMP_PATH, cls._DB_FILE_PATH)

    @staticmethod
    def _hash_ip(ip_address: str) -> str:
        """Return the hexadecimal SHA-256 digest of ``ip_address``.

        NOTE(review): the hash is unsalted; confirm this is acceptable for the
        privacy requirements of the stored data.
        """
        return hashlib.sha256(ip_address.encode()).hexdigest()

    @classmethod
    def _current_time_in_sweden(cls) -> str:
        """Return the current time in ``_TZ`` formatted as ``YYYY-MM-DD HH:MM:SS``."""
        swedish_tz = pytz.timezone(cls._TZ)
        return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")

    @classmethod
    def onload_store_metric_data(cls, request: gr.Request) -> None:
        """Start a new session id, refresh the database and record a ``"load"`` event.

        Args:
            request: Incoming request; only ``request.client.host`` is read.
        """
        cls._session_uuid = str(uuid.uuid1())
        cls._setup_database()
        hashed_host = cls._hash_ip(request.client.host)
        cls._backup_and_update_database(hashed_host, "load")

    @classmethod
    def store_metric_data(cls, action, request: gr.Request) -> None:
        """Record ``action`` for the client that sent ``request``.

        Args:
            action: Value stored in the ``action`` column.
            request: Incoming request; only ``request.client.host`` is read.
        """
        hashed_host = cls._hash_ip(request.client.host)
        cls._backup_and_update_database(hashed_host, action)

    @classmethod
    def _commit_host_to_database(cls, hashed_host, action) -> None:
        """Insert one event row into ``ip_data`` in the working database.

        Args:
            hashed_host: Value stored in the ``hashed_ip`` column.
            action: Value stored in the ``action`` column.
        """
        # `closing` releases the connection; the inner `with db` only handles
        # the transaction (commit on success, rollback on error) and does not
        # close the connection by itself.
        with contextlib.closing(sqlite3.connect(cls._DB_FILE_PATH)) as db:
            with db:
                db.execute(
                    "INSERT INTO ip_data(current_time, hashed_ip, session_uuid, action) VALUES(?,?,?,?)",
                    [cls._current_time_in_sweden(), hashed_host, cls._session_uuid, action],
                )

    @classmethod
    def _setup_database(cls) -> None:
        """Refresh the working database from the repo and make sure ``ip_data`` exists."""
        # Pull first: the pulled file replaces the working DB, so a table
        # created before the pull would simply be overwritten.
        cls._pull_repo_data()
        with contextlib.closing(sqlite3.connect(cls._DB_FILE_PATH)) as db:
            with db:
                # IF NOT EXISTS avoids reading the whole table just to find out
                # whether it is there.
                db.execute(
                    """
                    CREATE TABLE IF NOT EXISTS ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                                          current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                                          hashed_ip TEXT,
                                          session_uuid TEXT,
                                          action TEXT)
                    """
                )

    @classmethod
    def _backup_and_update_database(cls, hashed_host, action) -> None:
        """Insert one event, then copy the database and a CSV dump into the repo and push.

        Args:
            hashed_host: Value stored in the ``hashed_ip`` column.
            action: Value stored in the ``action`` column.
        """
        cls._commit_host_to_database(hashed_host, action)
        shutil.copyfile(cls._DB_FILE_PATH, cls._DB_TEMP_PATH)

        with contextlib.closing(sqlite3.connect(cls._DB_FILE_PATH)) as db:
            ip_data = db.execute("SELECT * FROM ip_data").fetchall()

        pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip", "session_uuid", "action"]).to_csv(
            cls._CSV_EXPORT_PATH, index=False
        )

        cls._repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")

    @classmethod
    def _initialize_and_schedule_backup(cls, hashed_host, action) -> None:
        """Record one event now and schedule the same call every ``_INTERVAL_MIN_UPDATE`` minutes.

        NOTE(review): the scheduled job calls `_backup_and_update_database`
        with the same arguments, so it inserts another identical event row on
        every run, and each call of this method starts an additional
        scheduler. Confirm whether a backup-only job was intended.

        Args:
            hashed_host: Value stored in the ``hashed_ip`` column.
            action: Value stored in the ``action`` column.
        """
        cls._backup_and_update_database(hashed_host, action)
        scheduler = BackgroundScheduler()
        scheduler.add_job(
            cls._backup_and_update_database, "interval", minutes=cls._INTERVAL_MIN_UPDATE, args=(hashed_host, action)
        )
        scheduler.start()