File size: 877 Bytes
8dc29a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc24b49
8dc29a8
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import pandas as pd
from tqdm import tqdm
from io import BytesIO
import requests as rq
from os import environ

# Register a warnings filter (action "ignore", message prefix
# "Unverified HTTPS request") through the PYTHONWARNINGS environment variable.
# NOTE(review): PYTHONWARNINGS is normally consulted when an interpreter
# starts, so assigning it here presumably only affects child processes and
# not the already-running one -- confirm this silences the warning as intended.
environ["PYTHONWARNINGS"] = "ignore:Unverified HTTPS request"


class Data:
    """Load a CSV dataset from a local file or a GitHub raw-content URL."""

    def __init__(self, file_path: str) -> None:
        """Remember where the dataset lives.

        Args:
            file_path: Path to a CSV file on disk, or a URL. Any value
                containing ``"githubusercontent"`` is fetched over HTTP.
        """
        self.file_path = file_path

    def load(
        self,
        *,
        chunksize: int = 1000,
        total_chunks: "int | None" = 218,
        timeout: float = 30.0,
    ) -> pd.DataFrame:
        """Read the dataset at ``file_path`` into a single DataFrame.

        Args:
            chunksize: Number of rows per chunk when reading from disk.
            total_chunks: Expected number of chunks, used only to scale the
                progress bar. The default matches the original hard-coded
                value; pass ``None`` when the size of the file is unknown.
            timeout: Seconds to wait on the HTTP request before giving up
                (remote datasets only).

        Returns:
            The full dataset as one ``pandas.DataFrame``.

        Raises:
            requests.HTTPError: If the remote server answers with a 4xx/5xx
                status.
        """
        # read from url
        if "githubusercontent" in self.file_path:

            print("Loading dataset hosted on Github")

            # A timeout keeps a stalled connection from hanging forever.
            response = rq.get(self.file_path, timeout=timeout)
            # Fail loudly on an error status instead of handing the error
            # page to the CSV parser as if it were the dataset.
            response.raise_for_status()
            return pd.read_csv(BytesIO(response.content))

        # read file from disk in chunks, reporting progress per chunk
        chunks = tqdm(
            pd.read_csv(self.file_path, chunksize=chunksize),
            desc="Loading dataset",
            total=total_chunks,
            ascii="░▒█",
        )
        return pd.concat(list(chunks))