File size: 972 Bytes
b9354c2
 
 
 
 
 
 
 
9442f34
b9354c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9442f34
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from abc import ABC, abstractmethod
from datasets import load_dataset, Dataset
from datasets.data_files import EmptyDatasetError

class HFDataset(ABC):
  """
  Abstract base for a dataset used to save the transcripts from YouTube.

  The constructor tries to load the dataset called ``name`` and records
  the outcome on the instance:

  * ``name``     -- the identifier handed to ``load_dataset`` / ``push_to_hub``.
  * ``dataset``  -- whatever ``load_dataset`` returned, or an empty
                    ``Dataset`` placeholder when nothing could be loaded.
  * ``exist``    -- False when no name was given or loading raised
                    ``FileNotFoundError``; True otherwise.
  * ``is_empty`` -- True whenever ``dataset`` is the empty placeholder.

  Subclasses must implement ``generate_dataset``.
  """
  def __init__(self, name) -> None:
    """
    Store ``name`` and initialise ``dataset``, ``exist`` and ``is_empty``.

    An empty string means "no dataset yet": nothing is loaded and an
    empty placeholder dataset is created instead.
    """
    self.name = name
    if name != "":
      self._init_dataset()
    else:
      self.dataset = Dataset.from_dict({})
      self.exist = False
      self.is_empty = True

  @abstractmethod
  def generate_dataset(self):
    """
    Build the dataset content; must be overridden by concrete subclasses.

    The base implementation does nothing and returns None, so overrides
    may safely delegate to it with ``super().generate_dataset()``.
    """
    pass

  def _init_dataset(self):
    """
    Load ``self.name`` with ``load_dataset`` and record the outcome.

    * Load succeeds             -> ``exist=True``,  ``is_empty=False``.
    * ``EmptyDatasetError``     -> ``exist=True``,  ``is_empty=True``.
    * ``FileNotFoundError``     -> ``exist=False``, ``is_empty=True``.

    In both error cases ``dataset`` becomes an empty ``Dataset``. Any
    other exception raised by ``load_dataset`` propagates to the caller.
    """
    try:
      # Only the call that can raise lives inside the try body.
      loaded = load_dataset(self.name)
    except EmptyDatasetError:
      # NOTE(review): this handler is deliberately listed before
      # FileNotFoundError -- EmptyDatasetError appears to be a subclass
      # of it in the `datasets` package, so the order matters; confirm
      # against the installed version before reordering.
      self.dataset = Dataset.from_dict({})
      self.exist = True
      self.is_empty = True
    except FileNotFoundError:
      self.dataset = Dataset.from_dict({})
      self.exist = False
      self.is_empty = True
    else:
      self.dataset = loaded
      self.exist = True
      self.is_empty = False

  def upload(self, token):
    """
    Push ``self.dataset`` to the hub under ``self.name``.

    ``token`` is forwarded unchanged as the ``token`` keyword argument of
    ``push_to_hub``.
    """
    self.dataset.push_to_hub(self.name, token=token)