intersteller2887 committed on
Commit
040fb51
·
verified ·
1 Parent(s): c8770eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -15
app.py CHANGED
@@ -9,7 +9,7 @@ import collections
9
  from functools import wraps
10
  from filelock import FileLock
11
  from datasets import load_dataset, Audio
12
- from huggingface_hub import HfApi
13
  from multiprocessing import TimeoutError
14
  from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
15
 
@@ -116,29 +116,27 @@ QUESTION_SET = [
116
  def load_or_initialize_count_json(audio_paths):
117
  # Add filelock to /workspace/count.json
118
  lock_path = COUNT_JSON_PATH + ".lock"
 
119
  try:
120
- api = HfApi()
121
- repo_id = "intersteller2887/Turing-test-dataset"
122
- hf_token = os.getenv("HF_TOKEN")
123
- count_json = None
124
- try:
125
- count_json = api.get_file(
126
- path_in_repo=COUNT_JSON_REPO_PATH,
127
- repo_id=repo_id,
128
  repo_type="dataset",
129
- token=hf_token
 
130
  )
131
- except Exception as e:
132
- print(f"Count not download count.json from dataset {e}")
 
133
  except Exception as e:
134
- print(f"Count not download count.json from dataset {e}")
135
 
136
  # Read of count.json will wait for 10 seconds until another thread involving releases it, and then add a lock to it
137
  with FileLock(lock_path, timeout=10):
138
  # If count.json exists: load into count_data
139
  # Else initialize count_data with orderedDict
140
- with open(COUNT_JSON_PATH, "wb") as f:
141
- f.write(count_json)
142
  if os.path.exists(COUNT_JSON_PATH):
143
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
144
  count_data = json.load(f, object_pairs_hook=collections.OrderedDict)
 
9
  from functools import wraps
10
  from filelock import FileLock
11
  from datasets import load_dataset, Audio
12
+ from huggingface_hub import HfApi, hf_hub_download
13
  from multiprocessing import TimeoutError
14
  from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
15
 
 
116
  def load_or_initialize_count_json(audio_paths):
117
  # Add filelock to /workspace/count.json
118
  lock_path = COUNT_JSON_PATH + ".lock"
119
+
120
  try:
121
+ # Only try downloading if file doesn't exist yet
122
+ if not os.path.exists(COUNT_JSON_PATH):
123
+ downloaded_path = hf_hub_download(
124
+ repo_id=REPO_ID,
 
 
 
 
125
  repo_type="dataset",
126
+ filename=COUNT_JSON_REPO_FILENAME,
127
+ token=os.getenv("HF_TOKEN")
128
  )
129
+ # Save it as COUNT_JSON_PATH so that the lock logic remains untouched
130
+ with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
131
+ dst.write(src.read())
132
  except Exception as e:
133
+ print(f"Could not download or save count.json from HuggingFace dataset: {e}")
134
 
135
  # Read of count.json will wait for 10 seconds until another thread involving releases it, and then add a lock to it
136
  with FileLock(lock_path, timeout=10):
137
  # If count.json exists: load into count_data
138
  # Else initialize count_data with orderedDict
139
+
 
140
  if os.path.exists(COUNT_JSON_PATH):
141
  with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
142
  count_data = json.load(f, object_pairs_hook=collections.OrderedDict)