Turing-test-web-en

Sleeping

App Files Files Community

intersteller2887 committed on Jul 13

Commit

040fb51

verified ·

1 Parent(s): c8770eb

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -15

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import collections
 from functools import wraps
 from filelock import FileLock
 from datasets import load_dataset, Audio
-from huggingface_hub import HfApi
 from multiprocessing import TimeoutError
 from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
@@ -116,29 +116,27 @@ QUESTION_SET = [
 def load_or_initialize_count_json(audio_paths):
     # Add filelock to /workspace/count.json
     lock_path = COUNT_JSON_PATH + ".lock"
     try:
-        api = HfApi()
-        repo_id = "intersteller2887/Turing-test-dataset"
-        hf_token = os.getenv("HF_TOKEN")
-        count_json = None
-        try:
-            count_json = api.get_file(
-                path_in_repo=COUNT_JSON_REPO_PATH,
-                repo_id=repo_id,
                 repo_type="dataset",
-                token=hf_token
             )
-        except Exception as e:
-            print(f"Count not download count.json from dataset {e}")
     except Exception as e:
-        print(f"Count not download count.json from dataset {e}")
     # Wait up to 10 seconds for any other thread holding the count.json lock to release it, then acquire the lock
     with FileLock(lock_path, timeout=10):
         # If count.json exists: load into count_data
         # Else initialize count_data with orderedDict
-        with open(COUNT_JSON_PATH, "wb") as f:
-            f.write(count_json)
         if os.path.exists(COUNT_JSON_PATH):
             with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
                 count_data = json.load(f, object_pairs_hook=collections.OrderedDict)

 from functools import wraps
 from filelock import FileLock
 from datasets import load_dataset, Audio
+from huggingface_hub import HfApi, hf_hub_download
 from multiprocessing import TimeoutError
 from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
 def load_or_initialize_count_json(audio_paths):
     # Add filelock to /workspace/count.json
     lock_path = COUNT_JSON_PATH + ".lock"
     try:
+        # Only try downloading if file doesn't exist yet
+        if not os.path.exists(COUNT_JSON_PATH):
+            downloaded_path = hf_hub_download(
+                repo_id=REPO_ID,
                 repo_type="dataset",
+                filename=COUNT_JSON_REPO_FILENAME,
+                token=os.getenv("HF_TOKEN")
             )
+            # Save it as COUNT_JSON_PATH so that the lock logic remains untouched
+            with open(downloaded_path, "rb") as src, open(COUNT_JSON_PATH, "wb") as dst:
+                dst.write(src.read())
     except Exception as e:
+        print(f"Could not download or save count.json from HuggingFace dataset: {e}")
     # Wait up to 10 seconds for any other thread holding the count.json lock to release it, then acquire the lock
     with FileLock(lock_path, timeout=10):
         # If count.json exists: load into count_data
         # Else initialize count_data with orderedDict
         if os.path.exists(COUNT_JSON_PATH):
             with open(COUNT_JSON_PATH, "r", encoding="utf-8") as f:
                 count_data = json.load(f, object_pairs_hook=collections.OrderedDict)