Update utils.py
Browse files
utils.py
CHANGED
|
@@ -101,6 +101,25 @@ HF_WRITE = os.getenv("HF_WRITE")
|
|
| 101 |
# HfApi-Instanz erstellen
|
| 102 |
api = HfApi()
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
# Mapping for the splits (original and preprocessed)
|
| 105 |
split_to_original_mapping = []
|
| 106 |
|
|
@@ -331,23 +350,6 @@ def split_documents_with_id(docs, text_splitter):
|
|
| 331 |
splits.append(split_doc)
|
| 332 |
return splits
|
| 333 |
|
| 334 |
-
#######################################
|
| 335 |
-
# Dokumente aus anderem Space laden
|
| 336 |
-
#######################################
|
| 337 |
-
#ein File aus dem Space mit der REPO_ID laden
|
| 338 |
-
def download_file_from_hf(file_name, save_path, timeout=60):
    """Download one file from the storage repository and save it locally.

    Args:
        file_name: Path of the file inside the repository; it is appended
            to the ``resolve/main`` URL of ``STORAGE_REPO_ID``.
        save_path: Local path the downloaded bytes are written to.
        timeout: Seconds to wait for the server. ``requests`` applies this
            to connecting and to each wait for data, not to the transfer
            as a whole.

    Returns:
        ``save_path``, unchanged.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # NOTE(review): dataset and space repositories are normally served under
    # a "datasets/" or "spaces/" URL prefix - confirm this matches the
    # repository type of STORAGE_REPO_ID.
    url = f"https://huggingface.co/{STORAGE_REPO_ID}/resolve/main/{file_name}"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Fail before anything is written to disk
    with open(save_path, 'wb') as file:
        file.write(response.content)
    return save_path
|
| 345 |
-
|
| 346 |
-
#Liste aller Files in dem Space mit der Repo_id
|
| 347 |
-
def list_files_in_hf_repo(repo_id):
    """Return whatever ``api.list_repo_files`` reports for *repo_id*."""
    return api.list_repo_files(repo_id=repo_id)
|
| 350 |
-
|
| 351 |
|
| 352 |
|
| 353 |
########################################
|
|
@@ -523,6 +525,23 @@ def upload_file_to_huggingface(file_path, upload_path):
|
|
| 523 |
repo_id=STORAGE_REPO_ID,
|
| 524 |
repo_type=REPO_TYPE,
|
| 525 |
token=HF_WRITE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
)
|
| 527 |
|
| 528 |
|
|
|
|
| 101 |
# Log in with the token.
# NOTE(review): `hf_token` is not defined in the visible part of this file
# (only HF_WRITE / HF_READ are) - confirm it exists before this line runs.
login(token=hf_token)

# Create the single, module-wide HfApi instance. It was previously created
# twice (before and after the login); nothing used the first instance.
api = HfApi()

# Best-effort start-up check: verify that the repository exists and is
# accessible. Failures are only reported, never raised, so importing this
# module keeps working when the Hub is unreachable.
try:
    repo_info = api.list_repo_files(repo_id=STORAGE_REPO_ID, repo_type=REPO_TYPE)
    print(f"Repository '{STORAGE_REPO_ID}' enthält folgende Dateien: {repo_info}")
except Exception as e:
    print(f"Fehler beim Zugriff auf das Repository: {e}")
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
|
| 123 |
# Mapping for the splits (original and preprocessed)
|
| 124 |
split_to_original_mapping = []
|
| 125 |
|
|
|
|
| 350 |
splits.append(split_doc)
|
| 351 |
return splits
|
| 352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
|
| 355 |
########################################
|
|
|
|
| 525 |
repo_id=STORAGE_REPO_ID,
|
| 526 |
repo_type=REPO_TYPE,
|
| 527 |
token=HF_WRITE
|
| 528 |
+
|
| 529 |
+
#######################################
|
| 530 |
+
# Dokumente aus anderem Space laden
|
| 531 |
+
#######################################
|
| 532 |
+
#ein File aus dem Space mit der REPO_ID laden
|
| 533 |
+
def download_file_from_hf(file_name, save_path):
|
| 534 |
+
url = f"https://huggingface.co/{STORAGE_REPO_ID}/resolve/main/{file_name}"
|
| 535 |
+
headers = {"Authorization": f"Bearer {HF_READ}"}
|
| 536 |
+
response = requests.get(url, headers=headers)
|
| 537 |
+
response.raise_for_status() # Raise an error for bad status codes
|
| 538 |
+
with open(save_path, 'wb') as file:
|
| 539 |
+
file.write(response.content)
|
| 540 |
+
return save_path
|
| 541 |
+
|
| 542 |
+
def list_files_in_hf_repo(repo_id):
|
| 543 |
+
repo_info = api.list_repo_files(repo_id=repo_id, repo_type=REPO_TYPE)
|
| 544 |
+
return repo_info
|
| 545 |
)
|
| 546 |
|
| 547 |
|