Update utils.py
Browse files
utils.py
CHANGED
|
@@ -485,7 +485,7 @@ def document_storage_chroma(splits):
|
|
| 485 |
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn)
|
| 486 |
|
| 487 |
return vectorstore
|
| 488 |
-
|
| 489 |
########################################################
|
| 490 |
#Splits für den Vektorstore speichern - bzw. laden
|
| 491 |
def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
|
@@ -506,7 +506,7 @@ def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", pr
|
|
| 506 |
# Hochladen der Splits-Dateien zum Hugging Face Space
|
| 507 |
upload_file_to_huggingface(preprocessed_filepath, f"{directory}/{preprocessed_filename}")
|
| 508 |
upload_file_to_huggingface(original_filepath, f"{directory}/{original_filename}")
|
| 509 |
-
|
| 510 |
def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
| 511 |
# Vollständigen Pfad zur Datei erstellen
|
| 512 |
preprocessed_filepath = os.path.join(directory, preprocessed_filename)
|
|
@@ -522,11 +522,41 @@ def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_spli
|
|
| 522 |
|
| 523 |
return preprocessed_splits, original_splits
|
| 524 |
return None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
|
| 526 |
########################################################
|
| 527 |
#Vektorstore speichern - bzw. laden
|
| 528 |
#Laden des Vektorstores - aus den gespeicherten splits
|
| 529 |
-
|
| 530 |
def load_vectorstore():
|
| 531 |
splits_and_metadata = load_splits_and_metadata()
|
| 532 |
if splits_and_metadata is not None:
|
|
@@ -549,7 +579,7 @@ def save_split_to_original_mapping(mapping, directory="chroma/kkg", filename="ma
|
|
| 549 |
|
| 550 |
# Hochladen der Mapping-Datei zum Hugging Face Space
|
| 551 |
upload_file_to_huggingface(filepath, f"{directory}/{filename}")
|
| 552 |
-
|
| 553 |
def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl"):
|
| 554 |
# Vollständigen Pfad zur Datei erstellen
|
| 555 |
filepath = os.path.join(directory, filename)
|
|
@@ -559,7 +589,24 @@ def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl
|
|
| 559 |
with open(filepath, "rb") as f:
|
| 560 |
return pickle.load(f)
|
| 561 |
return None
|
|
|
|
| 562 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 563 |
|
| 564 |
|
| 565 |
#######################################
|
|
|
|
| 485 |
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_fn)
|
| 486 |
|
| 487 |
return vectorstore
|
| 488 |
+
|
| 489 |
########################################################
|
| 490 |
#Splits für den Vektorstore speichern - bzw. laden
|
| 491 |
def save_splits(preprocessed_splits, original_splits, directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
|
|
|
| 506 |
# Hochladen der Splits-Dateien zum Hugging Face Space
|
| 507 |
upload_file_to_huggingface(preprocessed_filepath, f"{directory}/{preprocessed_filename}")
|
| 508 |
upload_file_to_huggingface(original_filepath, f"{directory}/{original_filename}")
|
| 509 |
+
"""
|
| 510 |
def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
|
| 511 |
# Vollständigen Pfad zur Datei erstellen
|
| 512 |
preprocessed_filepath = os.path.join(directory, preprocessed_filename)
|
|
|
|
| 522 |
|
| 523 |
return preprocessed_splits, original_splits
|
| 524 |
return None, None
|
| 525 |
+
"""
|
| 526 |
+
def load_splits(directory="chroma/kkg", preprocessed_filename="preprocessed_splits.pkl", original_filename="original_splits.pkl"):
    """Load the preprocessed and original splits from the Hugging Face repository.

    Both pickle files are fetched with ``hf_hub_download`` from the space
    identified by the module-level ``STORAGE_REPO_ID`` (authenticated with the
    module-level ``hf_token``) and then unpickled.

    Args:
        directory: Folder inside the repository that holds the pickle files.
        preprocessed_filename: File name of the pickled preprocessed splits.
        original_filename: File name of the pickled original splits.

    Returns:
        A tuple ``(preprocessed_splits, original_splits)``. If either file
        cannot be downloaded or unpickled, ``(None, None)`` is returned so
        callers never receive a half-loaded pair.
    """
    loaded = []
    try:
        # Same download-and-unpickle procedure for both files, in the order
        # preprocessed -> original.
        for name in (preprocessed_filename, original_filename):
            local_path = hf_hub_download(
                repo_id=STORAGE_REPO_ID,
                filename=f"{directory}/{name}",
                repo_type="space",
                token=hf_token,
            )
            # NOTE(review): pickle.load can execute arbitrary code while
            # deserializing; the storage repository must be trusted.
            with open(local_path, "rb") as f:
                loaded.append(pickle.load(f))
    except Exception as e:
        # Best-effort loader: report the problem and signal "nothing loaded"
        # instead of returning one populated and one missing value.
        print(f"Fehler beim Laden der Splits: {str(e)}")
        return None, None

    preprocessed_splits, original_splits = loaded
    return preprocessed_splits, original_splits
|
| 555 |
|
| 556 |
########################################################
|
| 557 |
#Vektorstore speichern - bzw. laden
|
| 558 |
#Laden des Vektorstores - aus den gespeicherten splits
|
| 559 |
+
"""
|
| 560 |
def load_vectorstore():
|
| 561 |
splits_and_metadata = load_splits_and_metadata()
|
| 562 |
if splits_and_metadata is not None:
|
|
|
|
| 579 |
|
| 580 |
# Hochladen der Mapping-Datei zum Hugging Face Space
|
| 581 |
upload_file_to_huggingface(filepath, f"{directory}/{filename}")
|
| 582 |
+
"""
|
| 583 |
def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl"):
|
| 584 |
# Vollständigen Pfad zur Datei erstellen
|
| 585 |
filepath = os.path.join(directory, filename)
|
|
|
|
| 589 |
with open(filepath, "rb") as f:
|
| 590 |
return pickle.load(f)
|
| 591 |
return None
|
| 592 |
+
"""
|
| 593 |
|
| 594 |
+
def load_split_to_original_mapping(directory="chroma/kkg", filename="mapping.pkl"):
    """Fetch and unpickle the split-to-original mapping from the Hugging Face repository.

    The file ``<directory>/<filename>`` is downloaded with ``hf_hub_download``
    from the space identified by the module-level ``STORAGE_REPO_ID``
    (authenticated with the module-level ``hf_token``).

    Args:
        directory: Folder inside the repository that holds the mapping file.
        filename: File name of the pickled mapping.

    Returns:
        The unpickled mapping object, or ``None`` if downloading, opening or
        unpickling raised an exception (the error is printed).
    """
    try:
        # Retrieve the mapping file from the Hugging Face repository
        local_path = hf_hub_download(
            repo_id=STORAGE_REPO_ID,
            filename=f"{directory}/{filename}",
            repo_type="space",
            token=hf_token,
        )

        # NOTE(review): pickle.load can execute arbitrary code while
        # deserializing; the storage repository must be trusted.
        with open(local_path, "rb") as handle:
            mapping = pickle.load(handle)
    except Exception as err:
        print(f"Fehler beim Laden des Mappings: {str(err)}")
        return None

    return mapping
|
| 610 |
|
| 611 |
|
| 612 |
#######################################
|