Update utils.py
Browse files
utils.py
CHANGED
|
@@ -397,6 +397,7 @@ def document_storage_chroma(splits):
|
|
| 397 |
|
| 398 |
########################################################
|
| 399 |
#Vektorstore speichern - bzw. laden
|
|
|
|
| 400 |
def save_splits_and_metadata(splits, directory="chroma/kkg", filename="splits_and_metadata.pkl"):
|
| 401 |
# Erstellen des Verzeichnisses, falls es nicht existiert
|
| 402 |
if not os.path.exists(directory):
|
|
@@ -427,6 +428,66 @@ def load_vectorstore():
|
|
| 427 |
PREPROCESSED_SPLITS, SPLIT_TO_ORIGINAL_MAPPING = splits_and_metadata
|
| 428 |
return document_storage_chroma(PREPROCESSED_SPLITS)
|
| 429 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
|
| 431 |
# Beispiel-Upload-Funktion
|
| 432 |
def upload_file_to_huggingface(file_path, upload_path):
|
|
|
|
| 397 |
|
| 398 |
########################################################
|
| 399 |
#Vektorstore speichern - bzw. laden
|
| 400 |
+
"""
|
| 401 |
def save_splits_and_metadata(splits, directory="chroma/kkg", filename="splits_and_metadata.pkl"):
|
| 402 |
# Erstellen des Verzeichnisses, falls es nicht existiert
|
| 403 |
if not os.path.exists(directory):
|
|
|
|
| 428 |
PREPROCESSED_SPLITS, SPLIT_TO_ORIGINAL_MAPPING = splits_and_metadata
|
| 429 |
return document_storage_chroma(PREPROCESSED_SPLITS)
|
| 430 |
return None
|
| 431 |
+
"""
|
| 432 |
+
|
| 433 |
+
def save_split_to_original_mapping(mapping, directory="chroma/kkg", mapping_filename="mapping.pkl"):
    """Pickle ``mapping`` into ``directory`` and upload the resulting file.

    Args:
        mapping: Picklable object to persist (the split-to-original mapping).
        directory: Local target directory; created if it does not exist.
        mapping_filename: Name of the pickle file inside ``directory``.

    The local file is handed to ``upload_file_to_huggingface`` together with
    the remote path ``f"{directory}/{mapping_filename}"``.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    os.makedirs(directory, exist_ok=True)

    # Write the mapping to disk.
    mapping_filepath = os.path.join(directory, mapping_filename)
    with open(mapping_filepath, "wb") as f:
        pickle.dump(mapping, f)

    # Upload the mapping file to the Hugging Face Space.
    upload_file_to_huggingface(mapping_filepath, f"{directory}/{mapping_filename}")
|
| 445 |
+
|
| 446 |
+
def load_split_to_original_mapping(directory="chroma/kkg", mapping_filename="mapping.pkl"):
    """Load the pickled mapping from ``directory``.

    Args:
        directory: Directory that contains the mapping file.
        mapping_filename: Name of the pickle file inside ``directory``.

    Returns:
        The unpickled object, or ``None`` when the file does not exist.
    """
    mapping_filepath = os.path.join(directory, mapping_filename)

    # Open the file directly instead of checking os.path.exists() first: the
    # file could vanish between the check and the open. A missing file or
    # missing parent directory still yields None.
    try:
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on files written by this application.
        with open(mapping_filepath, "rb") as f:
            return pickle.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return None
|
| 454 |
+
|
| 455 |
+
def save_vectorstore(vectorstore, directory="chroma/kkg", splits_filename="splits_and_metadata.pkl", vectorstore_filename="vectorstore.pkl"):
    """Pickle the vector store and its splits/metadata, then upload both files.

    Two files are written into ``directory``:

    * ``splits_filename``: the tuple
      ``(vectorstore.documents, SPLIT_TO_ORIGINAL_MAPPING)``, where
      ``SPLIT_TO_ORIGINAL_MAPPING`` is read from module scope.
    * ``vectorstore_filename``: the ``vectorstore`` object itself.

    Both files are then passed to ``upload_file_to_huggingface`` with the
    remote paths ``f"{directory}/<filename>"``.

    Args:
        vectorstore: Object to persist.
        directory: Local target directory; created if it does not exist.
        splits_filename: File name for the splits/metadata pickle.
        vectorstore_filename: File name for the vector store pickle.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which could
    # race with another process creating the directory in between.
    os.makedirs(directory, exist_ok=True)

    # Save the splits and metadata.
    # NOTE(review): assumes the vector store exposes a `documents`
    # attribute -- TODO confirm against the vector store class in use.
    splits_filepath = os.path.join(directory, splits_filename)
    with open(splits_filepath, "wb") as f:
        pickle.dump((vectorstore.documents, SPLIT_TO_ORIGINAL_MAPPING), f)

    # Save the vector store.
    # NOTE(review): the original comment says "without the SQLite
    # connection", but the object is pickled as-is here; confirm that the
    # vector store is actually picklable.
    vectorstore_filepath = os.path.join(directory, vectorstore_filename)
    with open(vectorstore_filepath, "wb") as f:
        pickle.dump(vectorstore, f)

    # Upload the saved files to the Hugging Face Space.
    upload_file_to_huggingface(splits_filepath, f"{directory}/{splits_filename}")
    upload_file_to_huggingface(vectorstore_filepath, f"{directory}/{vectorstore_filename}")
|
| 473 |
+
|
| 474 |
+
def load_vectorstore(directory="chroma/kkg", splits_filename="splits_and_metadata.pkl", vectorstore_filename="vectorstore.pkl"):
    """Load the pickled vector store together with its splits and mapping.

    Args:
        directory: Directory that contains both pickle files.
        splits_filename: File holding the ``(documents, mapping)`` tuple.
        vectorstore_filename: File holding the pickled vector store.

    Returns:
        ``(vectorstore, mapping)`` when both files exist, with
        ``vectorstore.documents`` set to the loaded documents; otherwise
        ``(None, None)``.

    Note:
        The mapping is only returned. The module-level
        ``SPLIT_TO_ORIGINAL_MAPPING`` is NOT updated by this function, so
        callers must use (or assign) the returned value themselves.
    """
    splits_filepath = os.path.join(directory, splits_filename)
    vectorstore_filepath = os.path.join(directory, vectorstore_filename)

    # Both files are required; bail out early if either one is missing.
    if not (os.path.exists(splits_filepath) and os.path.exists(vectorstore_filepath)):
        return None, None

    # NOTE: pickle.load executes arbitrary code from the file; only use this
    # on files written by this application.
    with open(splits_filepath, "rb") as splits_file:
        # A lowercase local name makes it explicit that this does not rebind
        # the module-level SPLIT_TO_ORIGINAL_MAPPING.
        documents, split_to_original_mapping = pickle.load(splits_file)

    with open(vectorstore_filepath, "rb") as vectorstore_file:
        vectorstore = pickle.load(vectorstore_file)

    # Attach the loaded documents to the restored vector store.
    vectorstore.documents = documents
    return vectorstore, split_to_original_mapping
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
|
| 491 |
|
| 492 |
# Beispiel-Upload-Funktion
|
| 493 |
def upload_file_to_huggingface(file_path, upload_path):
|