Update utils.py
Browse files
utils.py
CHANGED
|
@@ -128,18 +128,14 @@ RAG_CHAIN_PROMPT = PromptTemplate(input_variables = ["context", "question"],
|
|
| 128 |
#Pfad, wo Docs/Bilder/Filme abgelegt werden können - lokal, also hier im HF Space (sonst auf eigenem Rechner)
|
| 129 |
PATH_WORK = "."
|
| 130 |
CHROMA_DIR = "/chroma/kkg"
|
| 131 |
-
|
| 132 |
-
|
| 133 |
CHROMA_EXCEL = './chroma/kkg/excel'
|
| 134 |
YOUTUBE_DIR = "/youtube"
|
| 135 |
HISTORY_PFAD = "/data/history"
|
| 136 |
-
|
| 137 |
-
|
| 138 |
|
| 139 |
-
CHROMA_PDF = './chroma/kkg'
|
| 140 |
-
CHROMA_WORD = './chroma/kkg'
|
| 141 |
-
DOCS_DIR_PDF = "chroma/kkg"
|
| 142 |
-
DOCS_DIR_WORD = "chroma/kkg"
|
| 143 |
###############################################
|
| 144 |
#URLs zu Dokumenten oder andere Inhalte, die einbezogen werden sollen
|
| 145 |
PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
|
|
@@ -331,16 +327,17 @@ def document_loading_splitting():
|
|
| 331 |
print("Directory Loader neu............................")
|
| 332 |
# kreiere einen DirectoryLoader für jeden file type
|
| 333 |
pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
|
| 334 |
-
|
| 335 |
|
| 336 |
# Load the files
|
| 337 |
pdf_documents = pdf_loader.load()
|
| 338 |
-
|
| 339 |
|
| 340 |
#alle zusammen in docs...
|
| 341 |
docs.extend(pdf_documents)
|
| 342 |
-
|
| 343 |
-
|
|
|
|
| 344 |
#andere loader...
|
| 345 |
# Load PDF
|
| 346 |
#loader = PyPDFLoader(PDF_URL)
|
|
|
|
| 128 |
#Pfad, wo Docs/Bilder/Filme abgelegt werden können - lokal, also hier im HF Space (sonst auf eigenem Rechner)
|
| 129 |
PATH_WORK = "."
|
| 130 |
CHROMA_DIR = "/chroma/kkg"
|
| 131 |
+
CHROMA_PDF = './chroma/kkg/pdf'
|
| 132 |
+
CHROMA_WORD = './chroma/kkg/word'
|
| 133 |
CHROMA_EXCEL = './chroma/kkg/excel'
|
| 134 |
YOUTUBE_DIR = "/youtube"
|
| 135 |
HISTORY_PFAD = "/data/history"
|
| 136 |
+
DOCS_DIR_PDF = "chroma/kkg/pdf"
|
| 137 |
+
DOCS_DIR_WORD = "chroma/kkg/word"
|
| 138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
###############################################
|
| 140 |
#URLs zu Dokumenten oder andere Inhalte, die einbezogen werden sollen
|
| 141 |
PDF_URL = "https://arxiv.org/pdf/2303.08774.pdf"
|
|
|
|
| 327 |
print("Directory Loader neu............................")
|
| 328 |
# kreiere einen DirectoryLoader für jeden file type
|
| 329 |
pdf_loader = create_directory_loader('.pdf', CHROMA_PDF)
|
| 330 |
+
word_loader = create_directory_loader('.word', CHROMA_WORD)
|
| 331 |
|
| 332 |
# Load the files
|
| 333 |
pdf_documents = pdf_loader.load()
|
| 334 |
+
word_documents = word_loader.load()
|
| 335 |
|
| 336 |
#alle zusammen in docs...
|
| 337 |
docs.extend(pdf_documents)
|
| 338 |
+
docs.extend(word_documents)
|
| 339 |
+
for doc in docs:
|
| 340 |
+
print("docs???..........."+str(doc.metadata["title"]))
|
| 341 |
#andere loader...
|
| 342 |
# Load PDF
|
| 343 |
#loader = PyPDFLoader(PDF_URL)
|