Spaces:
Running
Running
Adding PDF
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import os
|
2 |
from typing import List
|
3 |
from chainlit.types import AskFileResponse
|
4 |
-
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
|
5 |
from aimakerspace.openai_utils.prompts import (
|
6 |
UserRolePrompt,
|
7 |
SystemRolePrompt,
|
@@ -50,10 +50,10 @@ class RetrievalAugmentedQAPipeline:
|
|
50 |
text_splitter = CharacterTextSplitter()
|
51 |
|
52 |
|
53 |
-
def
|
54 |
import tempfile
|
55 |
|
56 |
-
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=
|
57 |
temp_file_path = temp_file.name
|
58 |
|
59 |
with open(file.path, "rb") as f:
|
@@ -62,10 +62,12 @@ def process_text_file(file: AskFileResponse):
|
|
62 |
with open(temp_file_path, "wb") as f:
|
63 |
f.write(content)
|
64 |
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
69 |
texts = text_splitter.split_texts(documents)
|
70 |
return texts
|
71 |
|
@@ -77,8 +79,8 @@ async def on_chat_start():
|
|
77 |
# Wait for the user to upload a file
|
78 |
while files == None:
|
79 |
files = await cl.AskFileMessage(
|
80 |
-
content="Please upload a Text
|
81 |
-
accept=["text/plain"],
|
82 |
max_size_mb=2,
|
83 |
timeout=180,
|
84 |
).send()
|
@@ -91,7 +93,7 @@ async def on_chat_start():
|
|
91 |
await msg.send()
|
92 |
|
93 |
# load the file
|
94 |
-
texts =
|
95 |
|
96 |
print(f"Processing {len(texts)} text chunks")
|
97 |
|
|
|
1 |
import os
|
2 |
from typing import List
|
3 |
from chainlit.types import AskFileResponse
|
4 |
+
from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
|
5 |
from aimakerspace.openai_utils.prompts import (
|
6 |
UserRolePrompt,
|
7 |
SystemRolePrompt,
|
|
|
50 |
text_splitter = CharacterTextSplitter()
|
51 |
|
52 |
|
53 |
+
def process_file(file: AskFileResponse):
|
54 |
import tempfile
|
55 |
|
56 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=file.name.split('.')[-1]) as temp_file:
|
57 |
temp_file_path = temp_file.name
|
58 |
|
59 |
with open(file.path, "rb") as f:
|
|
|
62 |
with open(temp_file_path, "wb") as f:
|
63 |
f.write(content)
|
64 |
|
65 |
+
if file.name.lower().endswith('.pdf'):
|
66 |
+
loader = PDFLoader(temp_file_path)
|
67 |
+
else:
|
68 |
+
loader = TextFileLoader(temp_file_path)
|
69 |
+
|
70 |
+
documents = loader.load_documents()
|
71 |
texts = text_splitter.split_texts(documents)
|
72 |
return texts
|
73 |
|
|
|
79 |
# Wait for the user to upload a file
|
80 |
while files == None:
|
81 |
files = await cl.AskFileMessage(
|
82 |
+
content="Please upload a Text or PDF file to begin!",
|
83 |
+
accept=["text/plain", "application/pdf"],
|
84 |
max_size_mb=2,
|
85 |
timeout=180,
|
86 |
).send()
|
|
|
93 |
await msg.send()
|
94 |
|
95 |
# load the file
|
96 |
+
texts = process_file(file)
|
97 |
|
98 |
print(f"Processing {len(texts)} text chunks")
|
99 |
|