CSAle committed on
Commit
ba43ba6
·
1 Parent(s): 3a8bd33

Adding PDF

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  from typing import List
3
  from chainlit.types import AskFileResponse
4
- from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
5
  from aimakerspace.openai_utils.prompts import (
6
  UserRolePrompt,
7
  SystemRolePrompt,
@@ -50,10 +50,10 @@ class RetrievalAugmentedQAPipeline:
50
  text_splitter = CharacterTextSplitter()
51
 
52
 
53
- def process_text_file(file: AskFileResponse):
54
  import tempfile
55
 
56
- with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
57
  temp_file_path = temp_file.name
58
 
59
  with open(file.path, "rb") as f:
@@ -62,10 +62,12 @@ def process_text_file(file: AskFileResponse):
62
  with open(temp_file_path, "wb") as f:
63
  f.write(content)
64
 
65
- #file response no longer has a content field
66
-
67
- text_loader = TextFileLoader(temp_file_path)
68
- documents = text_loader.load_documents()
 
 
69
  texts = text_splitter.split_texts(documents)
70
  return texts
71
 
@@ -77,8 +79,8 @@ async def on_chat_start():
77
  # Wait for the user to upload a file
78
  while files == None:
79
  files = await cl.AskFileMessage(
80
- content="Please upload a Text File file to begin!",
81
- accept=["text/plain"],
82
  max_size_mb=2,
83
  timeout=180,
84
  ).send()
@@ -91,7 +93,7 @@ async def on_chat_start():
91
  await msg.send()
92
 
93
  # load the file
94
- texts = process_text_file(file)
95
 
96
  print(f"Processing {len(texts)} text chunks")
97
 
 
1
  import os
2
  from typing import List
3
  from chainlit.types import AskFileResponse
4
+ from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
5
  from aimakerspace.openai_utils.prompts import (
6
  UserRolePrompt,
7
  SystemRolePrompt,
 
50
  text_splitter = CharacterTextSplitter()
51
 
52
 
53
+ def process_file(file: AskFileResponse):
54
  import tempfile
55
 
56
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=file.name.split('.')[-1]) as temp_file:
57
  temp_file_path = temp_file.name
58
 
59
  with open(file.path, "rb") as f:
 
62
  with open(temp_file_path, "wb") as f:
63
  f.write(content)
64
 
65
+ if file.name.lower().endswith('.pdf'):
66
+ loader = PDFLoader(temp_file_path)
67
+ else:
68
+ loader = TextFileLoader(temp_file_path)
69
+
70
+ documents = loader.load_documents()
71
  texts = text_splitter.split_texts(documents)
72
  return texts
73
 
 
79
  # Wait for the user to upload a file
80
  while files == None:
81
  files = await cl.AskFileMessage(
82
+ content="Please upload a Text or PDF file to begin!",
83
+ accept=["text/plain", "application/pdf"],
84
  max_size_mb=2,
85
  timeout=180,
86
  ).send()
 
93
  await msg.send()
94
 
95
  # load the file
96
+ texts = process_file(file)
97
 
98
  print(f"Processing {len(texts)} text chunks")
99