testing with llama3
Browse files
- src/interface.py +1 -1
- src/pdfchatbot.py +5 -2
src/interface.py
CHANGED
@@ -11,7 +11,7 @@ def create_demo():
|
|
11 |
with gr.Row():
|
12 |
# Add sliders here
|
13 |
with gr.Column(): # Adjust scale as needed
|
14 |
-
slider1 = gr.Slider(minimum=
|
15 |
with gr.Row():
|
16 |
with gr.Column(scale=0.60):
|
17 |
text_input = gr.Textbox(
|
|
|
11 |
with gr.Row():
|
12 |
# Add sliders here
|
13 |
with gr.Column(): # Adjust scale as needed
|
14 |
+
slider1 = gr.Slider(minimum=256, maximum=1024, value=50, label="Chunk Size")
|
15 |
with gr.Row():
|
16 |
with gr.Column(scale=0.60):
|
17 |
text_input = gr.Textbox(
|
src/pdfchatbot.py
CHANGED
@@ -36,6 +36,7 @@ class PDFChatBot:
|
|
36 |
self.model = None
|
37 |
self.pipeline = None
|
38 |
self.chain = None
|
|
|
39 |
|
40 |
def load_config(self, file_path):
|
41 |
"""
|
@@ -92,7 +93,7 @@ class PDFChatBot:
|
|
92 |
"""
|
93 |
Load the vector database from the documents and embeddings.
|
94 |
"""
|
95 |
-
text_splitter = CharacterTextSplitter(chunk_size=
|
96 |
docs = text_splitter.split_documents(self.documents)
|
97 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
98 |
|
@@ -181,7 +182,7 @@ class PDFChatBot:
|
|
181 |
history[-1][-1] += char
|
182 |
return history, " "
|
183 |
|
184 |
-
def render_file(self, file):
|
185 |
"""
|
186 |
Renders a specific page of a PDF file as an image.
|
187 |
|
@@ -191,8 +192,10 @@ class PDFChatBot:
|
|
191 |
Returns:
|
192 |
PIL.Image.Image: The rendered page as an image.
|
193 |
"""
|
|
|
194 |
doc = fitz.open(file.name)
|
195 |
page = doc[self.page]
|
|
|
196 |
pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
|
197 |
image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
|
198 |
return image
|
|
|
36 |
self.model = None
|
37 |
self.pipeline = None
|
38 |
self.chain = None
|
39 |
+
self.chunk_size = None
|
40 |
|
41 |
def load_config(self, file_path):
|
42 |
"""
|
|
|
93 |
"""
|
94 |
Load the vector database from the documents and embeddings.
|
95 |
"""
|
96 |
+
text_splitter = CharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=256)
|
97 |
docs = text_splitter.split_documents(self.documents)
|
98 |
self.vectordb = Chroma.from_documents(docs, self.embeddings)
|
99 |
|
|
|
182 |
history[-1][-1] += char
|
183 |
return history, " "
|
184 |
|
185 |
+
def render_file(self, file,chunk_size):
|
186 |
"""
|
187 |
Renders a specific page of a PDF file as an image.
|
188 |
|
|
|
192 |
Returns:
|
193 |
PIL.Image.Image: The rendered page as an image.
|
194 |
"""
|
195 |
+
print(chunk_size)
|
196 |
doc = fitz.open(file.name)
|
197 |
page = doc[self.page]
|
198 |
+
self.chunk_size = chunk_size
|
199 |
pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
|
200 |
image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
|
201 |
return image
|