hanzla committed on
Commit
f0b26a9
·
1 Parent(s): 283e7c7

testing with llama3

Browse files
Files changed (2) hide show
  1. src/interface.py +1 -1
  2. src/pdfchatbot.py +5 -2
src/interface.py CHANGED
@@ -11,7 +11,7 @@ def create_demo():
11
  with gr.Row():
12
  # Add sliders here
13
  with gr.Column(): # Adjust scale as needed
14
- slider1 = gr.Slider(minimum=0, maximum=100, value=50, label="Chunk Size")
15
  with gr.Row():
16
  with gr.Column(scale=0.60):
17
  text_input = gr.Textbox(
 
11
  with gr.Row():
12
  # Add sliders here
13
  with gr.Column(): # Adjust scale as needed
14
+ slider1 = gr.Slider(minimum=256, maximum=1024, value=50, label="Chunk Size")
15
  with gr.Row():
16
  with gr.Column(scale=0.60):
17
  text_input = gr.Textbox(
src/pdfchatbot.py CHANGED
@@ -36,6 +36,7 @@ class PDFChatBot:
36
  self.model = None
37
  self.pipeline = None
38
  self.chain = None
 
39
 
40
  def load_config(self, file_path):
41
  """
@@ -92,7 +93,7 @@ class PDFChatBot:
92
  """
93
  Load the vector database from the documents and embeddings.
94
  """
95
- text_splitter = CharacterTextSplitter(chunk_size=256, chunk_overlap=0)
96
  docs = text_splitter.split_documents(self.documents)
97
  self.vectordb = Chroma.from_documents(docs, self.embeddings)
98
 
@@ -181,7 +182,7 @@ class PDFChatBot:
181
  history[-1][-1] += char
182
  return history, " "
183
 
184
- def render_file(self, file):
185
  """
186
  Renders a specific page of a PDF file as an image.
187
 
@@ -191,8 +192,10 @@ class PDFChatBot:
191
  Returns:
192
  PIL.Image.Image: The rendered page as an image.
193
  """
 
194
  doc = fitz.open(file.name)
195
  page = doc[self.page]
 
196
  pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
197
  image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
198
  return image
 
36
  self.model = None
37
  self.pipeline = None
38
  self.chain = None
39
+ self.chunk_size = None
40
 
41
  def load_config(self, file_path):
42
  """
 
93
  """
94
  Load the vector database from the documents and embeddings.
95
  """
96
+ text_splitter = CharacterTextSplitter(chunk_size=self.chunk_size, chunk_overlap=256)
97
  docs = text_splitter.split_documents(self.documents)
98
  self.vectordb = Chroma.from_documents(docs, self.embeddings)
99
 
 
182
  history[-1][-1] += char
183
  return history, " "
184
 
185
+ def render_file(self, file,chunk_size):
186
  """
187
  Renders a specific page of a PDF file as an image.
188
 
 
192
  Returns:
193
  PIL.Image.Image: The rendered page as an image.
194
  """
195
+ print(chunk_size)
196
  doc = fitz.open(file.name)
197
  page = doc[self.page]
198
+ self.chunk_size = chunk_size
199
  pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
200
  image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
201
  return image