Omnibus committed on
Commit
e06a544
·
1 Parent(s): 917f3b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -4
app.py CHANGED
@@ -9,6 +9,8 @@ from huggingface_hub import InferenceClient,HfApi
9
  import random
10
  import json
11
  import datetime
 
 
12
  #from query import tasks
13
  from agent import (
14
  PREFIX,
@@ -89,7 +91,6 @@ def read_txt(txt_path):
89
  return text
90
 
91
  def read_pdf(pdf_path):
92
- from pypdf import PdfReader
93
  text=""
94
  reader = PdfReader(f'{pdf_path}')
95
  number_of_pages = len(reader.pages)
@@ -100,6 +101,26 @@ def read_pdf(pdf_path):
100
  return text
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  VERBOSE = True
104
  MAX_HISTORY = 100
105
  MAX_DATA = 25000
@@ -207,12 +228,15 @@ def compress_data(c, instruct, history):
207
 
208
 
209
 
210
- def summarize(inp,history,data=None,file=None,url=None):
211
  if inp == "":
212
  inp = "Process this data"
213
  history.clear()
214
  history = [(inp,"Working on it...")]
215
  yield "",history
 
 
 
216
  if url != "":
217
  val, out = find_all(url)
218
  if not val:
@@ -280,10 +304,11 @@ with gr.Blocks() as app:
280
  file=gr.File(label="Input File (.pdf .txt)")
281
  with gr.Tab("URL"):
282
  url = gr.Textbox(label="URL")
283
-
 
284
  #text=gr.JSON()
285
  #inp_query.change(search_models,inp_query,models_dd)
286
  clear_btn.click(clear_fn,None,[prompt,chatbot])
287
- go=button.click(summarize,[prompt,chatbot,data,file,url],[prompt,chatbot])
288
  stop_button.click(None,None,None,cancels=[go])
289
  app.launch(server_port=7860,show_api=False)
 
9
  import random
10
  import json
11
  import datetime
12
+ from pypdf import PdfReader
13
+ import uuid
14
  #from query import tasks
15
  from agent import (
16
  PREFIX,
 
91
  return text
92
 
93
  def read_pdf(pdf_path):
 
94
  text=""
95
  reader = PdfReader(f'{pdf_path}')
96
  number_of_pages = len(reader.pages)
 
101
  return text
102
 
103
 
104
def read_pdf_online(url):
    """Download the PDF at *url* and return the text of all its pages.

    Args:
        url: HTTP(S) address of the PDF document to fetch.

    Returns:
        On HTTP 200, a string made of each page's extracted text, every
        page prefixed with a newline. On any other status, the numeric
        HTTP status code is returned as-is (this failure-path return
        value is kept unchanged from the earlier implementation).
    """
    # Function-scope import so this block does not depend on a file-level
    # ``import io`` that may not exist.
    import io

    # A timeout keeps a stalled server from hanging the request forever.
    # ``stream=True`` was dropped: the whole body is read via ``.content``
    # anyway, so streaming only delayed releasing the connection.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(response.status_code)
        return response.status_code

    # Parse straight from memory instead of writing ``<uuid>.pdf`` to the
    # working directory, which was never cleaned up afterwards.
    reader = PdfReader(io.BytesIO(response.content))

    # Iterate over every page: the earlier ``range(number_of_pages - 1)``
    # skipped the last page, and ``text`` was read before being assigned.
    return "".join(f"\n{page.extract_text()}" for page in reader.pages)
123
+
124
  VERBOSE = True
125
  MAX_HISTORY = 100
126
  MAX_DATA = 25000
 
228
 
229
 
230
 
231
+ def summarize(inp,history,data=None,file=None,url=None,pdf_url=None):
232
  if inp == "":
233
  inp = "Process this data"
234
  history.clear()
235
  history = [(inp,"Working on it...")]
236
  yield "",history
237
+ if pdf_url.startswith("http"):
238
+ out = read_pdf_online(url)
239
+ data=out
240
  if url != "":
241
  val, out = find_all(url)
242
  if not val:
 
304
  file=gr.File(label="Input File (.pdf .txt)")
305
  with gr.Tab("URL"):
306
  url = gr.Textbox(label="URL")
307
+ with gr.Tab("PDF URL"):
308
+ pdf_url = gr.Textbox(label="PDF URL")
309
  #text=gr.JSON()
310
  #inp_query.change(search_models,inp_query,models_dd)
311
  clear_btn.click(clear_fn,None,[prompt,chatbot])
312
+ go=button.click(summarize,[prompt,chatbot,data,file,url,pdf_url],[prompt,chatbot])
313
  stop_button.click(None,None,None,cancels=[go])
314
  app.launch(server_port=7860,show_api=False)