import gradio as gr
from bs4 import BeautifulSoup as bs
from pypdf import PdfReader
from pathlib import Path
import os
import sys
#import html5lib
#import copy
import requests
#from IPython.display import IFrame
def scrape(instring):
    """Return a Gradio HTML update carrying a fixed placeholder message.

    Args:
        instring: Accepted for interface compatibility; this function does
            not read it.

    Returns:
        The result of ``gr.HTML.update`` called with the placeholder text.
    """
    # Same text as the original triple-quoted literal, including the leading
    # and trailing newline.
    placeholder = "\nPdf viewer testing\n"
    return gr.HTML.update(placeholder)
def scrape00(instring):
    """Download the PDF at ``instring`` to ``data.pdf`` and read its first page.

    Args:
        instring: URL passed to ``requests.get``.

    Returns:
        The result of ``gr.HTML.update`` called with an empty string. The
        same empty update is returned when the server does not answer with
        HTTP 200, in which case no file is written or parsed.
    """
    pdf_path = Path("./data.pdf")

    # The context manager releases the streamed connection even when the
    # response body is never read (the non-200 path).
    with requests.get(instring, stream=True) as response:
        if response.status_code != 200:
            print(response.status_code)
            # Nothing was written for this request, so do not fall through
            # and parse a missing or stale data.pdf from an earlier call.
            return gr.HTML.update("")
        with open(pdf_path, "wb") as f:
            f.write(response.content)

    print(pdf_path)
    reader = PdfReader(pdf_path)
    # Guard the index: a PDF with no pages would make pages[0] raise IndexError.
    if reader.pages:
        # NOTE(review): the extracted text is not used in the returned update;
        # the extraction is kept from the original -- confirm whether it
        # should be rendered in the HTML output.
        text = reader.pages[0].extract_text()
    return gr.HTML.update("")
def scrape1(instring):
# set the url to perform the get request
URL = f'{instring}'
page = requests.get(URL)
# load the page content
text = page.content
# make a soup object by using beautiful
# soup and set the markup as html parser
soup = bs(text, "html.parser")
out = str(soup.prettify())
return gr.HTML.update(f'''