Spaces:
Paused
Paused
import subprocess
from pathlib import Path

import gradio as gr
# Custom CSS handed to gr.Blocks: renders the parsed-output panel as a
# fixed-height, scrollable, bordered box.  The panel is created with
# elem_id='mkd', so it has to be matched with an id selector (#mkd); the
# original class selector (.mkd) is kept so components tagged with that
# class are styled the same way.
css = """
#mkd, .mkd {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
}
"""
def nougat_ocr(file_name):
    """Run the ``nougat`` command-line tool on a single PDF.

    The CLI is asked to write its result into the ``output`` folder; this
    function itself returns nothing.

    Args:
        file_name: Path of the PDF to process.

    Raises:
        RuntimeError: If the ``nougat`` process exits with a non-zero status.
            The captured stderr is included in the message so the failure is
            not lost (stdout/stderr are piped, not shown on the console).
    """
    print('******* inside nougat_ocr *******')

    # CLI command to run; passed as a list so no shell is involved.
    cli_command = [
        'nougat',
        '--out', 'output',
        'pdf', str(file_name),
        '--checkpoint', 'nougat',
    ]

    # Run the command; on success the .mmd file is expected in the output folder.
    result = subprocess.run(
        cli_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # Surface failures here instead of letting the caller trip over a missing
    # output file later with no hint about what went wrong.
    if result.returncode != 0:
        raise RuntimeError(
            f"nougat exited with status {result.returncode}: "
            f"{result.stderr.strip()}"
        )
def predict(pdf_file):
    """Convert an uploaded PDF to multimarkdown text using nougat.

    Args:
        pdf_file: The uploaded file. Either an object exposing the file's
            path as ``.name`` (what the original code expected) or a plain
            path string.

    Returns:
        The text content of ``output/<pdf stem>.mmd``.

    Raises:
        ValueError: If no file was provided (``pdf_file`` is ``None``).
    """
    print('******* inside predict *******')

    # Clicking the button without uploading anything passes None; fail with a
    # clear message instead of an AttributeError on `.name`.
    if pdf_file is None:
        raise ValueError("No PDF file was provided; upload a PDF before running.")

    # Accept both a tempfile-like object and a bare path string.
    pdf_path = getattr(pdf_file, 'name', pdf_file)
    print(f"temporary file - {pdf_path}")

    # Use the full stem: splitting on the first '.' would turn a name such as
    # '2308.13418.pdf' into '2308' and the .mmd lookup below would miss.
    # NOTE(review): assumes nougat names its output '<stem>.mmd' - confirm.
    pdf_name = Path(pdf_path).stem
    print(f"pdf file name - {pdf_name}")

    # Get prediction for the PDF using nougat
    nougat_ocr(pdf_path)
    print("BAACCKKK")

    # Read the multimarkdown (.mmd) file; explicit UTF-8 so the result does
    # not depend on the host's locale encoding.
    mmd_path = Path('output') / f'{pdf_name}.mmd'
    with open(mmd_path, 'r', encoding='utf-8') as file:
        return file.read()
# Build the Gradio UI.
# NOTE(review): the nesting below is reconstructed - the three scale=1
# components are placed inside the Row, the button and output panel below it.
# Confirm against the intended layout.
with gr.Blocks(css=css) as demo:
    # Page header (closing tags fixed: </center></h1> and </center></h3>).
    gr.HTML("<h1><center>Nougat: Neural Optical Understanding for Academic Documents</center></h1>")
    gr.HTML("<h3><center>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a></center></h3>")

    with gr.Row():
        pdf_file = gr.File(label='Upload a PDF', scale=1)
        mkd = gr.Markdown('<h2><center><i>OR</i></center></h2>', scale=1)
        # NOTE(review): this textbox is displayed but is not passed to any
        # event handler below, so a link entered here is currently ignored.
        pdf_link = gr.Textbox(placeholder='Enter an arxiv link here', label='Provide a link', scale=1)

    btn = gr.Button()
    # Panel that shows the parsed document.
    parsed_output = gr.Markdown(elem_id='mkd')

    # Run predict on the uploaded file and render its return value.
    btn.click(predict, pdf_file, parsed_output)

demo.queue()
demo.launch(debug=True)