myhloli committed on
Commit
79464a4
·
verified ·
1 Parent(s): 125d9ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -19,6 +19,7 @@ os.system('python download_models_hf.py')
19
  os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
20
 
21
  os.system('cp -r paddleocr /home/user/.paddleocr')
 
22
  from gradio_pdf import PDF
23
 
24
  import gradio as gr
@@ -110,6 +111,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
110
 
111
 
112
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
 
113
  # 获取识别的md文件以及压缩包文件路径
114
  local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
115
  layout_mode, formula_enable, table_enable, language)
@@ -202,7 +204,7 @@ if __name__ == "__main__":
202
  with gr.Row():
203
  with gr.Column(variant='panel', scale=5):
204
  file = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
205
- max_pages = gr.Slider(1, 10, 5, step=1, label="Max convert pages")
206
  with gr.Row():
207
  layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="Layout model", value="layoutlmv3")
208
  language = gr.Dropdown(all_lang, label="Language", value="")
@@ -213,25 +215,25 @@ if __name__ == "__main__":
213
  with gr.Row():
214
  change_bu = gr.Button("Convert")
215
  clear_bu = gr.ClearButton(value="Clear")
216
- pdf_show = PDF(label="PDF preview", interactive=True, height=800)
217
  with gr.Accordion("Examples:"):
218
  example_root = os.path.join(os.path.dirname(__file__), "examples")
219
  gr.Examples(
220
  examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
221
  _.endswith("pdf")],
222
- inputs=pdf_show
223
  )
224
 
225
  with gr.Column(variant='panel', scale=5):
226
  output_file = gr.File(label="convert result", interactive=False)
227
  with gr.Tabs():
228
  with gr.Tab("Markdown rendering"):
229
- md = gr.Markdown(label="Markdown rendering", height=900, show_copy_button=True,
230
  latex_delimiters=latex_delimiters, line_breaks=True)
231
  with gr.Tab("Markdown text"):
232
  md_text = gr.TextArea(lines=45, show_copy_button=True)
233
- file.upload(fn=to_pdf, inputs=file, outputs=pdf_show)
234
- change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
235
  outputs=[md, md_text, output_file, pdf_show], api_name=False)
236
  clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr, table_enable, language])
237
 
 
19
  os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
20
 
21
  os.system('cp -r paddleocr /home/user/.paddleocr')
22
+ os.system('pip install -U gradio-pdf')
23
  from gradio_pdf import PDF
24
 
25
  import gradio as gr
 
111
 
112
 
113
  def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language):
114
+ file_path = to_pdf(file_path)
115
  # 获取识别的md文件以及压缩包文件路径
116
  local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr,
117
  layout_mode, formula_enable, table_enable, language)
 
204
  with gr.Row():
205
  with gr.Column(variant='panel', scale=5):
206
  file = gr.File(label="Please upload a PDF or image", file_types=[".pdf", ".png", ".jpeg", ".jpg"])
207
+ max_pages = gr.Slider(1, 20, 10, step=1, label='Max convert pages')
208
  with gr.Row():
209
  layout_mode = gr.Dropdown(["layoutlmv3", "doclayout_yolo"], label="Layout model", value="layoutlmv3")
210
  language = gr.Dropdown(all_lang, label="Language", value="")
 
215
  with gr.Row():
216
  change_bu = gr.Button("Convert")
217
  clear_bu = gr.ClearButton(value="Clear")
218
+ pdf_show = PDF(label='PDF preview', interactive=False, visible=True, height=800)
219
  with gr.Accordion("Examples:"):
220
  example_root = os.path.join(os.path.dirname(__file__), "examples")
221
  gr.Examples(
222
  examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
223
  _.endswith("pdf")],
224
+ inputs=file
225
  )
226
 
227
  with gr.Column(variant='panel', scale=5):
228
  output_file = gr.File(label="convert result", interactive=False)
229
  with gr.Tabs():
230
  with gr.Tab("Markdown rendering"):
231
+ md = gr.Markdown(label="Markdown rendering", height=1100, show_copy_button=True,
232
  latex_delimiters=latex_delimiters, line_breaks=True)
233
  with gr.Tab("Markdown text"):
234
  md_text = gr.TextArea(lines=45, show_copy_button=True)
235
+ file.change(fn=to_pdf, inputs=file, outputs=pdf_show)
236
+ change_bu.click(fn=to_markdown, inputs=[file, max_pages, is_ocr, layout_mode, formula_enable, table_enable, language],
237
  outputs=[md, md_text, output_file, pdf_show], api_name=False)
238
  clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr, table_enable, language])
239