Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,17 +8,8 @@ from pathlib import Path
|
|
8 |
import re
|
9 |
|
10 |
# os.system('pip install -U magic-pdf==0.8.1')
|
11 |
-
# os.system('pip install -U magic_pdf-0.9.0a9-py3-none-any.whl')
|
12 |
os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
|
13 |
|
14 |
-
# from huggingface_hub import snapshot_download
|
15 |
-
# model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
|
16 |
-
# layoutreader_model_dir = snapshot_download('hantian/layoutreader')
|
17 |
-
|
18 |
-
# os.system('cp magic-pdf.template.json ~/magic-pdf.json')
|
19 |
-
# os.system(f"sed -i 's|/tmp/models|{model_dir}/models|g' /home/user/magic-pdf.json")
|
20 |
-
# os.system(f"sed -i 's|/tmp/layoutreader|{layoutreader_model_dir}|g' /home/user/magic-pdf.json")
|
21 |
-
|
22 |
os.system('wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py')
|
23 |
os.system('python download_models_hf.py')
|
24 |
os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
|
@@ -36,8 +27,6 @@ from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
|
|
36 |
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
|
37 |
from magic_pdf.tools.common import do_parse, prepare_env
|
38 |
|
39 |
-
# import spaces
|
40 |
-
|
41 |
|
42 |
def read_fn(path):
|
43 |
disk_rw = DiskReaderWriter(os.path.dirname(path))
|
@@ -131,28 +120,13 @@ def to_markdown(file_path, end_pages, ocr):
|
|
131 |
# Return the path of the converted PDF
|
132 |
new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
|
133 |
|
134 |
-
# return md_content, txt_content, archive_zip_path, show_pdf(new_pdf_path)
|
135 |
return md_content, txt_content, archive_zip_path, new_pdf_path
|
136 |
|
137 |
|
138 |
-
# def show_pdf(file_path):
|
139 |
-
# with open(file_path, "rb") as f:
|
140 |
-
# base64_pdf = base64.b64encode(f.read()).decode('utf-8')
|
141 |
-
# pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" ' \
|
142 |
-
# f'width="100%" height="1000" type="application/pdf">'
|
143 |
-
# return pdf_display
|
144 |
-
|
145 |
-
|
146 |
-
def show_pdf(file):
|
147 |
-
return file
|
148 |
-
|
149 |
-
|
150 |
latex_delimiters = [{"left": "$$", "right": "$$", "display": True},
|
151 |
{"left": '$', "right": '$', "display": False}]
|
152 |
|
153 |
|
154 |
-
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
|
155 |
-
|
156 |
def init_model():
|
157 |
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
|
158 |
try:
|
@@ -205,7 +179,6 @@ if __name__ == "__main__":
|
|
205 |
latex_delimiters=latex_delimiters, line_breaks=True)
|
206 |
with gr.Tab("Markdown text"):
|
207 |
md_text = gr.TextArea(lines=45, show_copy_button=True)
|
208 |
-
# file.upload(fn=show_pdf, inputs=file, outputs=pdf_show)
|
209 |
change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages, is_ocr], outputs=[md, md_text, output_file, pdf_show])
|
210 |
clear_bu.add([md, pdf_show, md_text, output_file, is_ocr])
|
211 |
|
|
|
8 |
import re
|
9 |
|
10 |
# os.system('pip install -U magic-pdf==0.8.1')
|
|
|
11 |
os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
os.system('wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py')
|
14 |
os.system('python download_models_hf.py')
|
15 |
os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
|
|
|
27 |
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
|
28 |
from magic_pdf.tools.common import do_parse, prepare_env
|
29 |
|
|
|
|
|
30 |
|
31 |
def read_fn(path):
|
32 |
disk_rw = DiskReaderWriter(os.path.dirname(path))
|
|
|
120 |
# Return the path of the converted PDF
|
121 |
new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
|
122 |
|
|
|
123 |
return md_content, txt_content, archive_zip_path, new_pdf_path
|
124 |
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
latex_delimiters = [{"left": "$$", "right": "$$", "display": True},
|
127 |
{"left": '$', "right": '$', "display": False}]
|
128 |
|
129 |
|
|
|
|
|
130 |
def init_model():
|
131 |
from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
|
132 |
try:
|
|
|
179 |
latex_delimiters=latex_delimiters, line_breaks=True)
|
180 |
with gr.Tab("Markdown text"):
|
181 |
md_text = gr.TextArea(lines=45, show_copy_button=True)
|
|
|
182 |
change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages, is_ocr], outputs=[md, md_text, output_file, pdf_show])
|
183 |
clear_bu.add([md, pdf_show, md_text, output_file, is_ocr])
|
184 |
|