myhloli committed on
Commit
17a9ec7
·
verified ·
1 Parent(s): 2e784c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -27
app.py CHANGED
@@ -8,17 +8,8 @@ from pathlib import Path
8
  import re
9
 
10
  # os.system('pip install -U magic-pdf==0.8.1')
11
- # os.system('pip install -U magic_pdf-0.9.0a9-py3-none-any.whl')
12
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
13
 
14
- # from huggingface_hub import snapshot_download
15
- # model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
16
- # layoutreader_model_dir = snapshot_download('hantian/layoutreader')
17
-
18
- # os.system('cp magic-pdf.template.json ~/magic-pdf.json')
19
- # os.system(f"sed -i 's|/tmp/models|{model_dir}/models|g' /home/user/magic-pdf.json")
20
- # os.system(f"sed -i 's|/tmp/layoutreader|{layoutreader_model_dir}|g' /home/user/magic-pdf.json")
21
-
22
  os.system('wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py')
23
  os.system('python download_models_hf.py')
24
  os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
@@ -36,8 +27,6 @@ from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
36
  from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
37
  from magic_pdf.tools.common import do_parse, prepare_env
38
 
39
- # import spaces
40
-
41
 
42
  def read_fn(path):
43
  disk_rw = DiskReaderWriter(os.path.dirname(path))
@@ -131,28 +120,13 @@ def to_markdown(file_path, end_pages, ocr):
131
  # θΏ”ε›žθ½¬ζ’εŽηš„PDFθ·―εΎ„
132
  new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
133
 
134
- # return md_content, txt_content, archive_zip_path, show_pdf(new_pdf_path)
135
  return md_content, txt_content, archive_zip_path, new_pdf_path
136
 
137
 
138
- # def show_pdf(file_path):
139
- # with open(file_path, "rb") as f:
140
- # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
141
- # pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" ' \
142
- # f'width="100%" height="1000" type="application/pdf">'
143
- # return pdf_display
144
-
145
-
146
- def show_pdf(file):
147
- return file
148
-
149
-
150
  latex_delimiters = [{"left": "$$", "right": "$$", "display": True},
151
  {"left": '$', "right": '$', "display": False}]
152
 
153
 
154
- from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
155
-
156
  def init_model():
157
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
158
  try:
@@ -205,7 +179,6 @@ if __name__ == "__main__":
205
  latex_delimiters=latex_delimiters, line_breaks=True)
206
  with gr.Tab("Markdown text"):
207
  md_text = gr.TextArea(lines=45, show_copy_button=True)
208
- # file.upload(fn=show_pdf, inputs=file, outputs=pdf_show)
209
  change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages, is_ocr], outputs=[md, md_text, output_file, pdf_show])
210
  clear_bu.add([md, pdf_show, md_text, output_file, is_ocr])
211
 
 
8
  import re
9
 
10
  # os.system('pip install -U magic-pdf==0.8.1')
 
11
  os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
12
 
 
 
 
 
 
 
 
 
13
  os.system('wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py')
14
  os.system('python download_models_hf.py')
15
  os.system("sed -i 's|cpu|cuda|g' /home/user/magic-pdf.json")
 
27
  from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
28
  from magic_pdf.tools.common import do_parse, prepare_env
29
 
 
 
30
 
31
  def read_fn(path):
32
  disk_rw = DiskReaderWriter(os.path.dirname(path))
 
120
  # θΏ”ε›žθ½¬ζ’εŽηš„PDFθ·―εΎ„
121
  new_pdf_path = os.path.join(local_md_dir, file_name + "_layout.pdf")
122
 
 
123
  return md_content, txt_content, archive_zip_path, new_pdf_path
124
 
125
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  latex_delimiters = [{"left": "$$", "right": "$$", "display": True},
127
  {"left": '$', "right": '$', "display": False}]
128
 
129
 
 
 
130
  def init_model():
131
  from magic_pdf.model.doc_analyze_by_custom_model import ModelSingleton
132
  try:
 
179
  latex_delimiters=latex_delimiters, line_breaks=True)
180
  with gr.Tab("Markdown text"):
181
  md_text = gr.TextArea(lines=45, show_copy_button=True)
 
182
  change_bu.click(fn=to_markdown, inputs=[pdf_show, max_pages, is_ocr], outputs=[md, md_text, output_file, pdf_show])
183
  clear_bu.add([md, pdf_show, md_text, output_file, is_ocr])
184