Echo9k committed on
Commit
d0abfe5
·
1 Parent(s): a73ec05

project set-up update

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. Makefile +3 -0
  3. header.html +10 -21
  4. requirements.txt +4 -2
  5. utils.py +1 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ __pycache__/
2
+ output/
Makefile ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ apt-get update && apt-get upgrade
2
+
3
+ /home/user/app/download_models_hf.py
header.html CHANGED
@@ -1,6 +1,5 @@
1
  <html>
2
  <head>
3
- <!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/css/bulma.min.css"> -->
4
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
5
  <style>
6
  .link-block {
@@ -21,10 +20,6 @@
21
  padding: 0 16px;
22
  cursor: pointer !important;
23
  }
24
- .external-link,
25
- .external-link:hover {
26
- cursor: pointer !important;
27
- }
28
  a {
29
  text-decoration: none;
30
  }
@@ -43,12 +38,7 @@
43
  gap: 24px;
44
  border-radius: 8px;
45
  ">
46
- <div style="
47
- display: flex;
48
- flex-direction: column;
49
- align-items: center;
50
- gap: 16px;
51
- ">
52
  <div style="display: flex; flex-direction: column; gap: 8px">
53
  <h1 style="
54
  font-size: 48px;
@@ -57,7 +47,7 @@
57
  font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
58
  'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
59
  ">
60
- MinerU: PDF Extraction &amp; Voice Reading Demo
61
  </h1>
62
  </div>
63
  </div>
@@ -69,8 +59,9 @@
69
  color: #fafafa;
70
  opacity: 0.8;
71
  ">
72
- A one-stop, open-source, high-quality tool for data extraction and PDF voice reading,<br>
73
- supporting PDF, webpage, and e-book extraction.
 
74
  </p>
75
  <style>
76
  .link-block {
@@ -85,7 +76,7 @@
85
  <div class="publication-links">
86
  <!-- Code Link. -->
87
  <span class="link-block">
88
- <a href="https://github.com/opendatalab/MinerU" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
89
  <span class="icon" style="margin-right: 4px">
90
  <i class="fab fa-github" style="color: white; margin-right: 4px"></i>
91
  </span>
@@ -95,7 +86,7 @@
95
 
96
  <!-- arXiv Link. -->
97
  <span class="link-block">
98
- <a href="https://arxiv.org/abs/2409.18839" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
99
  <span class="icon" style="margin-right: 8px">
100
  <i class="fas fa-file" style="color: white"></i>
101
  </span>
@@ -105,7 +96,7 @@
105
 
106
  <!-- Homepage Link. -->
107
  <span class="link-block">
108
- <a href="https://mineru.org.cn/home?source=huggingface" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
109
  <span class="icon" style="margin-right: 8px">
110
  <i class="fas fa-home" style="color: white"></i>
111
  </span>
@@ -115,7 +106,7 @@
115
 
116
  <!-- Client Link. -->
117
  <span class="link-block">
118
- <a href="https://mineru.org.cn/client?source=huggingface" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
119
  <span class="icon" style="margin-right: 8px">
120
  <i class="fas fa-download" style="color: white"></i>
121
  </span>
@@ -125,7 +116,7 @@
125
 
126
  <!-- Voice Reading Demo Link. -->
127
  <span class="link-block">
128
- <a href="https://mineru.org.cn/voice?source=huggingface" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
129
  <span class="icon" style="margin-right: 8px">
130
  <i class="fas fa-volume-up" style="color: white"></i>
131
  </span>
@@ -134,8 +125,6 @@
134
  </span>
135
  </div>
136
  </div>
137
-
138
- <!-- New Demo Links -->
139
  </div>
140
  </body>
141
  </html>
 
1
  <html>
2
  <head>
 
3
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
4
  <style>
5
  .link-block {
 
20
  padding: 0 16px;
21
  cursor: pointer !important;
22
  }
 
 
 
 
23
  a {
24
  text-decoration: none;
25
  }
 
38
  gap: 24px;
39
  border-radius: 8px;
40
  ">
41
+ <div style="display: flex; flex-direction: column; align-items: center; gap: 16px">
 
 
 
 
 
42
  <div style="display: flex; flex-direction: column; gap: 8px">
43
  <h1 style="
44
  font-size: 48px;
 
47
  font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
48
  'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
49
  ">
50
+ MinerU: AI-Powered PDF Extraction & Voice Reading
51
  </h1>
52
  </div>
53
  </div>
 
59
  color: #fafafa;
60
  opacity: 0.8;
61
  ">
62
+ Built for automation and efficiency, MinerU is an open-source AI solution<br>
63
+ for extracting insights from PDFs, webpages, and e-books—now with voice-powered<br>
64
+ reading capabilities for hands-free access to your documents.
65
  </p>
66
  <style>
67
  .link-block {
 
76
  <div class="publication-links">
77
  <!-- Code Link. -->
78
  <span class="link-block">
79
+ <a href="https://github.com/opendatalab/MinerU" class="external-link button is-normal is-rounded is-dark">
80
  <span class="icon" style="margin-right: 4px">
81
  <i class="fab fa-github" style="color: white; margin-right: 4px"></i>
82
  </span>
 
86
 
87
  <!-- arXiv Link. -->
88
  <span class="link-block">
89
+ <a href="https://arxiv.org/abs/2409.18839" class="external-link button is-normal is-rounded is-dark">
90
  <span class="icon" style="margin-right: 8px">
91
  <i class="fas fa-file" style="color: white"></i>
92
  </span>
 
96
 
97
  <!-- Homepage Link. -->
98
  <span class="link-block">
99
+ <a href="https://mineru.org.cn/home?source=huggingface" class="external-link button is-normal is-rounded is-dark">
100
  <span class="icon" style="margin-right: 8px">
101
  <i class="fas fa-home" style="color: white"></i>
102
  </span>
 
106
 
107
  <!-- Client Link. -->
108
  <span class="link-block">
109
+ <a href="https://mineru.org.cn/client?source=huggingface" class="external-link button is-normal is-rounded is-dark">
110
  <span class="icon" style="margin-right: 8px">
111
  <i class="fas fa-download" style="color: white"></i>
112
  </span>
 
116
 
117
  <!-- Voice Reading Demo Link. -->
118
  <span class="link-block">
119
+ <a href="https://mineru.org.cn/voice?source=huggingface" class="external-link button is-normal is-rounded is-dark">
120
  <span class="icon" style="margin-right: 8px">
121
  <i class="fas fa-volume-up" style="color: white"></i>
122
  </span>
 
125
  </span>
126
  </div>
127
  </div>
 
 
128
  </div>
129
  </body>
130
  </html>
requirements.txt CHANGED
@@ -14,7 +14,7 @@ matplotlib
14
  ultralytics>=8.3.48
15
  paddleocr==2.7.3
16
  paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
17
- struct-eqtable==0.3.2
18
  detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
19
  magic-pdf>=1.0.1
20
  torch>=2.2.2,<=2.3.1
@@ -23,4 +23,6 @@ rapid-table>=1.0.3,<2.0.0
23
  rapidocr-paddle
24
  rapidocr-onnxruntime
25
  gradio-pdf>=0.0.21
26
- openai
 
 
 
14
  ultralytics>=8.3.48
15
  paddleocr==2.7.3
16
  paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
17
+ struct-eqtable>=0.3.2
18
  detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
19
  magic-pdf>=1.0.1
20
  torch>=2.2.2,<=2.3.1
 
23
  rapidocr-paddle
24
  rapidocr-onnxruntime
25
  gradio-pdf>=0.0.21
26
+ openai>=1.64.0
27
+ playsound
28
+ gTTS
utils.py CHANGED
@@ -5,6 +5,7 @@ import base64
5
  import re
6
  import logging
7
 
 
8
  def compress_directory_to_zip(directory_path, output_zip_path):
9
  """
10
  Compresses the specified directory into a ZIP file.
 
5
  import re
6
  import logging
7
 
8
+
9
  def compress_directory_to_zip(directory_path, output_zip_path):
10
  """
11
  Compresses the specified directory into a ZIP file.