Spaces:
Paused
Paused
project set-up update
Browse files- .gitignore +2 -0
- Makefile +3 -0
- header.html +10 -21
- requirements.txt +4 -2
- utils.py +1 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
output/
|
Makefile
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
apt-get update && apt-get upgrade
|
2 |
+
|
3 |
+
/home/user/app/download_models_hf.py
|
header.html
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
<html>
|
2 |
<head>
|
3 |
-
<!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/css/bulma.min.css"> -->
|
4 |
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
|
5 |
<style>
|
6 |
.link-block {
|
@@ -21,10 +20,6 @@
|
|
21 |
padding: 0 16px;
|
22 |
cursor: pointer !important;
|
23 |
}
|
24 |
-
.external-link,
|
25 |
-
.external-link:hover {
|
26 |
-
cursor: pointer !important;
|
27 |
-
}
|
28 |
a {
|
29 |
text-decoration: none;
|
30 |
}
|
@@ -43,12 +38,7 @@
|
|
43 |
gap: 24px;
|
44 |
border-radius: 8px;
|
45 |
">
|
46 |
-
<div style="
|
47 |
-
display: flex;
|
48 |
-
flex-direction: column;
|
49 |
-
align-items: center;
|
50 |
-
gap: 16px;
|
51 |
-
">
|
52 |
<div style="display: flex; flex-direction: column; gap: 8px">
|
53 |
<h1 style="
|
54 |
font-size: 48px;
|
@@ -57,7 +47,7 @@
|
|
57 |
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
|
58 |
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
|
59 |
">
|
60 |
-
MinerU: PDF Extraction &
|
61 |
</h1>
|
62 |
</div>
|
63 |
</div>
|
@@ -69,8 +59,9 @@
|
|
69 |
color: #fafafa;
|
70 |
opacity: 0.8;
|
71 |
">
|
72 |
-
|
73 |
-
|
|
|
74 |
</p>
|
75 |
<style>
|
76 |
.link-block {
|
@@ -85,7 +76,7 @@
|
|
85 |
<div class="publication-links">
|
86 |
<!-- Code Link. -->
|
87 |
<span class="link-block">
|
88 |
-
<a href="https://github.com/opendatalab/MinerU" class="external-link button is-normal is-rounded is-dark"
|
89 |
<span class="icon" style="margin-right: 4px">
|
90 |
<i class="fab fa-github" style="color: white; margin-right: 4px"></i>
|
91 |
</span>
|
@@ -95,7 +86,7 @@
|
|
95 |
|
96 |
<!-- arXiv Link. -->
|
97 |
<span class="link-block">
|
98 |
-
<a href="https://arxiv.org/abs/2409.18839" class="external-link button is-normal is-rounded is-dark"
|
99 |
<span class="icon" style="margin-right: 8px">
|
100 |
<i class="fas fa-file" style="color: white"></i>
|
101 |
</span>
|
@@ -105,7 +96,7 @@
|
|
105 |
|
106 |
<!-- Homepage Link. -->
|
107 |
<span class="link-block">
|
108 |
-
<a href="https://mineru.org.cn/home?source=huggingface" class="external-link button is-normal is-rounded is-dark"
|
109 |
<span class="icon" style="margin-right: 8px">
|
110 |
<i class="fas fa-home" style="color: white"></i>
|
111 |
</span>
|
@@ -115,7 +106,7 @@
|
|
115 |
|
116 |
<!-- Client Link. -->
|
117 |
<span class="link-block">
|
118 |
-
<a href="https://mineru.org.cn/client?source=huggingface" class="external-link button is-normal is-rounded is-dark"
|
119 |
<span class="icon" style="margin-right: 8px">
|
120 |
<i class="fas fa-download" style="color: white"></i>
|
121 |
</span>
|
@@ -125,7 +116,7 @@
|
|
125 |
|
126 |
<!-- Voice Reading Demo Link. -->
|
127 |
<span class="link-block">
|
128 |
-
<a href="https://mineru.org.cn/voice?source=huggingface" class="external-link button is-normal is-rounded is-dark"
|
129 |
<span class="icon" style="margin-right: 8px">
|
130 |
<i class="fas fa-volume-up" style="color: white"></i>
|
131 |
</span>
|
@@ -134,8 +125,6 @@
|
|
134 |
</span>
|
135 |
</div>
|
136 |
</div>
|
137 |
-
|
138 |
-
<!-- New Demo Links -->
|
139 |
</div>
|
140 |
</body>
|
141 |
</html>
|
|
|
1 |
<html>
|
2 |
<head>
|
|
|
3 |
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
|
4 |
<style>
|
5 |
.link-block {
|
|
|
20 |
padding: 0 16px;
|
21 |
cursor: pointer !important;
|
22 |
}
|
|
|
|
|
|
|
|
|
23 |
a {
|
24 |
text-decoration: none;
|
25 |
}
|
|
|
38 |
gap: 24px;
|
39 |
border-radius: 8px;
|
40 |
">
|
41 |
+
<div style="display: flex; flex-direction: column; align-items: center; gap: 16px">
|
|
|
|
|
|
|
|
|
|
|
42 |
<div style="display: flex; flex-direction: column; gap: 8px">
|
43 |
<h1 style="
|
44 |
font-size: 48px;
|
|
|
47 |
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
|
48 |
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
|
49 |
">
|
50 |
+
MinerU: AI-Powered PDF Extraction & Voice Reading
|
51 |
</h1>
|
52 |
</div>
|
53 |
</div>
|
|
|
59 |
color: #fafafa;
|
60 |
opacity: 0.8;
|
61 |
">
|
62 |
+
Built for automation and efficiency, MinerU is an open-source AI solution<br>
|
63 |
+
for extracting insights from PDFs, webpages, and e-books—now with voice-powered<br>
|
64 |
+
reading capabilities for hands-free access to your documents.
|
65 |
</p>
|
66 |
<style>
|
67 |
.link-block {
|
|
|
76 |
<div class="publication-links">
|
77 |
<!-- Code Link. -->
|
78 |
<span class="link-block">
|
79 |
+
<a href="https://github.com/opendatalab/MinerU" class="external-link button is-normal is-rounded is-dark">
|
80 |
<span class="icon" style="margin-right: 4px">
|
81 |
<i class="fab fa-github" style="color: white; margin-right: 4px"></i>
|
82 |
</span>
|
|
|
86 |
|
87 |
<!-- arXiv Link. -->
|
88 |
<span class="link-block">
|
89 |
+
<a href="https://arxiv.org/abs/2409.18839" class="external-link button is-normal is-rounded is-dark">
|
90 |
<span class="icon" style="margin-right: 8px">
|
91 |
<i class="fas fa-file" style="color: white"></i>
|
92 |
</span>
|
|
|
96 |
|
97 |
<!-- Homepage Link. -->
|
98 |
<span class="link-block">
|
99 |
+
<a href="https://mineru.org.cn/home?source=huggingface" class="external-link button is-normal is-rounded is-dark">
|
100 |
<span class="icon" style="margin-right: 8px">
|
101 |
<i class="fas fa-home" style="color: white"></i>
|
102 |
</span>
|
|
|
106 |
|
107 |
<!-- Client Link. -->
|
108 |
<span class="link-block">
|
109 |
+
<a href="https://mineru.org.cn/client?source=huggingface" class="external-link button is-normal is-rounded is-dark">
|
110 |
<span class="icon" style="margin-right: 8px">
|
111 |
<i class="fas fa-download" style="color: white"></i>
|
112 |
</span>
|
|
|
116 |
|
117 |
<!-- Voice Reading Demo Link. -->
|
118 |
<span class="link-block">
|
119 |
+
<a href="https://mineru.org.cn/voice?source=huggingface" class="external-link button is-normal is-rounded is-dark">
|
120 |
<span class="icon" style="margin-right: 8px">
|
121 |
<i class="fas fa-volume-up" style="color: white"></i>
|
122 |
</span>
|
|
|
125 |
</span>
|
126 |
</div>
|
127 |
</div>
|
|
|
|
|
128 |
</div>
|
129 |
</body>
|
130 |
</html>
|
requirements.txt
CHANGED
@@ -14,7 +14,7 @@ matplotlib
|
|
14 |
ultralytics>=8.3.48
|
15 |
paddleocr==2.7.3
|
16 |
paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
|
17 |
-
struct-eqtable
|
18 |
detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
|
19 |
magic-pdf>=1.0.1
|
20 |
torch>=2.2.2,<=2.3.1
|
@@ -23,4 +23,6 @@ rapid-table>=1.0.3,<2.0.0
|
|
23 |
rapidocr-paddle
|
24 |
rapidocr-onnxruntime
|
25 |
gradio-pdf>=0.0.21
|
26 |
-
openai
|
|
|
|
|
|
14 |
ultralytics>=8.3.48
|
15 |
paddleocr==2.7.3
|
16 |
paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
|
17 |
+
struct-eqtable>=0.3.2
|
18 |
detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
|
19 |
magic-pdf>=1.0.1
|
20 |
torch>=2.2.2,<=2.3.1
|
|
|
23 |
rapidocr-paddle
|
24 |
rapidocr-onnxruntime
|
25 |
gradio-pdf>=0.0.21
|
26 |
+
openai>=1.64.0
|
27 |
+
playsound
|
28 |
+
gTTS
|
utils.py
CHANGED
@@ -5,6 +5,7 @@ import base64
|
|
5 |
import re
|
6 |
import logging
|
7 |
|
|
|
8 |
def compress_directory_to_zip(directory_path, output_zip_path):
|
9 |
"""
|
10 |
Compresses the specified directory into a ZIP file.
|
|
|
5 |
import re
|
6 |
import logging
|
7 |
|
8 |
+
|
9 |
def compress_directory_to_zip(directory_path, output_zip_path):
|
10 |
"""
|
11 |
Compresses the specified directory into a ZIP file.
|