Spaces:
Running
Running
Commit
·
ebf4c3d
1
Parent(s):
06b0249
added pdf to docx
Browse files- app.py +24 -1
- requirements.txt +4 -1
app.py
CHANGED
@@ -5,6 +5,7 @@ import tempfile
|
|
5 |
import os
|
6 |
import atexit
|
7 |
import zipfile
|
|
|
8 |
|
9 |
def merge_pdfs(pdf_files, order, start_on_odd=False):
|
10 |
pdf_writer = PdfWriter()
|
@@ -68,6 +69,14 @@ def images_to_zip(image_paths):
|
|
68 |
zipf.write(image_path, os.path.basename(image_path))
|
69 |
return zip_file_path
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# Create Gradio interface
|
72 |
with gr.Blocks(theme="gstaff/xkcd") as demo:
|
73 |
gr.Markdown("# PDF Merger and Converter")
|
@@ -180,6 +189,20 @@ with gr.Blocks(theme="gstaff/xkcd") as demo:
|
|
180 |
outputs=[pdf_result]
|
181 |
)
|
182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
# Launch the Gradio app
|
184 |
demo.launch()
|
185 |
|
@@ -187,7 +210,7 @@ demo.launch()
|
|
187 |
def cleanup_temp_files():
|
188 |
temp_dir = tempfile.gettempdir()
|
189 |
for filename in os.listdir(temp_dir):
|
190 |
-
if filename.endswith('.pdf') or filename.endswith('.jpg') or filename.endswith('.png'):
|
191 |
os.remove(os.path.join(temp_dir, filename))
|
192 |
|
193 |
atexit.register(cleanup_temp_files)
|
|
|
5 |
import os
|
6 |
import atexit
|
7 |
import zipfile
|
8 |
+
from pdf2docx import Converter
|
9 |
|
10 |
def merge_pdfs(pdf_files, order, start_on_odd=False):
|
11 |
pdf_writer = PdfWriter()
|
|
|
69 |
zipf.write(image_path, os.path.basename(image_path))
|
70 |
return zip_file_path
|
71 |
|
72 |
+
def pdf_to_docx(pdf_file):
    """Convert an uploaded PDF to a DOCX file and return the DOCX path.

    Args:
        pdf_file: The uploaded PDF, either as an object exposing the file
            path through a ``name`` attribute or as a plain path string.
            ``None`` (nothing uploaded / upload cleared) is accepted.

    Returns:
        Path of the generated ``.docx`` file inside the system temp
        directory, or ``None`` when ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        # Nothing to convert; hand back an empty result instead of failing
        # on the attribute lookup below.
        return None

    # Accept both an object carrying the path in ``.name`` and a bare path.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Use a unique output file per call: a single fixed "converted.docx"
    # would be overwritten when two conversions overlap. The file still
    # lives in the temp directory and keeps the ".docx" suffix.
    fd, temp_file_path = tempfile.mkstemp(
        suffix=".docx", prefix="converted_", dir=tempfile.gettempdir()
    )
    os.close(fd)  # the converter writes by path; the descriptor is not needed

    converter = Converter(pdf_path)
    try:
        converter.convert(temp_file_path)
    finally:
        # Release the underlying PDF handle even if the conversion fails.
        converter.close()
    return temp_file_path
|
79 |
+
|
80 |
# Create Gradio interface
|
81 |
with gr.Blocks(theme="gstaff/xkcd") as demo:
|
82 |
gr.Markdown("# PDF Merger and Converter")
|
|
|
189 |
outputs=[pdf_result]
|
190 |
)
|
191 |
|
192 |
+
with gr.TabItem("PDF to DOCX Converter"):
|
193 |
+
gr.Markdown("some PDF files may not be converted properly due to the complexity of the PDF file")
|
194 |
+
pdf_to_docx_input = gr.File(label="Upload PDF File to Convert to DOCX", file_types=[".pdf"], file_count="single")
|
195 |
+
docx_result = gr.File(label="Download DOCX")
|
196 |
+
|
197 |
+
def convert_pdf_to_docx(pdf_file):
|
198 |
+
return pdf_to_docx(pdf_file)
|
199 |
+
|
200 |
+
pdf_to_docx_input.change(
|
201 |
+
convert_pdf_to_docx,
|
202 |
+
inputs=[pdf_to_docx_input],
|
203 |
+
outputs=[docx_result]
|
204 |
+
)
|
205 |
+
|
206 |
# Launch the Gradio app
|
207 |
demo.launch()
|
208 |
|
|
|
210 |
def cleanup_temp_files():
    """Remove leftover PDF, JPG, PNG and DOCX files from the temp directory.

    Every entry of ``tempfile.gettempdir()`` whose name ends in one of those
    extensions is deleted. The sweep is best-effort: an entry that cannot be
    removed is skipped so that the remaining entries are still processed.

    NOTE(review): matching is by extension only, so files in the temp
    directory that were created by other programs are removed as well.
    """
    temp_dir = tempfile.gettempdir()
    for filename in os.listdir(temp_dir):
        if not filename.endswith(('.pdf', '.jpg', '.png', '.docx')):
            continue
        try:
            os.remove(os.path.join(temp_dir, filename))
        except OSError:
            # Entry is a directory, already gone, or not removable by us;
            # keep going rather than aborting the whole cleanup.
            continue
|
215 |
|
216 |
atexit.register(cleanup_temp_files)
|
requirements.txt
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
gradio
|
2 |
PyPDF2
|
3 |
img2pdf
|
4 |
-
pdf2image
|
|
|
|
|
|
|
|
1 |
gradio
|
2 |
PyPDF2
|
3 |
img2pdf
|
4 |
+
pdf2image
|
5 |
+
pdf2docx
|
6 |
+
PyMuPDF==1.24.14
|
7 |
+
fonttools==4.55.0
|