shriarul5273 committed on
Commit
ebf4c3d
·
1 Parent(s): 06b0249

added pdf to docx

Browse files
Files changed (2) hide show
  1. app.py +24 -1
  2. requirements.txt +4 -1
app.py CHANGED
@@ -5,6 +5,7 @@ import tempfile
5
  import os
6
  import atexit
7
  import zipfile
 
8
 
9
  def merge_pdfs(pdf_files, order, start_on_odd=False):
10
  pdf_writer = PdfWriter()
@@ -68,6 +69,14 @@ def images_to_zip(image_paths):
68
  zipf.write(image_path, os.path.basename(image_path))
69
  return zip_file_path
70
 
 
 
 
 
 
 
 
 
71
  # Create Gradio interface
72
  with gr.Blocks(theme="gstaff/xkcd") as demo:
73
  gr.Markdown("# PDF Merger and Converter")
@@ -180,6 +189,20 @@ with gr.Blocks(theme="gstaff/xkcd") as demo:
180
  outputs=[pdf_result]
181
  )
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  # Launch the Gradio app
184
  demo.launch()
185
 
@@ -187,7 +210,7 @@ demo.launch()
187
  def cleanup_temp_files():
188
  temp_dir = tempfile.gettempdir()
189
  for filename in os.listdir(temp_dir):
190
- if filename.endswith('.pdf') or filename.endswith('.jpg') or filename.endswith('.png'):
191
  os.remove(os.path.join(temp_dir, filename))
192
 
193
  atexit.register(cleanup_temp_files)
 
5
  import os
6
  import atexit
7
  import zipfile
8
+ from pdf2docx import Converter
9
 
10
  def merge_pdfs(pdf_files, order, start_on_odd=False):
11
  pdf_writer = PdfWriter()
 
69
  zipf.write(image_path, os.path.basename(image_path))
70
  return zip_file_path
71
 
72
def pdf_to_docx(pdf_file):
    """Convert an uploaded PDF into a DOCX file and return the DOCX path.

    Args:
        pdf_file: The uploaded PDF. Either an object exposing the file path
            as ``.name`` or a plain path string. ``None`` (no file
            selected) is accepted.

    Returns:
        Path of the generated ``.docx`` file inside the system temp
        directory, or ``None`` when ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        return None

    # NOTE(review): upload objects are assumed to carry their path in
    # ``.name``; a bare path string is passed through unchanged.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Use a unique output name so that simultaneous conversions do not
    # overwrite each other's result in the shared temp directory.
    fd, temp_file_path = tempfile.mkstemp(
        prefix="converted_", suffix=".docx", dir=tempfile.gettempdir()
    )
    os.close(fd)

    converter = Converter(pdf_path)
    try:
        converter.convert(temp_file_path)
    finally:
        # Release the underlying PDF handle even if the conversion fails.
        converter.close()
    return temp_file_path
79
+
80
  # Create Gradio interface
81
  with gr.Blocks(theme="gstaff/xkcd") as demo:
82
  gr.Markdown("# PDF Merger and Converter")
 
189
  outputs=[pdf_result]
190
  )
191
 
192
with gr.TabItem("PDF to DOCX Converter"):
    gr.Markdown("some PDF files may not be converted properly due to the complexity of the PDF file")
    pdf_to_docx_input = gr.File(
        label="Upload PDF File to Convert to DOCX",
        file_types=[".pdf"],
        file_count="single",
    )
    docx_result = gr.File(label="Download DOCX")

    def convert_pdf_to_docx(pdf_file):
        """Event handler: convert the uploaded PDF and return the DOCX path.

        Returns ``None`` when no file is present, so that clearing the
        upload does not attempt a conversion.
        """
        if pdf_file is None:
            return None
        return pdf_to_docx(pdf_file)

    # Run the conversion whenever the uploaded file changes.
    pdf_to_docx_input.change(
        convert_pdf_to_docx,
        inputs=[pdf_to_docx_input],
        outputs=[docx_result],
    )
205
+
206
  # Launch the Gradio app
207
  demo.launch()
208
 
 
210
def cleanup_temp_files():
    """Delete leftover PDF, JPG, PNG and DOCX files from the system temp dir.

    Cleanup is best-effort: an entry that cannot be removed (for example a
    directory whose name ends in one of the suffixes, a file that is already
    gone, or one that may not be deleted) is skipped so the remaining files
    are still cleaned up.

    NOTE(review): this matches every file with these suffixes in the shared
    temp directory, not only files this app created - confirm that is
    intended.
    """
    temp_dir = tempfile.gettempdir()
    for filename in os.listdir(temp_dir):
        if not filename.endswith(('.pdf', '.jpg', '.png', '.docx')):
            continue
        try:
            os.remove(os.path.join(temp_dir, filename))
        except OSError:
            # Keep going; one undeletable entry must not abort the cleanup.
            continue
215
 
216
  atexit.register(cleanup_temp_files)
requirements.txt CHANGED
@@ -1,4 +1,7 @@
1
  gradio
2
  PyPDF2
3
  img2pdf
4
- pdf2image
 
 
 
 
1
  gradio
2
  PyPDF2
3
  img2pdf
4
+ pdf2image
5
+ pdf2docx
6
+ PyMuPDF==1.24.14
7
+ fonttools==4.55.0