import io
import zipfile

import fitz  # PyMuPDF
import streamlit as st


def format_data_size(size_in_bytes):
    """Return a human-readable size string such as ``"1.50 MB"``.

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (TB) is reached, then formatted with two decimals.

    Args:
        size_in_bytes: Size in bytes (int or float).

    Returns:
        str: The scaled size followed by its unit, e.g. ``"512.00 B"``.
    """
    units = ["B", "KB", "MB", "GB", "TB"]
    size = size_in_bytes
    unit_index = 0
    while size >= 1024 and unit_index < len(units) - 1:
        size /= 1024
        unit_index += 1
    return f"{size:.2f} {units[unit_index]}"


def _render_page_as_png(doc, page_num, dpi):
    """Render one page of *doc* to PNG bytes at the given resolution."""
    # The page is copied into a temporary single-page document before
    # rendering. The context manager guarantees the temporary document is
    # closed even when copying or rendering raises.
    with fitz.open() as single_page_doc:
        single_page_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)
        pix = single_page_doc[0].get_pixmap(dpi=dpi)
        return pix.tobytes("png")


def split_pdf_by_pages_and_zip_in_memory(doc, dpi):
    """Render every page of a PDF to PNG and pack the images into a ZIP.

    Everything happens in memory. Progress is reported through a Streamlit
    placeholder created here and, when present, through the progress bar
    stored under ``st.session_state["progress_bar"]``. A page that fails to
    render is reported with ``st.error`` and skipped; the remaining pages are
    still processed.

    Args:
        doc: A PyMuPDF (fitz) Document object.
        dpi: Resolution, in dots per inch, used to render each page.

    Returns:
        zip_data (BytesIO): In-memory ZIP archive containing the PNG files,
            positioned at the start of the buffer.
        total_pages (int): Total number of pages in the original PDF.
    """
    total_pages = doc.page_count
    zip_buffer = io.BytesIO()  # In-memory buffer for the ZIP archive
    percent = st.empty()
    # The progress bar is optional so the function also works when the caller
    # did not register one in the session state.
    progress_bar = st.session_state.get("progress_bar")

    with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
        for page_num in range(total_pages):
            try:
                image_bytes = _render_page_as_png(doc, page_num, dpi)
                zipf.writestr(f"page-{page_num + 1}.png", image_bytes)
            except Exception as e:
                st.error(f"Error processing page {page_num + 1}: {e}")
                continue  # Keep processing the other pages

            # Progress is only advanced for pages that were written.
            p = int((page_num + 1) / total_pages * 100)
            percent.write(f"{p} %")
            if progress_bar is not None:
                progress_bar.progress(p)

    zip_buffer.seek(0)  # Reset the buffer pointer to the beginning
    return zip_buffer, total_pages


def create_download_link(file_data, data_size, display_name):
    """Show a Streamlit download button for a ZIP archive.

    Args:
        file_data: The archive content passed to ``st.download_button``.
        data_size: Human-readable size string shown in the button label.
        display_name: Name shown in the label and used as the file name.
    """
    st.download_button(
        label=f"Download {display_name} {data_size}",
        data=file_data,
        file_name=display_name,
        mime="application/zip",
    )


def main():
    """Run the Streamlit page: upload a PDF, convert it, offer the ZIP."""
    st.title("PDF to Image.zip 𓁨")
    # Adjacent literals are joined with explicit spaces so the sentences do
    # not run together in the rendered note.
    note_text = (
        "If you close the browser tab, all the files you uploaded and the "
        "files you are working on in the converting process will be "
        "completely deleted. "
        "If you try playing the YouTube video below and the sound stops, "
        "you will know that the browser tab has been closed or the "
        "connection has been lost."
    )
    st.markdown(note_text, unsafe_allow_html=True)
    st.video("https://youtu.be/L5T3VoB4qRA?si=9-FTcTYSvZaeKGaU")

    dpi = st.slider(
        "Select resolution (DPI)", min_value=72, max_value=300, value=144, step=1
    )
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
    if uploaded_file is None:
        return

    try:
        # Read the uploaded PDF file and open it from bytes; the context
        # manager closes the document once the archive has been built.
        file_bytes = uploaded_file.read()
        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
            st.session_state["progress_bar"] = st.progress(0)
            try:
                # Process the PDF and generate the ZIP archive in memory
                zip_data, total_pages = split_pdf_by_pages_and_zip_in_memory(
                    doc, dpi
                )
            finally:
                # Remove the widget reference even when processing fails so
                # it does not linger in the session state.
                del st.session_state["progress_bar"]

        st.subheader("Download ZIP Archive")
        zip_size = zip_data.getbuffer().nbytes
        data_size = format_data_size(zip_size)
        create_download_link(zip_data, data_size, f"pages_1_to_{total_pages}.zip")
    except Exception as e:
        st.error(f"Error processing PDF: {e}")


if __name__ == "__main__":
    main()