# pdf_highlight / app.py
# adil9858's picture
# Create app.py
# 6ab6e46 verified
import streamlit as st
import fitz # PyMuPDF
from io import BytesIO
def highlight_text_in_pdf(pdf_bytes, text_to_highlight):
    """Return a copy of a PDF with every occurrence of the given terms highlighted.

    Args:
        pdf_bytes: Raw content of the PDF document.
        text_to_highlight: Search terms to mark. Either an iterable of
            strings, or a single string which is treated as one term.

    Returns:
        A ``BytesIO`` buffer holding the annotated PDF, rewound to the
        start so it can be read immediately.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(text_to_highlight, str):
        text_to_highlight = [text_to_highlight]

    # Drop blank terms and exact repeats (keeping first-seen order) so the
    # same spot is not covered by several stacked annotations.
    terms = list(
        dict.fromkeys(term for term in text_to_highlight if term and term.strip())
    )

    output_pdf_bytes = BytesIO()
    pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        for page in pdf_document:
            for term in terms:
                for instance in page.search_for(term):
                    page.add_highlight_annot(instance)
        pdf_document.save(output_pdf_bytes)
    finally:
        # Release the document even when searching or saving fails.
        pdf_document.close()

    output_pdf_bytes.seek(0)
    return output_pdf_bytes
def main():
    """Render the Streamlit page: upload a PDF, enter text, download a highlighted copy."""
    st.title("PDF Text Highlighter")

    # Sidebar inputs: the PDF to annotate and the words to look for.
    st.sidebar.title("Upload PDF")
    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type=["pdf"])
    text_to_highlight = st.sidebar.text_input("Enter text to highlight")

    if not st.sidebar.button("Highlight and Download"):
        return

    # split() with no argument discards all whitespace, so a blank or
    # whitespace-only input yields an empty list.
    search_terms = text_to_highlight.split()
    if uploaded_file is None or not search_terms:
        st.error("Please upload a PDF file and specify text to highlight.")
        return

    try:
        # getvalue() returns the full upload regardless of the current read
        # position, whereas read() returns nothing once the stream has
        # already been consumed.
        highlighted_pdf_bytes = highlight_text_in_pdf(
            uploaded_file.getvalue(), search_terms
        )
    except (RuntimeError, ValueError) as exc:
        # NOTE(review): PyMuPDF is expected to signal unreadable or encrypted
        # documents with these exception types -- confirm against the
        # installed version.
        st.error(f"Could not process the PDF: {exc}")
        return

    st.download_button(
        label="Download Highlighted PDF",
        data=highlighted_pdf_bytes,
        file_name="highlighted_pdf.pdf",
        mime="application/pdf",
    )
    st.success("PDF highlighted and ready for download.")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()