Chatbot / pdf_extractor.py
Ralqasimi's picture
Update pdf_extractor.py
e4a6d2a verified
raw
history blame contribute delete
382 Bytes
import fitz # PyMuPDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF using PyMuPDF (fitz).

    Args:
        pdf_path: Location of the PDF, passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages concatenated in page order, with leading
        and trailing whitespace stripped.

    Raises:
        Nothing is caught here: any exception raised by ``fitz.open`` or
        ``page.get_text`` propagates to the caller.
    """
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as pdf:
        # Iterate the pages directly and join once, rather than indexing
        # by page number and growing a string with repeated ``+=``.
        page_texts = [page.get_text() for page in pdf]
    return "".join(page_texts).strip()
def get_pdf_text(pdf_path):
    """Return the text extracted from the PDF at *pdf_path*.

    Convenience alias: forwards the argument to ``extract_text_from_pdf``
    and returns its result unchanged.
    """
    extracted = extract_text_from_pdf(pdf_path)
    return extracted