Spaces:
Build error
Build error
File size: 605 Bytes
52d0cfd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from PyPDF2 import PdfReader
class Paper:
    """Wrap a PDF reader object and expose its page text in bounded chunks.

    The wrapped object only needs to provide ``pages`` (an iterable of page
    objects that have an ``extract_text()`` method) and ``metadata``; those
    are the only members this class touches.
    """

    # The annotation is quoted so that defining the class never has to
    # resolve ``PdfReader`` at runtime; it is used for type checking only.
    def __init__(self, pdf_obj: "PdfReader") -> None:
        """Keep a reference to the reader and cache its ``metadata``.

        Args:
            pdf_obj: The PDF reader whose pages will be iterated.
        """
        self._pdf_obj = pdf_obj
        self._paper_meta = self._pdf_obj.metadata

    def iter_pages(self, iter_text_len: int = 3000):
        """Yield the extracted text of every page, cut into chunks.

        Args:
            iter_text_len: Maximum number of characters per chunk. Must be
                a positive integer.

        Yields:
            ``(page_idx, chunk_idx, text)`` tuples, where ``page_idx`` is the
            zero-based position of the page in the reader, ``chunk_idx`` is
            the zero-based position of the chunk within that page, and
            ``text`` is a non-empty slice of at most ``iter_text_len``
            characters. Pages without extractable text yield nothing.

        Raises:
            ValueError: If ``iter_text_len`` is not positive. Because this
                is a generator, the error surfaces when iteration starts.
        """
        if iter_text_len <= 0:
            raise ValueError(
                f"iter_text_len must be a positive integer, got {iter_text_len!r}"
            )

        for page_idx, page in enumerate(self._pdf_obj.pages):
            # Treat a missing (None) extraction result like an empty page.
            txt = page.extract_text() or ""
            # Stepping over start offsets yields exactly ceil(len / size)
            # chunks, so a text whose length is an exact multiple of the
            # chunk size does not produce a trailing empty chunk.
            for chunk_idx, start in enumerate(range(0, len(txt), iter_text_len)):
                yield page_idx, chunk_idx, txt[start:start + iter_text_len]
if __name__ == '__main__':
    # Manual smoke run: open a sample PDF located one directory above the
    # working directory and wrap it in a Paper instance.
    sample_pdf_path = '../alexnet.pdf'
    reader = PdfReader(sample_pdf_path)
    paper = Paper(reader)
|