Spaces:
Runtime error
Runtime error
File size: 6,415 Bytes
35b22df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
"""
Representation and utils for ranges of PDF file pages.
Copyright (c) 2014, Steve Witham <[email protected]>.
All rights reserved. This software is available under a BSD license;
see https://github.com/py-pdf/PyPDF2/blob/main/LICENSE
"""
import re
from typing import Any, List, Tuple, Union
from .errors import ParseError
# A decimal int; "-0" is deliberately not accepted.
_INT_RE = r"(0|-?[1-9]\d*)"

# Matches "int", "[int]:[int]" or "[int]:[int]:[int]".
# Capture groups read by the parser:
#   group 2 -- the lone int (single-page form)
#   group 4 -- start, group 6 -- stop, group 8 -- step (colon forms)
PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$"
class PageRange:
    """
    A slice-like representation of a range of page indices.

    For example, page numbers, only starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

      -  PageRange(str) parses a string representing a page range.
      -  PageRange(slice) directly "imports" a slice.
      -  to_slice() gives the equivalent slice.
      -  str() and repr() allow printing.
      -  indices(n) is like slice.indices(n).
      -  Instances compare equal, and hash equal, when their
         (start, stop, step) triples are equal.
      -  a + b merges two overlapping or touching ranges without a step.
    """

    def __init__(self, arg: Union[slice, "PageRange", str]) -> None:
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        Remember, page indices start with zero.
        Page range expression examples:
            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.

        Raises ParseError if arg is neither a slice, a PageRange, nor a
        string matching PAGE_RANGE_RE.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        # Non-string arguments short-circuit to False and are rejected below.
        m = isinstance(arg, str) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            # "-1" must not become slice(-1, 0), which would be empty.
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Groups 4, 6 and 8 are start, stop and step; empty means None.
            self._slice = slice(*[int(g) if g else None for g in m.group(4, 6, 8)])

    @staticmethod
    def valid(input: Any) -> bool:
        """True if input is a valid initializer for a PageRange."""
        return isinstance(input, (slice, PageRange)) or (
            isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input))
        )

    def to_slice(self) -> slice:
        """Return the slice equivalent of this page range."""
        return self._slice

    def __str__(self) -> str:
        """A string like "1:2:3"."""
        s = self._slice
        indices: Union[Tuple[int, int], Tuple[int, int, int]]
        if s.step is None:
            # A one-page range is printed as the bare page index.
            if s.start is not None and s.stop == s.start + 1:
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ":".join("" if i is None else str(i) for i in indices)

    def __repr__(self) -> str:
        """A string like "PageRange('1:2:3')"."""
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n: int) -> Tuple[int, int, int]:
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)

    def __eq__(self, other: Any) -> bool:
        """True if other is a PageRange wrapping an equal slice."""
        if not isinstance(other, PageRange):
            return False
        return self._slice == other._slice

    def __hash__(self) -> int:
        """
        Hash consistent with __eq__.

        Defining __eq__ alone would make instances unhashable.  The
        (start, stop, step) triple is hashed instead of the slice itself
        because slice objects are not hashable on every Python version.
        """
        s = self._slice
        return hash((s.start, s.stop, s.step))

    def __add__(self, other: "PageRange") -> "PageRange":
        """
        Merge two overlapping or touching page ranges into one.

        An omitted start counts as 0 and an omitted stop as unbounded.
        Bounds are compared numerically, without knowledge of the page
        count.

        Raises TypeError if other is not a PageRange.
        Raises ValueError if either range has a step, if there is a gap
        between the ranges, or if an open bound is combined with a mix of
        negative and non-negative indices (which cannot be ordered
        without knowing the number of pages).
        """
        if not isinstance(other, PageRange):
            raise TypeError(f"Can't add PageRange and {type(other)}")
        if self._slice.step is not None or other._slice.step is not None:
            raise ValueError("Can't add PageRange with stride")

        bounds = (
            self._slice.start,
            self._slice.stop,
            other._slice.start,
            other._slice.stop,
        )
        if None in bounds:
            explicit = [bound for bound in bounds if bound is not None]
            # An omitted start is page 0, i.e. a non-negative index.
            counts_from_front = (
                self._slice.start is None
                or other._slice.start is None
                or any(bound >= 0 for bound in explicit)
            )
            if counts_from_front and any(bound < 0 for bound in explicit):
                raise ValueError(
                    "Can't add open-ended PageRanges mixing negative and "
                    "non-negative indices"
                )

        # The sort is stable, so on equal starts self stays first.
        first, second = sorted(
            (self._slice, other._slice),
            key=lambda s: 0 if s.start is None else s.start,
        )
        unbounded = float("inf")
        first_stop = unbounded if first.stop is None else first.stop
        second_start = 0 if second.start is None else second.start
        second_stop = unbounded if second.stop is None else second.stop

        if second_start > first_stop:
            # There is a gap between the two ranges.
            raise ValueError("Can't add PageRanges with gap")

        # Keep the original stop object so an open end stays None.
        stop = first.stop if first_stop >= second_stop else second.stop
        return PageRange(slice(first.start, stop))
# The range of all pages: ":" parses to a slice with no start, stop or step.
PAGE_RANGE_ALL = PageRange(":")
def parse_filename_page_ranges(
    args: List[Union[str, PageRange, None]]
) -> List[Tuple[str, PageRange]]:
    """
    Pair each filename in args with the page ranges that follow it.

    The first argument must be a filename; the other args are filenames,
    page-range expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.

    Returns a list of (filename, page_range) pairs in argument order.
    Raises ValueError if a page range appears before any filename.
    """
    result: List[Tuple[str, PageRange]] = []
    current_file = None
    has_range = False

    # The trailing None acts as a sentinel that flushes the last filename.
    for item in args + [None]:
        if not PageRange.valid(item):
            # A new filename, or the end of the list: if the previous file
            # received no explicit range, it gets all of its pages.
            if current_file and not has_range:
                result.append((current_file, PAGE_RANGE_ALL))
            current_file, has_range = item, False
            continue

        if not current_file:
            raise ValueError(
                "The first argument must be a filename, not a page range."
            )
        result.append((current_file, PageRange(item)))
        has_range = True

    return result
# Type alias for the forms a page-range specification may take: a string,
# a PageRange, a 2- or 3-int tuple, or a list of ints.
# NOTE(review): how the tuple and list forms are interpreted is decided by
# the consumers of this alias, which are not visible here -- confirm there.
PageRangeSpec = Union[str, PageRange, Tuple[int, int], Tuple[int, int, int], List[int]]
|