Update pdf_attacker.py
Browse files- pdf_attacker.py +58 -16
pdf_attacker.py
CHANGED
@@ -59,13 +59,29 @@ class PDFAttacker:
|
|
59 |
y = self.page_size[1] - self.margin
|
60 |
|
61 |
for item in cluster_items:
|
62 |
-
|
|
|
|
|
|
|
63 |
s = item['text']
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
x = self.margin
|
66 |
y -= self.line_height
|
67 |
-
|
68 |
-
x
|
|
|
69 |
|
70 |
c.save()
|
71 |
print(f"Normal PDF saved: {output_path}")
|
@@ -104,8 +120,19 @@ class PDFAttacker:
|
|
104 |
for line in lines:
|
105 |
x = self.margin
|
106 |
for item in line:
|
107 |
-
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
y -= self.line_height
|
110 |
|
111 |
# drawing order is per-cluster; attack by shuffling a subset
|
@@ -181,8 +208,19 @@ class PDFAttacker:
|
|
181 |
for line in lines:
|
182 |
x = self.margin
|
183 |
for item in line:
|
184 |
-
|
185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
y -= self.line_height
|
187 |
|
188 |
c = canvas.Canvas(output_path, pagesize=self.page_size)
|
@@ -298,10 +336,17 @@ class PDFAttacker:
|
|
298 |
char_end = byte_to_char.get(next_byte, len(text))
|
299 |
else:
|
300 |
char_end = len(text)
|
301 |
-
|
302 |
substr = text[char_start:char_end]
|
303 |
-
|
304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
|
306 |
return items
|
307 |
|
@@ -309,7 +354,7 @@ class PDFAttacker:
|
|
309 |
# fallback: per-character widths
|
310 |
for ch in text:
|
311 |
w = pdfmetrics.stringWidth(ch, self.font_name, self.font_size)
|
312 |
-
items.append({'text': ch, 'width': w})
|
313 |
return items
|
314 |
|
315 |
def _find_cluster_sequence_for_target(self, cluster_items, target_text: str):
|
@@ -341,10 +386,7 @@ class PDFAttacker:
|
|
341 |
|
342 |
def main():
|
343 |
ai_text = """
|
344 |
-
The rapid advancement of artificial intelligence has transformed numerous industries
|
345 |
-
and revolutionized the way we approach complex problems. Machine learning algorithms
|
346 |
-
have demonstrated remarkable capabilities in pattern recognition, data analysis,
|
347 |
-
and predictive modeling. These technological innovations continue to push the
|
348 |
boundaries of what was previously thought impossible, enabling automation and
|
349 |
efficiency improvements across various sectors. As we move forward, the integration
|
350 |
of AI systems into our daily lives becomes increasingly prevalent and sophisticated.
|
|
|
59 |
y = self.page_size[1] - self.margin
|
60 |
|
61 |
for item in cluster_items:
|
62 |
+
# prefer HarfBuzz advance if present
|
63 |
+
adv = item.get('adv_pts', item.get('width', 0))
|
64 |
+
width_rl = item.get('width_rl', adv)
|
65 |
+
offset = item.get('offset_pts', 0)
|
66 |
s = item['text']
|
67 |
+
|
68 |
+
# stability heuristic: if measured width differs significantly from HarfBuzz advance,
|
69 |
+
# prefer the ReportLab-measured width for layout to match drawString behavior (fix em-dash cases)
|
70 |
+
thresh = max(0.5, self.font_size * 0.1)
|
71 |
+
used_adv = adv
|
72 |
+
if abs(width_rl - adv) > thresh:
|
73 |
+
used_adv = width_rl
|
74 |
+
|
75 |
+
# discard the offset (reset to 0) if it's unreasonably large relative to the advance
|
76 |
+
if abs(offset) > (used_adv * 0.6):
|
77 |
+
offset = 0
|
78 |
+
|
79 |
+
if x + used_adv > self.margin + max_width:
|
80 |
x = self.margin
|
81 |
y -= self.line_height
|
82 |
+
# draw at x + offset to respect glyph x_offset where reasonable
|
83 |
+
c.drawString(x + offset, y, s)
|
84 |
+
x += used_adv
|
85 |
|
86 |
c.save()
|
87 |
print(f"Normal PDF saved: {output_path}")
|
|
|
120 |
for line in lines:
|
121 |
x = self.margin
|
122 |
for item in line:
|
123 |
+
adv = item.get('adv_pts', item.get('width', 0))
|
124 |
+
width_rl = item.get('width_rl', adv)
|
125 |
+
offset = item.get('offset_pts', 0)
|
126 |
+
|
127 |
+
thresh = max(0.5, self.font_size * 0.1)
|
128 |
+
used_adv = adv
|
129 |
+
if abs(width_rl - adv) > thresh:
|
130 |
+
used_adv = width_rl
|
131 |
+
if abs(offset) > (used_adv * 0.6):
|
132 |
+
offset = 0
|
133 |
+
|
134 |
+
char_positions.append((x + offset, y, item['text']))
|
135 |
+
x += used_adv
|
136 |
y -= self.line_height
|
137 |
|
138 |
# drawing order is per-cluster; attack by shuffling a subset
|
|
|
208 |
for line in lines:
|
209 |
x = self.margin
|
210 |
for item in line:
|
211 |
+
adv = item.get('adv_pts', item.get('width', 0))
|
212 |
+
width_rl = item.get('width_rl', adv)
|
213 |
+
offset = item.get('offset_pts', 0)
|
214 |
+
|
215 |
+
thresh = max(0.5, self.font_size * 0.1)
|
216 |
+
used_adv = adv
|
217 |
+
if abs(width_rl - adv) > thresh:
|
218 |
+
used_adv = width_rl
|
219 |
+
if abs(offset) > (used_adv * 0.6):
|
220 |
+
offset = 0
|
221 |
+
|
222 |
+
positions.append((x + offset, y, item['text']))
|
223 |
+
x += used_adv
|
224 |
y -= self.line_height
|
225 |
|
226 |
c = canvas.Canvas(output_path, pagesize=self.page_size)
|
|
|
336 |
char_end = byte_to_char.get(next_byte, len(text))
|
337 |
else:
|
338 |
char_end = len(text)
|
339 |
+
# substring for this cluster
|
340 |
substr = text[char_start:char_end]
|
341 |
+
|
342 |
+
# Use ReportLab measured width for cluster advance and set offset to zero
|
343 |
+
try:
|
344 |
+
width_rl = pdfmetrics.stringWidth(substr, self.font_name, self.font_size)
|
345 |
+
except Exception:
|
346 |
+
# fallback: estimate from HarfBuzz if possible
|
347 |
+
adv_sum = clusters.get(start, 0)
|
348 |
+
width_rl = (adv_sum / float(self.upem)) * self.font_size
|
349 |
+
items.append({'text': substr, 'adv_pts': width_rl, 'offset_pts': 0, 'width_rl': width_rl, 'width': width_rl})
|
350 |
|
351 |
return items
|
352 |
|
|
|
354 |
# fallback: per-character widths
|
355 |
for ch in text:
|
356 |
w = pdfmetrics.stringWidth(ch, self.font_name, self.font_size)
|
357 |
+
items.append({'text': ch, 'adv_pts': w, 'offset_pts': 0, 'width_rl': w, 'width': w})
|
358 |
return items
|
359 |
|
360 |
def _find_cluster_sequence_for_target(self, cluster_items, target_text: str):
|
|
|
386 |
|
387 |
def main():
|
388 |
ai_text = """
|
389 |
+
The rapid advancement of artificial intelligence has transformed numerous industries — and revolutionized the way we approach complex problems. Machine learning algorithms have demonstrated remarkable capabilities in pattern recognition, data analysis, and predictive modeling. These technological innovations continue to push the
|
|
|
|
|
|
|
390 |
boundaries of what was previously thought impossible, enabling automation and
|
391 |
efficiency improvements across various sectors. As we move forward, the integration
|
392 |
of AI systems into our daily lives becomes increasingly prevalent and sophisticated.
|