acmc committed on
Commit
33283ff
·
verified ·
1 Parent(s): 10e136a

Update pdf_attacker.py

Browse files
Files changed (1) hide show
  1. pdf_attacker.py +58 -16
pdf_attacker.py CHANGED
@@ -59,13 +59,29 @@ class PDFAttacker:
59
  y = self.page_size[1] - self.margin
60
 
61
  for item in cluster_items:
62
- w = item['width']
 
 
 
63
  s = item['text']
64
- if x + w > self.margin + max_width:
 
 
 
 
 
 
 
 
 
 
 
 
65
  x = self.margin
66
  y -= self.line_height
67
- c.drawString(x, y, s)
68
- x += w
 
69
 
70
  c.save()
71
  print(f"Normal PDF saved: {output_path}")
@@ -104,8 +120,19 @@ class PDFAttacker:
104
  for line in lines:
105
  x = self.margin
106
  for item in line:
107
- char_positions.append((x, y, item['text']))
108
- x += item['width']
 
 
 
 
 
 
 
 
 
 
 
109
  y -= self.line_height
110
 
111
  # drawing order is per-cluster; attack by shuffling a subset
@@ -181,8 +208,19 @@ class PDFAttacker:
181
  for line in lines:
182
  x = self.margin
183
  for item in line:
184
- positions.append((x, y, item['text']))
185
- x += item['width']
 
 
 
 
 
 
 
 
 
 
 
186
  y -= self.line_height
187
 
188
  c = canvas.Canvas(output_path, pagesize=self.page_size)
@@ -298,10 +336,17 @@ class PDFAttacker:
298
  char_end = byte_to_char.get(next_byte, len(text))
299
  else:
300
  char_end = len(text)
301
- adv_sum = clusters[start]
302
  substr = text[char_start:char_end]
303
- width_pts = (adv_sum / float(self.upem)) * self.font_size
304
- items.append({'text': substr, 'width': width_pts})
 
 
 
 
 
 
 
305
 
306
  return items
307
 
@@ -309,7 +354,7 @@ class PDFAttacker:
309
  # fallback: per-character widths
310
  for ch in text:
311
  w = pdfmetrics.stringWidth(ch, self.font_name, self.font_size)
312
- items.append({'text': ch, 'width': w})
313
  return items
314
 
315
  def _find_cluster_sequence_for_target(self, cluster_items, target_text: str):
@@ -341,10 +386,7 @@ class PDFAttacker:
341
 
342
  def main():
343
  ai_text = """
344
- The rapid advancement of artificial intelligence has transformed numerous industries
345
- and revolutionized the way we approach complex problems. Machine learning algorithms
346
- have demonstrated remarkable capabilities in pattern recognition, data analysis,
347
- and predictive modeling. These technological innovations continue to push the
348
  boundaries of what was previously thought impossible, enabling automation and
349
  efficiency improvements across various sectors. As we move forward, the integration
350
  of AI systems into our daily lives becomes increasingly prevalent and sophisticated.
 
59
  y = self.page_size[1] - self.margin
60
 
61
  for item in cluster_items:
62
+ # prefer HarfBuzz advance if present
63
+ adv = item.get('adv_pts', item.get('width', 0))
64
+ width_rl = item.get('width_rl', adv)
65
+ offset = item.get('offset_pts', 0)
66
  s = item['text']
67
+
68
+ # stability heuristic: if measured width differs significantly from HarfBuzz advance,
69
+ # prefer the ReportLab-measured width for layout to match drawString behavior (fix em-dash cases)
70
+ thresh = max(0.5, self.font_size * 0.1)
71
+ used_adv = adv
72
+ if abs(width_rl - adv) > thresh:
73
+ used_adv = width_rl
74
+
75
+ # clamp offset if it's unreasonably large relative to advance
76
+ if abs(offset) > (used_adv * 0.6):
77
+ offset = 0
78
+
79
+ if x + used_adv > self.margin + max_width:
80
  x = self.margin
81
  y -= self.line_height
82
+ # draw at x + offset to respect glyph x_offset where reasonable
83
+ c.drawString(x + offset, y, s)
84
+ x += used_adv
85
 
86
  c.save()
87
  print(f"Normal PDF saved: {output_path}")
 
120
  for line in lines:
121
  x = self.margin
122
  for item in line:
123
+ adv = item.get('adv_pts', item.get('width', 0))
124
+ width_rl = item.get('width_rl', adv)
125
+ offset = item.get('offset_pts', 0)
126
+
127
+ thresh = max(0.5, self.font_size * 0.1)
128
+ used_adv = adv
129
+ if abs(width_rl - adv) > thresh:
130
+ used_adv = width_rl
131
+ if abs(offset) > (used_adv * 0.6):
132
+ offset = 0
133
+
134
+ char_positions.append((x + offset, y, item['text']))
135
+ x += used_adv
136
  y -= self.line_height
137
 
138
  # drawing order is per-cluster; attack by shuffling a subset
 
208
  for line in lines:
209
  x = self.margin
210
  for item in line:
211
+ adv = item.get('adv_pts', item.get('width', 0))
212
+ width_rl = item.get('width_rl', adv)
213
+ offset = item.get('offset_pts', 0)
214
+
215
+ thresh = max(0.5, self.font_size * 0.1)
216
+ used_adv = adv
217
+ if abs(width_rl - adv) > thresh:
218
+ used_adv = width_rl
219
+ if abs(offset) > (used_adv * 0.6):
220
+ offset = 0
221
+
222
+ positions.append((x + offset, y, item['text']))
223
+ x += used_adv
224
  y -= self.line_height
225
 
226
  c = canvas.Canvas(output_path, pagesize=self.page_size)
 
336
  char_end = byte_to_char.get(next_byte, len(text))
337
  else:
338
  char_end = len(text)
339
+ # substring for this cluster
340
  substr = text[char_start:char_end]
341
+
342
+ # Use ReportLab measured width for cluster advance and set offset to zero
343
+ try:
344
+ width_rl = pdfmetrics.stringWidth(substr, self.font_name, self.font_size)
345
+ except Exception:
346
+ # fallback: estimate from HarfBuzz if possible
347
+ adv_sum = clusters.get(start, 0)
348
+ width_rl = (adv_sum / float(self.upem)) * self.font_size
349
+ items.append({'text': substr, 'adv_pts': width_rl, 'offset_pts': 0, 'width_rl': width_rl, 'width': width_rl})
350
 
351
  return items
352
 
 
354
  # fallback: per-character widths
355
  for ch in text:
356
  w = pdfmetrics.stringWidth(ch, self.font_name, self.font_size)
357
+ items.append({'text': ch, 'adv_pts': w, 'offset_pts': 0, 'width_rl': w, 'width': w})
358
  return items
359
 
360
  def _find_cluster_sequence_for_target(self, cluster_items, target_text: str):
 
386
 
387
  def main():
388
  ai_text = """
389
+ The rapid advancement of artificial intelligence has transformed numerous industries — and revolutionized the way we approach complex problems. Machine learning algorithms have demonstrated remarkable capabilities in pattern recognition, data analysis, and predictive modeling. These technological innovations continue to push the
 
 
 
390
  boundaries of what was previously thought impossible, enabling automation and
391
  efficiency improvements across various sectors. As we move forward, the integration
392
  of AI systems into our daily lives becomes increasingly prevalent and sophisticated.