File size: 8,072 Bytes
d485cda 52f401c e1337e7 52f401c e1337e7 52f401c e1337e7 52f401c d485cda 0337d51 d485cda 52f401c 2254c6b e1337e7 d485cda 52f401c 2254c6b 52f401c e1337e7 d485cda e1337e7 d485cda e1337e7 d485cda 0337d51 d485cda 52f401c 2254c6b 52f401c 2254c6b d485cda e1337e7 d485cda 2254c6b e1337e7 d485cda e1337e7 d485cda e1337e7 d485cda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
#!/usr/bin/env python3
"""Simple Gradio demo for the PDF attacker tools
Allows entering text, choosing attack type, and downloading the generated PDF.
"""
import os
import time
from typing import Optional, Tuple

import PyPDF2
import gradio as gr

from pdf_attacker import PDFAttacker
def _resolve_font_path(choice: str, uploaded_file) -> Optional[str]:
    """Return a font file path for a dropdown choice or an uploaded file.

    Args:
        choice: Dropdown value: 'auto', one of the preset font names below,
            or 'Custom'.
        uploaded_file: Value of the font upload widget. Only consulted when
            ``choice`` is 'Custom'. Accepted shapes: a ``str`` path, an
            ``os.PathLike``, a dict carrying the path under 'name' (or
            'path'), or an object exposing the path as a ``.name`` attribute.

    Returns:
        The first existing candidate path for a preset, or the path of the
        uploaded file for 'Custom'. ``None`` when ``choice`` is 'auto'/empty,
        when no candidate file of a preset exists on this machine, when the
        choice is unknown, or when no usable upload was supplied. ``None`` is
        meant to let PDFAttacker fall back to its own default font.
    """
    if not choice or choice == 'auto':
        return None

    # Known presets mapped to candidate system paths; the first one that
    # exists on disk wins, so later entries act as look-alike fallbacks.
    presets = {
        'DejaVu Serif': [
            '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf',
        ],
        'Liberation Serif': [
            '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
        ],
        'FreeSerif': [
            '/usr/share/fonts/truetype/freefont/FreeSerif.ttf',
        ],
        'DejaVu Sans': [
            '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
        ],
        'Arial': [
            '/usr/share/fonts/truetype/msttcorefonts/Arial.ttf',
            '/usr/share/fonts/truetype/msttcorefonts/arial.ttf',
            '/usr/share/fonts/truetype/arial/arial.ttf',
            '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
        ],
        'Helvetica': [
            '/usr/share/fonts/truetype/urw-base35/Helvetica.ttf',
            '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf',
            '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
        ],
        'Times New Roman': [
            '/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman.ttf',
            '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
        ],
        'Roboto': [
            '/usr/share/fonts/truetype/roboto/Roboto-Regular.ttf',
        ],
        'Courier': [
            '/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf',
            '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf',
        ],
        'Times': [
            '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
        ],
    }

    candidates = presets.get(choice)
    if candidates is not None:
        return next((path for path in candidates if os.path.exists(path)), None)

    if choice != 'Custom' or not uploaded_file:
        return None

    # The upload widget's value shape is not fixed by this file, so every
    # accepted shape is normalised to a plain path string here instead of
    # leaking a dict or file object into PDFAttacker.
    if isinstance(uploaded_file, (str, os.PathLike)):
        return os.fspath(uploaded_file)
    if isinstance(uploaded_file, dict):
        return uploaded_file.get('name') or uploaded_file.get('path') or None
    name = getattr(uploaded_file, 'name', None)
    if isinstance(name, str) and name:
        return name
    return None
# Theme customization per request: Soft base theme with custom hues, square
# corners and IBM Plex fonts. Each font stack lists the Google font first,
# followed by generic fallbacks.
_SANS_FONT_STACK = [
    gr.themes.GoogleFont("IBM Plex Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]
_MONO_FONT_STACK = [
    gr.themes.GoogleFont("IBM Plex Mono"),
    "ui-monospace",
    "Consolas",
    "monospace",
]
theme = gr.themes.Soft(
    primary_hue="fuchsia",
    secondary_hue="cyan",
    neutral_hue="gray",
    radius_size="none",
    font=_SANS_FONT_STACK,
    font_mono=_MONO_FONT_STACK,
)
def _ensure_tmp_dir() -> str:
    """Create the ``tmp`` directory under the current working directory.

    Returns:
        The absolute path of that directory. It is created if missing and
        left alone if it already exists.
    """
    tmp_dir = os.path.abspath("tmp")
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir
def _extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text PyPDF2 extracts from ``pdf_path``, for the preview box.

    The text of all pages is concatenated without a separator and the result
    is stripped of surrounding whitespace. Pages that yield no text are
    skipped.

    Any exception (missing file, unreadable PDF, ...) is turned into an
    ``"Error extracting text: ..."`` string instead of propagating, so the
    caller always receives something displayable.
    """
    try:
        with open(pdf_path, 'rb') as handle:
            # Extraction must happen while the file is still open.
            chunks = [page.extract_text() for page in PyPDF2.PdfReader(handle).pages]
        return "".join(chunk for chunk in chunks if chunk).strip()
    except Exception as exc:
        return f"Error extracting text: {exc}"
def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
    font_choice: str = 'auto',
    uploaded_font=None,
    wrap_on_words: bool = True,
) -> Tuple[str, str, str]:
    """Generate the selected PDF variant and return the data the UI displays.

    Args:
        text: Input text; runs of whitespace are collapsed to single spaces
            before it is handed to PDFAttacker.
        mode: 'normal', 'attacked' or 'targeted'.
        attack_factor: Forwarded to ``create_attacked_pdf`` ('attacked' mode
            only).
        target_text: Forwarded to ``create_targeted_pdf`` ('targeted' mode
            only).
        font_choice: Font dropdown value, resolved by ``_resolve_font_path``.
        uploaded_font: Uploaded font file, used when ``font_choice`` is
            'Custom'.
        wrap_on_words: Assigned to the attacker's ``wrap_on_words`` attribute.

    Returns:
        A 3-tuple ``(pdf_path, extracted_text, status)``. On success
        ``pdf_path`` points at the generated file inside the tmp directory,
        ``extracted_text`` is what PyPDF2 extracts from it, and ``status``
        summarises the font and wrap settings. On failure ``pdf_path`` is
        ``""`` and the other two elements carry the error message.
    """
    # Validate first: every return path must yield three values for the
    # caller to unpack, and `mode` is interpolated into the output filename
    # below, so only the known literals may get that far.
    if mode not in ('normal', 'attacked', 'targeted'):
        message = f"Unknown mode: {mode}"
        return "", message, f"Error: {message}"

    tmp_dir = _ensure_tmp_dir()
    timestamp = int(time.time() * 1000)
    output_path = os.path.join(tmp_dir, f"{mode}_{timestamp}.pdf")

    # Collapse all whitespace; tolerate a missing/None text value.
    clean_text = " ".join((text or "").split())

    # Resolve the font and create a fresh attacker instance for this request.
    font_path = _resolve_font_path(choice=font_choice, uploaded_file=uploaded_font)
    attacker = PDFAttacker(font_path=font_path)
    attacker.wrap_on_words = wrap_on_words

    # Contextual status string for the UI.
    resolved_font = font_path or "(auto/default)"
    status_lines = [
        f"Font resolved to: {resolved_font}",
        f"Wrap on words: {wrap_on_words}",
    ]

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:
            # 'targeted' -- may raise ValueError if the target is not feasible.
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Surface generation errors to the UI instead of crashing the handler.
        return "", f"Error generating PDF: {e}", f"Error: {e}"

    # Extract text to show how the copied/extracted text looks.
    extracted = _extract_text_from_pdf(output_path)
    return output_path, extracted, "\n".join(status_lines)
def build_demo():
    """Construct and return the Gradio Blocks demo

    Builds the input widgets (text, mode, attack factor, target text, font
    selection, wrap option), the output widgets (file download, extracted
    text preview, status) and wires the 'Generate PDF' button to
    ``generate_pdf`` through the local ``_on_generate`` callback.

    NOTE(review): the Row/Column nesting below was reconstructed from a copy
    of this file without indentation -- confirm it matches the intended layout.
    """
    with gr.Blocks(theme=theme) as demo:
        gr.Markdown("# PDF Humanizer: Attack demo\nGenerate PDFs that look normal but extract differently when copied")
        with gr.Row():
            txt = gr.Textbox(lines=8, label="Input text", value="Enter or paste text here...")
            with gr.Column():
                mode = gr.Radio(choices=['normal', 'attacked', 'targeted'], value='attacked', label='Mode')
                attack_factor = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.7, label='Attack factor (attacked mode)')
                target_text = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate = gr.Button('Generate PDF')
                # Font selection: presets + custom upload
                font_choice = gr.Dropdown(choices=['auto', 'DejaVu Serif', 'Liberation Serif', 'FreeSerif', 'Arial', 'Helvetica', 'Times New Roman', 'Roboto', 'Courier', 'Custom'], value='auto', label='Font')
                upload_font = gr.File(label='Upload TTF/OTF (optional)', file_count='single')
                wrap_on_words = gr.Checkbox(label='Wrap on words', value=True)
        # Output widgets, filled by the click handler below
        download_file = gr.File(label='Download generated PDF')
        extracted_preview = gr.Textbox(lines=8, label='Extracted text preview')
        status_box = gr.Textbox(lines=4, label='Status')

        # Click callback: forwards the widget values to generate_pdf and maps
        # its result onto (download_file, extracted_preview, status_box).
        def _on_generate(text, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words):
            path, extracted, status = generate_pdf(text=text, mode=mode, attack_factor=attack_factor, target_text=target_text, font_choice=font_choice, uploaded_font=upload_font, wrap_on_words=wrap_on_words)
            if not path:
                # Return empty file and error message in preview
                # (None instead of the empty path string -- presumably so the
                # File component is cleared; the other two values are unchanged)
                return None, extracted, status
            return path, extracted, status
        generate.click(fn=_on_generate, inputs=[txt, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words], outputs=[download_file, extracted_preview, status_box])
    return demo
if __name__ == '__main__':
    # Script entry point: build the UI and serve it on port 7860, bound to
    # 0.0.0.0 rather than only to localhost.
    app = build_demo()
    app.launch(
        server_name='0.0.0.0',
        server_port=7860,
    )
|