File size: 8,072 Bytes
d485cda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52f401c
 
 
 
 
 
 
 
e1337e7
52f401c
e1337e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52f401c
 
 
e1337e7
 
 
 
52f401c
 
 
 
 
 
 
 
 
d485cda
0337d51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d485cda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52f401c
 
2254c6b
e1337e7
d485cda
 
 
 
 
 
 
 
 
 
 
 
 
52f401c
 
 
2254c6b
 
52f401c
e1337e7
 
 
 
d485cda
 
 
 
 
 
 
 
 
 
 
 
 
e1337e7
d485cda
 
 
 
e1337e7
d485cda
 
 
 
0337d51
d485cda
 
 
 
 
 
 
 
 
52f401c
 
2254c6b
52f401c
2254c6b
d485cda
 
 
e1337e7
d485cda
2254c6b
e1337e7
d485cda
 
e1337e7
 
d485cda
e1337e7
d485cda
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/env python3
"""Simple Gradio demo for the PDF attacker tools

Allows entering text, choosing attack type, and downloading the generated PDF.
"""
import os
import time
from typing import Optional, Tuple

import PyPDF2
import gradio as gr

from pdf_attacker import PDFAttacker


# Preset font names mapped to candidate system paths. Candidates are tried in
# order and the first one that exists on disk wins.
_FONT_PRESETS = {
    'DejaVu Serif': [
        '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf',
    ],
    'Liberation Serif': [
        '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
    ],
    'FreeSerif': [
        '/usr/share/fonts/truetype/freefont/FreeSerif.ttf',
    ],
    'DejaVu Sans': [
        '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
    ],
    'Arial': [
        '/usr/share/fonts/truetype/msttcorefonts/Arial.ttf',
        '/usr/share/fonts/truetype/msttcorefonts/arial.ttf',
        '/usr/share/fonts/truetype/arial/arial.ttf',
        '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
    ],
    'Helvetica': [
        '/usr/share/fonts/truetype/urw-base35/Helvetica.ttf',
        '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf',
        '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
    ],
    'Times New Roman': [
        '/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman.ttf',
        '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
    ],
    'Roboto': [
        '/usr/share/fonts/truetype/roboto/Roboto-Regular.ttf',
    ],
    'Courier': [
        '/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf',
        '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf',
    ],
    'Times': [
        '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
    ],
}


def _resolve_font_path(choice: str, uploaded_file) -> Optional[str]:
    """Return a font file path for a dropdown choice or an uploaded file.

    Args:
        choice: 'auto' (or empty), a preset name from ``_FONT_PRESETS``, or
            'Custom' to use ``uploaded_file``.
        uploaded_file: Value handed over by the upload widget. Accepted
            shapes are a path string / ``os.PathLike``, a dict carrying the
            path under 'name' (or 'path'), or an object exposing a ``name``
            attribute. Only consulted when ``choice`` is 'Custom'.

    Returns:
        The resolved path, or None when nothing usable was found: 'auto',
        empty choice, a preset with no installed candidate, an unknown
        choice, or a missing/unrecognised upload. The caller passes None on
        to PDFAttacker, which is expected to fall back to its own default.
    """
    if not choice or choice == 'auto':
        return None

    candidates = _FONT_PRESETS.get(choice)
    if candidates is not None:
        # first candidate present on this system, else None
        return next((path for path in candidates if os.path.exists(path)), None)

    if choice != 'Custom' or not uploaded_file:
        return None

    # NOTE(review): the exact upload payload depends on the Gradio version
    # and File component configuration; handle the common shapes and never
    # leak a non-path object to the caller.
    if isinstance(uploaded_file, dict):
        candidate = uploaded_file.get('name') or uploaded_file.get('path')
    elif isinstance(uploaded_file, (str, os.PathLike)):
        candidate = os.fspath(uploaded_file)
    else:
        candidate = getattr(uploaded_file, 'name', None)
    return candidate or None

# Theme customization per request: Soft base theme with IBM Plex fonts.
# Each font list starts with the Google-hosted face followed by generic
# fallbacks, in the order given.
_THEME_SANS_FONTS = [
    gr.themes.GoogleFont("IBM Plex Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]
_THEME_MONO_FONTS = [
    gr.themes.GoogleFont("IBM Plex Mono"),
    "ui-monospace",
    "Consolas",
    "monospace",
]
theme = gr.themes.Soft(
    primary_hue="fuchsia",
    secondary_hue="cyan",
    neutral_hue="gray",
    radius_size="none",
    font=_THEME_SANS_FONTS,
    font_mono=_THEME_MONO_FONTS,
)


def _ensure_tmp_dir() -> str:
    """Create ./tmp under the current working directory if needed.

    Returns:
        The path of that directory (cwd joined with "tmp").
    """
    tmp_dir = os.path.join(os.getcwd(), "tmp")
    # exist_ok makes repeated calls harmless
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir


def _extract_text_from_pdf(pdf_path: str) -> str:
    """Read back the text of a PDF file for the preview box.

    Page texts are concatenated in page order without a separator; pages
    that yield no text are skipped and the result is stripped of
    surrounding whitespace.

    Returns:
        The extracted text, or an 'Error extracting text: ...' message if
        opening or parsing the file raises; errors are never propagated.
    """
    try:
        with open(pdf_path, 'rb') as pdf_file:
            pages = PyPDF2.PdfReader(pdf_file).pages
            # extract while the file handle is still open
            chunks = [page.extract_text() for page in pages]
        combined = "".join(chunk for chunk in chunks if chunk)
    except Exception as e:
        return f"Error extracting text: {e}"
    return combined.strip()


def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
    font_choice: str = 'auto',
    uploaded_font=None,
    wrap_on_words: bool = True,
) -> Tuple[str, str, str]:
    """Generate the selected PDF and return (pdf_path, extracted_text, status).

    Args:
        text: Input text; runs of whitespace are collapsed to single spaces.
        mode: One of 'normal', 'attacked' or 'targeted'.
        attack_factor: Forwarded to ``create_attacked_pdf`` ('attacked' only).
        target_text: Forwarded to ``create_targeted_pdf`` ('targeted' only).
        font_choice: Font dropdown value, resolved via ``_resolve_font_path``.
        uploaded_font: Uploaded font value used when ``font_choice`` is 'Custom'.
        wrap_on_words: Assigned to the attacker's ``wrap_on_words`` attribute.

    Returns:
        A 3-tuple ``(pdf_path, extracted_text, status)``. On failure
        ``pdf_path`` is the empty string and the other two elements carry
        the error message; this function does not raise for generation errors.
    """
    # Validate the mode before touching the filesystem: it is embedded in the
    # output filename, and every return path must yield three values because
    # the caller unpacks a 3-tuple.
    if mode not in ('normal', 'attacked', 'targeted'):
        message = f"Unknown mode: {mode}"
        return "", message, f"Error: {message}"

    tmp_dir = _ensure_tmp_dir()
    # millisecond timestamp keeps successive outputs from overwriting each other
    timestamp = int(time.time() * 1000)
    output_path = os.path.join(tmp_dir, f"{mode}_{timestamp}.pdf")

    # Clean input text (tolerate None from an empty widget)
    clean_text = " ".join((text or "").split())

    # resolve font path and create an attacker instance for this request
    font_path = _resolve_font_path(choice=font_choice, uploaded_file=uploaded_font)
    attacker = PDFAttacker(font_path=font_path)
    # apply wrap mode
    attacker.wrap_on_words = wrap_on_words

    # Build a contextual status string for the UI
    resolved_font = font_path or "(auto/default)"
    status_lines = [
        f"Font resolved to: {resolved_font}",
        f"Wrap on words: {wrap_on_words}",
    ]

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:
            # Targeted may raise ValueError if not feasible
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Surface errors to the UI; this is a generation failure, not an
        # extraction failure, so say so in the preview box.
        return "", f"Error generating PDF: {e}", f"Error: {e}"

    # Extract text to show how the copied/extracted text looks
    extracted = _extract_text_from_pdf(output_path)

    return output_path, extracted, "\n".join(status_lines)


def build_demo():
    """Assemble the Gradio Blocks UI and return it (not yet launched).

    Layout: a title, a row with the input textbox next to a column of
    controls (mode, attack factor, target text, generate button, font
    options, wrap toggle), then the download, preview and status outputs.
    The button is wired to ``generate_pdf`` through ``_on_generate``.
    """
    mode_options = ['normal', 'attacked', 'targeted']
    font_options = [
        'auto',
        'DejaVu Serif',
        'Liberation Serif',
        'FreeSerif',
        'Arial',
        'Helvetica',
        'Times New Roman',
        'Roboto',
        'Courier',
        'Custom',
    ]

    with gr.Blocks(theme=theme) as demo:
        gr.Markdown(
            "# PDF Humanizer: Attack demo\n"
            "Generate PDFs that look normal but extract differently when copied"
        )

        with gr.Row():
            text_input = gr.Textbox(
                lines=8,
                label="Input text",
                value="Enter or paste text here...",
            )
            with gr.Column():
                mode_radio = gr.Radio(
                    choices=mode_options,
                    value='attacked',
                    label='Mode',
                )
                factor_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.7,
                    label='Attack factor (attacked mode)',
                )
                target_input = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate_button = gr.Button('Generate PDF')

                # Font selection: presets + custom upload
                font_dropdown = gr.Dropdown(
                    choices=font_options,
                    value='auto',
                    label='Font',
                )
                font_upload = gr.File(
                    label='Upload TTF/OTF (optional)',
                    file_count='single',
                )
                wrap_checkbox = gr.Checkbox(label='Wrap on words', value=True)

        pdf_output = gr.File(label='Download generated PDF')
        preview_output = gr.Textbox(lines=8, label='Extracted text preview')
        status_output = gr.Textbox(lines=4, label='Status')

        def _on_generate(text, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words):
            path, extracted, status = generate_pdf(
                text=text,
                mode=mode,
                attack_factor=attack_factor,
                target_text=target_text,
                font_choice=font_choice,
                uploaded_font=upload_font,
                wrap_on_words=wrap_on_words,
            )
            # An empty path means generation failed: hand the file widget
            # None and let the preview/status boxes carry the message.
            return (path or None), extracted, status

        generate_button.click(
            fn=_on_generate,
            inputs=[
                text_input,
                mode_radio,
                factor_slider,
                target_input,
                font_dropdown,
                font_upload,
                wrap_checkbox,
            ],
            outputs=[pdf_output, preview_output, status_output],
        )

    return demo


if __name__ == '__main__':
    # Script entry point: build the UI and serve it on port 7860, with the
    # server bound to host 0.0.0.0.
    launch_options = {'server_name': '0.0.0.0', 'server_port': 7860}
    app = build_demo()
    app.launch(**launch_options)