File size: 3,821 Bytes
d485cda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python3
"""Simple Gradio demo for the PDF attacker tools

Allows entering text, choosing attack type, and downloading the generated PDF.
"""
import os
import time
from typing import Tuple

import PyPDF2
import gradio as gr

from pdf_attacker import PDFAttacker


# Single PDFAttacker instance, created at import time and reused by generate_pdf
attacker = PDFAttacker()


def _ensure_tmp_dir() -> str:
    """Return the "tmp" directory under the current working directory

    The directory is created on demand; an already existing directory is
    accepted as-is.
    """
    tmp_path = os.path.join(os.getcwd(), "tmp")
    # exist_ok makes repeated calls harmless
    os.makedirs(tmp_path, exist_ok=True)
    return tmp_path


def _extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text PyPDF2 extracts from the PDF at pdf_path, for preview

    Text of all pages is concatenated without a separator and stripped of
    surrounding whitespace. Any failure is reported as an
    "Error extracting text: ..." string instead of being raised.
    """
    try:
        with open(pdf_path, 'rb') as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Extract while the file is still open; skip pages yielding nothing
            per_page = (page.extract_text() for page in reader.pages)
            combined = "".join(chunk for chunk in per_page if chunk)
        return combined.strip()
    except Exception as e:
        return f"Error extracting text: {e}"


def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
) -> Tuple[str, str]:
    """Generate selected PDF and return (pdf_path, extracted_text)

    Inputs: text, mode: 'normal'|'attacked'|'targeted', attack_factor, target_text
    Outputs: path to generated PDF, extracted text preview

    On failure the returned path is "" and the second element carries the
    error message ("Unknown mode: ..." or "Error generating PDF: ...").
    A missing (None) text is treated as empty input.
    """
    # Validate the mode first: an unknown mode must not create the tmp
    # directory or end up inside a filename
    if mode not in ('normal', 'attacked', 'targeted'):
        return "", f"Unknown mode: {mode}"

    # Clean input text: collapse every whitespace run into a single space.
    # `text or ""` keeps a None value from raising before the error handling.
    clean_text = " ".join((text or "").split())

    tmp_dir = _ensure_tmp_dir()
    # Millisecond timestamp keeps successive outputs of the same mode apart
    timestamp = int(time.time() * 1000)
    filename = f"{mode}_{timestamp}.pdf"
    output_path = os.path.join(tmp_dir, filename)

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(text=clean_text, output_path=output_path, attack_factor=attack_factor)
        else:
            # mode == 'targeted'; may raise ValueError if not feasible
            attacker.create_targeted_pdf(text=clean_text, target_text=target_text, output_path=output_path)

    except Exception as e:
        # Surface errors to the UI
        return "", f"Error generating PDF: {e}"

    # Extract text to show how the copied/extracted text looks
    extracted = _extract_text_from_pdf(output_path)

    return output_path, extracted


def build_demo():
    """Assemble the Gradio Blocks UI for the PDF attack demo and return it"""
    with gr.Blocks() as blocks:
        gr.Markdown("# PDF Humanizer: Attack demo\nGenerate PDFs that look normal but extract differently when copied")

        # One row: input text first, then a column with the attack options
        with gr.Row():
            text_box = gr.Textbox(lines=8, label="Input text", value="Enter or paste text here...")
            with gr.Column():
                mode_radio = gr.Radio(
                    choices=['normal', 'attacked', 'targeted'],
                    value='attacked',
                    label='Mode',
                )
                factor_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.7,
                    label='Attack factor (attacked mode)',
                )
                target_box = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate_button = gr.Button('Generate PDF')

        # Outputs: the generated file and the text extracted back from it
        file_output = gr.File(label='Download generated PDF')
        preview_output = gr.Textbox(lines=8, label='Extracted text preview')

        def _on_generate(text, mode, attack_factor, target_text):
            path, extracted = generate_pdf(text=text, mode=mode, attack_factor=attack_factor, target_text=target_text)
            # An empty path means generation failed: hand None to the file
            # component and let the preview carry the error message
            return (path or None), extracted

        generate_button.click(
            fn=_on_generate,
            inputs=[text_box, mode_radio, factor_slider, target_box],
            outputs=[file_output, preview_output],
        )

    return blocks


if __name__ == '__main__':
    # Script entry point: build the UI and serve it on port 7860,
    # listening on all network interfaces (0.0.0.0)
    launch_options = {'server_name': '0.0.0.0', 'server_port': 7860}
    app = build_demo()
    app.launch(**launch_options)