acmc committed on
Commit
d485cda
·
verified ·
1 Parent(s): eeda7c3

Create gradio_demo.py

Browse files
Files changed (1) hide show
  1. gradio_demo.py +111 -0
gradio_demo.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Simple Gradio demo for the PDF attacker tools
3
+
4
+ Allows entering text, choosing attack type, and downloading the generated PDF.
5
+ """
6
+ import os
7
+ import time
8
+ from typing import Tuple
9
+
10
+ import PyPDF2
11
+ import gradio as gr
12
+
13
+ from pdf_attacker import PDFAttacker
14
+
15
+
16
# Single module-level PDFAttacker instance; generate_pdf() calls its
# create_*_pdf methods for every request.
# NOTE(review): shared across all UI callbacks — confirm PDFAttacker holds no
# per-request state.
attacker = PDFAttacker()
17
+
18
+
19
def _ensure_tmp_dir() -> str:
    """Make sure ``<cwd>/tmp`` exists and return its path.

    The directory is resolved against the current working directory at call
    time; an already-existing directory is left untouched.
    """
    tmp_dir = os.path.join(os.getcwd(), "tmp")
    # exist_ok=True makes repeated calls a no-op instead of an error
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir
24
+
25
+
26
def _extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text PyPDF2 extracts from ``pdf_path`` for the preview.

    The text of every page is concatenated with no separator and the result
    is stripped of surrounding whitespace. Failures are not raised: any
    exception is reported as an ``"Error extracting text: ..."`` string.
    """
    try:
        with open(pdf_path, 'rb') as handle:
            pages = PyPDF2.PdfReader(handle).pages
            # filter(None, ...) skips pages whose extraction is empty or None
            chunks = filter(None, (pg.extract_text() for pg in pages))
            return "".join(chunks).strip()
    except Exception as exc:
        return f"Error extracting text: {exc}"
39
+
40
+
41
def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
) -> Tuple[str, str]:
    """Generate the selected PDF and return ``(pdf_path, extracted_text)``.

    Args:
        text: Input text; runs of whitespace are collapsed to single spaces
            before it is handed to the attacker. ``None`` is treated as empty.
        mode: One of ``'normal'``, ``'attacked'`` or ``'targeted'``.
        attack_factor: Forwarded to ``create_attacked_pdf`` (attacked mode).
        target_text: Forwarded to ``create_targeted_pdf`` (targeted mode).

    Returns:
        On success, the path of the generated PDF and the text extracted back
        from it. On failure, ``("", message)`` where ``message`` describes the
        unknown mode or the generation error; nothing is raised for those.
    """
    # Validate first: ``mode`` is interpolated into the output filename, so an
    # unexpected value must be rejected before any path is built or any
    # directory is created.
    if mode not in ('normal', 'attacked', 'targeted'):
        return "", f"Unknown mode: {mode}"

    tmp_dir = _ensure_tmp_dir()
    # Millisecond timestamp keeps successive outputs from overwriting each other
    timestamp = int(time.time() * 1000)
    filename = f"{mode}_{timestamp}.pdf"
    output_path = os.path.join(tmp_dir, filename)

    # Clean input text (collapse all whitespace, tolerate a missing value)
    clean_text = " ".join((text or "").split())

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:
            # 'targeted' — may raise ValueError if not feasible
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Surface errors to the UI instead of crashing the callback
        return "", f"Error generating PDF: {e}"

    # Extract text to show how the copied/extracted text looks
    extracted = _extract_text_from_pdf(output_path)

    return output_path, extracted
79
+
80
+
81
def build_demo():
    """Assemble the Gradio Blocks UI and return it (without launching).

    The UI offers a text input, a mode selector, an attack-factor slider, a
    target-text box and a button; clicking the button calls ``generate_pdf``
    and fills the file download and the extracted-text preview.
    """
    # NOTE(review): the nesting of the layout below was reconstructed from a
    # flattened listing — confirm which widgets belong inside the Row/Column.
    with gr.Blocks() as demo:
        gr.Markdown("# PDF Humanizer: Attack demo\nGenerate PDFs that look normal but extract differently when copied")

        with gr.Row():
            text_box = gr.Textbox(lines=8, label="Input text", value="Enter or paste text here...")
            with gr.Column():
                mode_radio = gr.Radio(choices=['normal', 'attacked', 'targeted'], value='attacked', label='Mode')
                factor_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, value=0.7, label='Attack factor (attacked mode)')
                target_box = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate_btn = gr.Button('Generate PDF')

        file_output = gr.File(label='Download generated PDF')
        preview_box = gr.Textbox(lines=8, label='Extracted text preview')

        def _on_generate(text, mode, attack_factor, target_text):
            pdf_path, preview = generate_pdf(
                text=text,
                mode=mode,
                attack_factor=attack_factor,
                target_text=target_text,
            )
            # An empty path signals failure: clear the file widget and let the
            # preview carry the error message.
            return (pdf_path or None), preview

        generate_btn.click(
            fn=_on_generate,
            inputs=[text_box, mode_radio, factor_slider, target_box],
            outputs=[file_output, preview_box],
        )

    return demo
107
+
108
+
109
if __name__ == '__main__':
    # Script entry point: build the UI and serve it on all interfaces, port 7860
    build_demo().launch(server_port=7860, server_name='0.0.0.0')