|
|
|
"""Simple Gradio demo for the PDF attacker tools |
|
|
|
Allows entering text, choosing attack type, and downloading the generated PDF. |
|
""" |
|
import os |
|
import time |
|
from typing import Tuple |
|
|
|
import PyPDF2 |
|
import gradio as gr |
|
|
|
from pdf_attacker import PDFAttacker |
|
|
|
|
|
# Single module-level PDFAttacker instance; generate_pdf() below calls its
# create_*_pdf methods for every request instead of constructing a new one.
attacker = PDFAttacker()
|
|
|
|
|
def _ensure_tmp_dir() -> str:
    """Return the path of the ``tmp`` directory under the current working
    directory, creating it first when it does not exist yet."""
    tmp_path = os.path.join(os.getcwd(), "tmp")
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(tmp_path, exist_ok=True)
    return tmp_path
|
|
|
|
|
def _extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of the PDF at ``pdf_path``, for preview.

    The text of all pages that yield any is concatenated in page order and
    stripped of surrounding whitespace. This is best-effort: any exception
    raised while opening or reading the file is turned into an
    ``"Error extracting text: ..."`` string rather than propagated.
    """
    try:
        with open(pdf_path, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Extract while the file is still open.
            page_texts = [page.extract_text() for page in pdf_reader.pages]
        # Pages with no extractable text (empty/None) are skipped.
        return "".join(chunk for chunk in page_texts if chunk).strip()
    except Exception as exc:
        return f"Error extracting text: {exc}"
|
|
|
|
|
def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
) -> Tuple[str, str]:
    """Generate the selected PDF and return ``(pdf_path, extracted_text)``.

    Args:
        text: Source text. Runs of whitespace (including newlines) are
            collapsed to single spaces before rendering; ``None`` is treated
            as empty text.
        mode: One of ``'normal'``, ``'attacked'`` or ``'targeted'``.
        attack_factor: Forwarded to ``create_attacked_pdf``; only used in
            ``'attacked'`` mode.
        target_text: Forwarded to ``create_targeted_pdf``; only used in
            ``'targeted'`` mode.

    Returns:
        On success, the path of the generated PDF (inside the ``tmp``
        directory) and the result of ``_extract_text_from_pdf`` for that
        file. If ``mode`` is unknown or generation raises, returns
        ``("", message)`` where ``message`` describes the problem.
    """
    # Validate the mode first: an unknown mode must not create the tmp
    # directory, and an unvalidated value must never end up in a filename.
    if mode not in ('normal', 'attacked', 'targeted'):
        return "", f"Unknown mode: {mode}"

    # Collapse whitespace; `or ""` keeps a missing text from raising outside
    # the error-reporting path below.
    clean_text = " ".join((text or "").split())

    tmp_dir = _ensure_tmp_dir()
    # Millisecond timestamp keeps successive outputs from overwriting each other.
    timestamp = int(time.time() * 1000)
    output_path = os.path.join(tmp_dir, f"{mode}_{timestamp}.pdf")

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:  # 'targeted' -- the only remaining validated mode
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Report generation failures to the caller/UI as a message instead of
        # propagating, matching the ("", message) error contract.
        return "", f"Error generating PDF: {e}"

    extracted = _extract_text_from_pdf(output_path)
    return output_path, extracted
|
|
|
|
|
def build_demo():
    """Assemble the Gradio Blocks interface for the demo and return it.

    The UI consists of an input textbox next to a column of controls (mode,
    attack factor, target text, generate button), followed by a file output
    for the generated PDF and a textbox previewing its extracted text.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# PDF Humanizer: Attack demo\nGenerate PDFs that look normal but extract differently when copied")

        with gr.Row():
            text_input = gr.Textbox(
                lines=8,
                label="Input text",
                value="Enter or paste text here...",
            )
            with gr.Column():
                mode_choice = gr.Radio(
                    choices=['normal', 'attacked', 'targeted'],
                    value='attacked',
                    label='Mode',
                )
                factor_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.7,
                    label='Attack factor (attacked mode)',
                )
                target_input = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate_button = gr.Button('Generate PDF')

        pdf_output = gr.File(label='Download generated PDF')
        preview_output = gr.Textbox(lines=8, label='Extracted text preview')

        def _on_generate(text, mode, attack_factor, target_text):
            # generate_pdf signals failure with an empty path; the file
            # output receives None in that case, and the message still goes
            # to the preview box.
            path, extracted = generate_pdf(
                text=text,
                mode=mode,
                attack_factor=attack_factor,
                target_text=target_text,
            )
            return (path or None), extracted

        generate_button.click(
            fn=_on_generate,
            inputs=[text_input, mode_choice, factor_slider, target_input],
            outputs=[pdf_output, preview_output],
        )

    return demo
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the UI and serve it on port 7860, bound to
    # all network interfaces ('0.0.0.0').
    launch_options = {'server_name': '0.0.0.0', 'server_port': 7860}
    app = build_demo()
    app.launch(**launch_options)
|
|