#!/usr/bin/env python3
"""Simple Gradio demo for the PDF attacker tools

Allows entering text, choosing attack type, and downloading the generated PDF.
"""

import os
import time
from typing import Optional, Tuple

import PyPDF2
import gradio as gr

from pdf_attacker import PDFAttacker


# Known font presets mapped to candidate system paths; the first path that
# exists on disk is used.
_FONT_PRESETS = {
    'DejaVu Serif': [
        '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf',
    ],
    'Liberation Serif': [
        '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
    ],
    'FreeSerif': [
        '/usr/share/fonts/truetype/freefont/FreeSerif.ttf',
    ],
    'DejaVu Sans': [
        '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
    ],
    'Arial': [
        '/usr/share/fonts/truetype/msttcorefonts/Arial.ttf',
        '/usr/share/fonts/truetype/msttcorefonts/arial.ttf',
        '/usr/share/fonts/truetype/arial/arial.ttf',
        '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
    ],
    'Helvetica': [
        '/usr/share/fonts/truetype/urw-base35/Helvetica.ttf',
        '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf',
        '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
    ],
    'Times New Roman': [
        '/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman.ttf',
        '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
    ],
    'Roboto': [
        '/usr/share/fonts/truetype/roboto/Roboto-Regular.ttf',
    ],
    'Courier': [
        '/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf',
        '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf',
    ],
    'Times': [
        '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
    ],
}

# Modes accepted by generate_pdf.
_VALID_MODES = ('normal', 'attacked', 'targeted')


def _resolve_font_path(choice: str, uploaded_file) -> Optional[str]:
    """Return a font path given a dropdown choice or uploaded file.

    Args:
        choice: Dropdown value: 'auto', a preset name, or 'Custom'.
        uploaded_file: Value of the upload widget; only consulted when
            ``choice`` is 'Custom'.

    Returns:
        A font file path, or None when ``choice`` is 'auto'/empty, when no
        candidate path of a preset exists on disk, or when 'Custom' is chosen
        without an upload. None lets PDFAttacker pick a reasonable default.
    """
    if choice == 'auto' or not choice:
        return None

    if choice in _FONT_PRESETS:
        # First candidate that exists wins; None if none are installed.
        return next(
            (path for path in _FONT_PRESETS[choice] if os.path.exists(path)),
            None,
        )

    if choice == 'Custom' and uploaded_file:
        # Plain paths first: pathlib.Path also has a ``.name`` attribute, but
        # it is only the basename, so it must not reach the ``.name`` branch.
        if isinstance(uploaded_file, str):
            return uploaded_file
        if isinstance(uploaded_file, os.PathLike):
            return os.fspath(uploaded_file)
        if isinstance(uploaded_file, dict) and 'name' in uploaded_file:
            return uploaded_file['name']
        # NOTE(review): some Gradio versions appear to hand back a temp-file
        # style object whose ``.name`` is the local path -- confirm against
        # the installed version.
        name = getattr(uploaded_file, 'name', None)
        if isinstance(name, str):
            return name
        # Unknown shape: pass it through unchanged, as before.
        return uploaded_file

    return None


# Theme customization per request
theme = gr.themes.Soft(
    primary_hue="fuchsia",
    secondary_hue="cyan",
    neutral_hue="gray",
    radius_size="none",
    font=[
        gr.themes.GoogleFont("IBM Plex Sans"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
    font_mono=[
        gr.themes.GoogleFont("IBM Plex Mono"),
        "ui-monospace",
        "Consolas",
        "monospace",
    ],
)


def _ensure_tmp_dir() -> str:
    """Ensure the ``tmp`` dir under the current working dir exists; return its path"""
    path = os.path.join(os.getcwd(), "tmp")
    os.makedirs(path, exist_ok=True)
    return path


def _extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from a PDF file for preview

    Returns the concatenated text of all pages with surrounding whitespace
    stripped. Any failure is returned as an ``Error extracting text: ...``
    string instead of being raised, so the UI always has something to show.
    """
    try:
        with open(pdf_path, 'rb') as f:
            reader = PyPDF2.PdfReader(f)
            page_texts = (page.extract_text() for page in reader.pages)
            # Pages that yield no text are skipped.
            return "".join(chunk for chunk in page_texts if chunk).strip()
    except Exception as e:
        return f"Error extracting text: {e}"


def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
    font_choice: str = 'auto',
    uploaded_font=None,
    wrap_on_words: bool = True,
) -> Tuple[str, str, str]:
    """Generate the selected PDF and return (pdf_path, extracted_text, status)

    Args:
        text: Input text; runs of whitespace are collapsed to single spaces.
        mode: 'normal' | 'attacked' | 'targeted'.
        attack_factor: Passed to ``create_attacked_pdf`` in 'attacked' mode.
        target_text: Passed to ``create_targeted_pdf`` in 'targeted' mode.
        font_choice: Font dropdown value, resolved by ``_resolve_font_path``.
        uploaded_font: Uploaded font, used when ``font_choice`` is 'Custom'.
        wrap_on_words: Assigned to the attacker's ``wrap_on_words`` attribute.

    Returns:
        A 3-tuple ``(pdf_path, extracted_text, status)``. On failure
        ``pdf_path`` is an empty string and the other two elements carry the
        error message.
    """
    # Reject unknown modes before doing any work. Always return three values:
    # the UI callback unpacks exactly three.
    if mode not in _VALID_MODES:
        return "", f"Unknown mode: {mode}", f"Error: Unknown mode: {mode}"

    tmp_dir = _ensure_tmp_dir()
    timestamp = int(time.time() * 1000)
    output_path = os.path.join(tmp_dir, f"{mode}_{timestamp}.pdf")

    # Clean input text (tolerate None from the UI)
    clean_text = " ".join((text or "").split())

    # resolve font path and create an attacker instance for this request
    font_path = _resolve_font_path(choice=font_choice, uploaded_file=uploaded_font)
    attacker = PDFAttacker(font_path=font_path)
    # apply wrap mode
    attacker.wrap_on_words = wrap_on_words

    # Build a contextual status string for the UI
    resolved_font = font_path or "(auto/default)"
    status_lines = [
        f"Font resolved to: {resolved_font}",
        f"Wrap on words: {wrap_on_words}",
    ]

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:
            # 'targeted': may raise ValueError if not feasible
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Surface errors to the UI; the failure is in generation, not extraction
        return "", f"Error generating PDF: {e}", f"Error: {e}"

    # Extract text to show how the copied/extracted text looks
    extracted = _extract_text_from_pdf(output_path)
    return output_path, extracted, "\n".join(status_lines)


def build_demo():
    """Construct and return the Gradio Blocks demo"""
    with gr.Blocks(theme=theme) as demo:
        gr.Markdown(
            "# PDF Humanizer: Attack demo\n"
            "Generate PDFs that look normal but extract differently when copied"
        )

        with gr.Row():
            txt = gr.Textbox(
                lines=8,
                label="Input text",
                value="Enter or paste text here...",
            )

            with gr.Column():
                mode = gr.Radio(
                    choices=['normal', 'attacked', 'targeted'],
                    value='attacked',
                    label='Mode',
                )
                attack_factor = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.7,
                    label='Attack factor (attacked mode)',
                )
                target_text = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate = gr.Button('Generate PDF')

                # Font selection: presets + custom upload
                font_choice = gr.Dropdown(
                    choices=[
                        'auto',
                        'DejaVu Serif',
                        'Liberation Serif',
                        'FreeSerif',
                        'Arial',
                        'Helvetica',
                        'Times New Roman',
                        'Roboto',
                        'Courier',
                        'Custom',
                    ],
                    value='auto',
                    label='Font',
                )
                upload_font = gr.File(label='Upload TTF/OTF (optional)', file_count='single')
                wrap_on_words = gr.Checkbox(label='Wrap on words', value=True)

        download_file = gr.File(label='Download generated PDF')
        extracted_preview = gr.Textbox(lines=8, label='Extracted text preview')
        status_box = gr.Textbox(lines=4, label='Status')

        def _on_generate(text, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words):
            path, extracted, status = generate_pdf(
                text=text,
                mode=mode,
                attack_factor=attack_factor,
                target_text=target_text,
                font_choice=font_choice,
                uploaded_font=upload_font,
                wrap_on_words=wrap_on_words,
            )
            # An empty path means failure: return no file, and keep the error
            # message in the preview and status boxes.
            return (path or None), extracted, status

        generate.click(
            fn=_on_generate,
            inputs=[txt, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words],
            outputs=[download_file, extracted_preview, status_box],
        )

    return demo


if __name__ == '__main__':
    app = build_demo()
    app.launch(server_name='0.0.0.0', server_port=7860)