"""Simple Gradio demo for the PDF attacker tools.

Allows entering text, choosing an attack type, and downloading the generated PDF.
"""
|
import os
import time
from typing import Optional, Tuple

import PyPDF2
import gradio as gr

from pdf_attacker import PDFAttacker
|
|
|
|
|
def _resolve_font_path(choice: str, uploaded_file) -> Optional[str]:
    """Return a font file path for a dropdown choice or an uploaded file.

    Args:
        choice: Dropdown value: 'auto', a preset font name, or 'Custom'.
        uploaded_file: Value of the font upload component. Only consulted
            when ``choice`` is 'Custom'. Accepted shapes are a path
            (``str`` / ``os.PathLike``), a dict carrying a 'name' key, or a
            file-like object exposing a string ``name`` attribute.

    Returns:
        * For a preset: the first candidate path that exists on disk, or
          None when none of the candidates is installed.
        * For 'Custom' with an upload: the uploaded file's path.
        * None for 'auto', an empty choice, 'Custom' without an upload, or
          an unrecognised choice, so PDFAttacker will pick a reasonable
          default.
    """
    if not choice or choice == 'auto':
        return None

    # Candidate locations per preset, tried in order; later entries are
    # fallbacks to a similar-looking font when the named one is missing.
    presets = {
        'DejaVu Serif': [
            '/usr/share/fonts/truetype/dejavu/DejaVuSerif.ttf',
        ],
        'Liberation Serif': [
            '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
        ],
        'FreeSerif': [
            '/usr/share/fonts/truetype/freefont/FreeSerif.ttf',
        ],
        'DejaVu Sans': [
            '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
        ],
        'Arial': [
            '/usr/share/fonts/truetype/msttcorefonts/Arial.ttf',
            '/usr/share/fonts/truetype/msttcorefonts/arial.ttf',
            '/usr/share/fonts/truetype/arial/arial.ttf',
            '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
        ],
        'Helvetica': [
            '/usr/share/fonts/truetype/urw-base35/Helvetica.ttf',
            '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf',
            '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
        ],
        'Times New Roman': [
            '/usr/share/fonts/truetype/msttcorefonts/Times_New_Roman.ttf',
            '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
        ],
        'Roboto': [
            '/usr/share/fonts/truetype/roboto/Roboto-Regular.ttf',
        ],
        'Courier': [
            '/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf',
            '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf',
        ],
        'Times': [
            '/usr/share/fonts/truetype/liberation/LiberationSerif-Regular.ttf',
        ],
    }

    candidates = presets.get(choice)
    if candidates is not None:
        # First installed candidate wins; None when nothing is installed.
        return next((path for path in candidates if os.path.exists(path)), None)

    if choice == 'Custom' and uploaded_file:
        if isinstance(uploaded_file, dict) and 'name' in uploaded_file:
            return uploaded_file['name']
        # Check for real paths before looking at ``.name``: pathlib.Path has
        # a ``name`` attribute too, but it is only the basename.
        if isinstance(uploaded_file, (str, os.PathLike)):
            return os.fspath(uploaded_file)
        # File-like upload wrappers expose the on-disk path as ``.name``.
        file_name = getattr(uploaded_file, 'name', None)
        if isinstance(file_name, str) and file_name:
            return file_name
        # Unknown shape: hand it back unchanged, as before.
        return uploaded_file

    return None
|
|
|
|
|
# Font fallback stacks for the demo theme: a Google web font first, then
# generic system families.
_UI_FONT_STACK = [
    gr.themes.GoogleFont("IBM Plex Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]
_MONO_FONT_STACK = [
    gr.themes.GoogleFont("IBM Plex Mono"),
    "ui-monospace",
    "Consolas",
    "monospace",
]

# Theme applied to the Blocks UI: Soft base with custom hues and
# radius_size set to "none".
theme = gr.themes.Soft(
    primary_hue="fuchsia",
    secondary_hue="cyan",
    neutral_hue="gray",
    radius_size="none",
    font=_UI_FONT_STACK,
    font_mono=_MONO_FONT_STACK,
)
|
|
|
|
|
def _ensure_tmp_dir() -> str:
    """Create ``tmp`` under the current working directory if missing.

    Returns:
        The path of that directory (``<cwd>/tmp``).
    """
    tmp_dir = os.path.join(os.getcwd(), "tmp")
    # exist_ok=True makes repeated calls harmless.
    os.makedirs(tmp_dir, exist_ok=True)
    return tmp_dir
|
|
|
|
|
def _extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text PyPDF2 extracts from the PDF at ``pdf_path``.

    The text of all pages is concatenated without a separator and stripped
    of leading/trailing whitespace. Pages that yield no text are skipped.
    Any failure (opening, parsing, extracting) is reported as an
    "Error extracting text: ..." string instead of being raised, because
    the result is only shown in a preview box.
    """
    try:
        with open(pdf_path, 'rb') as pdf_file:
            pages = PyPDF2.PdfReader(pdf_file).pages
            # Extract while the file is still open.
            chunks = [page.extract_text() for page in pages]
        return "".join(chunk for chunk in chunks if chunk).strip()
    except Exception as exc:
        return f"Error extracting text: {exc}"
|
|
|
|
|
def generate_pdf(
    text: str,
    mode: str,
    attack_factor: float = 0.7,
    target_text: str = "",
    font_choice: str = 'auto',
    uploaded_font=None,
    wrap_on_words: bool = True,
) -> Tuple[str, str, str]:
    """Generate the selected PDF and return ``(pdf_path, extracted_text, status)``.

    Args:
        text: Input text. Runs of whitespace (including newlines) are
            collapsed to single spaces before rendering; None is treated as
            empty text.
        mode: One of 'normal', 'attacked' or 'targeted'.
        attack_factor: Passed to ``create_attacked_pdf``; only used in
            'attacked' mode.
        target_text: Passed to ``create_targeted_pdf``; only used in
            'targeted' mode.
        font_choice: Font dropdown value, resolved via ``_resolve_font_path``.
        uploaded_font: Uploaded font file, used when ``font_choice`` is
            'Custom'.
        wrap_on_words: Assigned to ``PDFAttacker.wrap_on_words``.

    Returns:
        A 3-tuple on every path:

        * success: (path of the generated PDF, text extracted back from it,
          status lines describing the resolved font and wrap setting);
        * unknown mode or generation failure: ("", error message,
          "Error: ..." status).
    """
    # Validate first: the mode is embedded in the output filename, and
    # nothing should be created for a mode that cannot be generated.
    if mode not in ('normal', 'attacked', 'targeted'):
        message = f"Unknown mode: {mode}"
        return "", message, f"Error: {message}"

    tmp_dir = _ensure_tmp_dir()
    # Millisecond timestamp gives each generated file a distinct name.
    timestamp = int(time.time() * 1000)
    output_path = os.path.join(tmp_dir, f"{mode}_{timestamp}.pdf")

    # Collapse all whitespace so the PDF layout code receives one flat line.
    clean_text = " ".join((text or "").split())

    font_path = _resolve_font_path(choice=font_choice, uploaded_file=uploaded_font)
    attacker = PDFAttacker(font_path=font_path)
    attacker.wrap_on_words = wrap_on_words

    resolved_font = font_path or "(auto/default)"
    status_lines = [
        f"Font resolved to: {resolved_font}",
        f"Wrap on words: {wrap_on_words}",
    ]

    try:
        if mode == 'normal':
            attacker.create_normal_pdf(text=clean_text, output_path=output_path)
        elif mode == 'attacked':
            attacker.create_attacked_pdf(
                text=clean_text,
                output_path=output_path,
                attack_factor=attack_factor,
            )
        else:  # 'targeted' (mode was validated above)
            attacker.create_targeted_pdf(
                text=clean_text,
                target_text=target_text,
                output_path=output_path,
            )
    except Exception as e:
        # Report the failure in the UI instead of raising; this is the
        # generation step, so do not describe it as an extraction error.
        return "", f"Error generating PDF: {e}", f"Error: {e}"

    extracted = _extract_text_from_pdf(output_path)
    return output_path, extracted, "\n".join(status_lines)
|
|
|
|
|
def build_demo():
    """Build the Gradio Blocks UI for the demo and return it (not launched)."""
    with gr.Blocks(theme=theme) as demo:
        gr.Markdown("# PDF Humanizer: Attack demo\nGenerate PDFs that look normal but extract differently when copied")

        with gr.Row():
            txt = gr.Textbox(lines=8, label="Input text", value="Enter or paste text here...")
            with gr.Column():
                mode = gr.Radio(
                    choices=['normal', 'attacked', 'targeted'],
                    value='attacked',
                    label='Mode',
                )
                attack_factor = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.7,
                    label='Attack factor (attacked mode)',
                )
                target_text = gr.Textbox(lines=2, label='Target text (targeted mode)')
                generate = gr.Button('Generate PDF')

                # NOTE(review): block nesting was reconstructed from a
                # flattened source; confirm the font controls belong in
                # this column rather than below the row.
                font_choice = gr.Dropdown(
                    choices=[
                        'auto',
                        'DejaVu Serif',
                        'Liberation Serif',
                        'FreeSerif',
                        'Arial',
                        'Helvetica',
                        'Times New Roman',
                        'Roboto',
                        'Courier',
                        'Custom',
                    ],
                    value='auto',
                    label='Font',
                )
                upload_font = gr.File(label='Upload TTF/OTF (optional)', file_count='single')
                wrap_on_words = gr.Checkbox(label='Wrap on words', value=True)

        # Output widgets, filled by the click handler below.
        download_file = gr.File(label='Download generated PDF')
        extracted_preview = gr.Textbox(lines=8, label='Extracted text preview')
        status_box = gr.Textbox(lines=4, label='Status')

        def _on_generate(text, mode, attack_factor, target_text, font_choice, upload_font, wrap_on_words):
            """Run generate_pdf with the widget values and map the result to the outputs."""
            path, extracted, status = generate_pdf(
                text=text,
                mode=mode,
                attack_factor=attack_factor,
                target_text=target_text,
                font_choice=font_choice,
                uploaded_font=upload_font,
                wrap_on_words=wrap_on_words,
            )
            # An empty path means no file was produced: give the File output
            # None rather than "".
            return (path or None), extracted, status

        # Order must match the positional parameters of _on_generate.
        handler_inputs = [
            txt,
            mode,
            attack_factor,
            target_text,
            font_choice,
            upload_font,
            wrap_on_words,
        ]
        handler_outputs = [download_file, extracted_preview, status_box]
        generate.click(fn=_on_generate, inputs=handler_inputs, outputs=handler_outputs)

    return demo
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the UI and serve it on all network
    # interfaces (0.0.0.0) at port 7860.
    app = build_demo()
    app.launch(
        server_name='0.0.0.0',
        server_port=7860,
    )
|
|