"""Gradio app that scores how likely an uploaded document is AI-generated.

Text is pulled out of a PDF, DOCX, or TXT upload and classified with the
``roberta-base-openai-detector`` sequence-classification checkpoint.
"""
import os

import docx
import gradio as gr
import pdfplumber
import pytesseract
import torch
from pdf2image import convert_from_path
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load model
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Only this many leading characters of the document are sent to the tokenizer;
# the tokenizer additionally truncates to the model's maximum sequence length.
MAX_INPUT_CHARS = 1000

# Prefix used by extract_text_from_file to signal a failure to its caller.
ERROR_PREFIX = "❌"


# ---- File Processing ----
def extract_text_from_file(file_path):
    """Return the text content of a PDF, DOCX, or TXT file.

    Args:
        file_path: Path of the file to read. ``None`` or an empty path is
            accepted and yields an empty string (Gradio passes ``None`` when
            the upload is cleared).

    Returns:
        The extracted text with surrounding whitespace stripped. An empty
        string is returned for unsupported extensions or empty documents.
        When extraction fails, a message starting with "❌" describing the
        error is returned instead of raising.
    """
    if not file_path:
        return ""

    # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
    extension = os.path.splitext(file_path)[1].lower()
    text = ""

    if extension == ".pdf":
        try:
            # First try pdfplumber (works for PDFs with an embedded text layer).
            with pdfplumber.open(file_path) as pdf:
                # Join pages with a newline so the last word of one page does
                # not fuse with the first word of the next.
                text = "\n".join(page.extract_text() or "" for page in pdf.pages)

            # If nothing was extracted (e.g. a scanned PDF), fall back to OCR.
            if not text.strip():
                images = convert_from_path(file_path)
                text = "\n".join(pytesseract.image_to_string(img) for img in images)
        except Exception as e:
            # Boundary handler: report the failure to the UI as text.
            return f"❌ PDF extraction error: {str(e)}"

    elif extension == ".docx":
        try:
            doc = docx.Document(file_path)
            text = " ".join(para.text for para in doc.paragraphs)
        except Exception as e:
            return f"❌ DOCX extraction error: {str(e)}"

    elif extension == ".txt":
        try:
            # errors="ignore" drops undecodable bytes instead of failing.
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
        except Exception as e:
            return f"❌ TXT extraction error: {str(e)}"

    return text.strip()


# ---- AI Detection ----
def _ai_class_index():
    """Return the logit index of the "AI-generated" class.

    The index is looked up in the checkpoint's ``id2label`` mapping (label
    "Fake") rather than hard-coded, so a differently ordered config still
    produces the right score.
    """
    id2label = getattr(model.config, "id2label", None) or {}
    for index, label in id2label.items():
        if str(label).lower() == "fake":
            return int(index)
    # NOTE(review): fallback assumes the reference detector's (fake, real)
    # output order, i.e. index 0 is the AI-generated class - confirm against
    # the model card if the config ever lacks named labels.
    return 0


def detect_ai(file_path):
    """Score an uploaded file for AI-generated text on a 0-10 scale.

    Args:
        file_path: Path of the uploaded file, or ``None`` when no file is
            selected.

    Returns:
        A human-readable result string: either the likelihood score or an
        error message when no text could be extracted.
    """
    text = extract_text_from_file(file_path)
    if not text or text.startswith(ERROR_PREFIX):
        return "❌ Could not extract text"

    # Truncate to fit model input
    inputs = tokenizer(
        text[:MAX_INPUT_CHARS],
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    with torch.no_grad():
        outputs = model(**inputs)

    # logits have shape (batch, num_labels); the batch holds a single document,
    # so select row 0 before picking the class column.
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    ai_prob = probs[0, _ai_class_index()].item()

    score = round(ai_prob * 10, 2)  # scale 0–10
    return f"🤖 AI-Generated Likelihood: {score}/10"


# ---- Gradio UI ----
with gr.Blocks() as demo:
    gr.Markdown(
        "## 📄 AI Text Detector\n"
        "Upload a PDF, DOCX, or TXT file to check if it's AI-generated."
    )
    file_input = gr.File(
        type="filepath",
        file_types=[".pdf", ".docx", ".txt"],
        label="Upload File",
    )
    output = gr.Textbox(label="Detection Result")
    # Re-run detection whenever the uploaded file changes (including clears).
    file_input.change(fn=detect_ai, inputs=file_input, outputs=output)

if __name__ == "__main__":
    demo.launch()