Spaces:

iyadalagha
/

ai-text-detector-app

Sleeping

App Files Files

xet

Community

iyadalagha committed on Aug 27

Commit

7e73b77

1 Parent(s): 861190f

handle both ar and eng

Browse files

Files changed (4) hide show

Dockerfile +5 -23
README.md +28 -7
app.py +77 -177
requirements.txt +2 -1

Dockerfile CHANGED Viewed

@@ -1,29 +1,11 @@
-# Use Python 3.9 as the base image
-FROM python:3.9
-# Set working directory in the container
 WORKDIR /app
-# Create a non-root user and set permissions
-RUN useradd -m myuser && chown -R myuser:myuser /app
-USER myuser
-# Set Hugging Face cache directory
-ENV HF_HOME=/app/.cache/huggingface
-# Update PATH for uvicorn
-ENV PATH="/home/myuser/.local/bin:${PATH}"
-# Copy requirements.txt and install dependencies
-COPY --chown=myuser:myuser requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# Clear cache and pre-download models
-RUN rm -rf /app/.cache/huggingface/* && python -c "from transformers import pipeline; pipeline('text-classification', model='desklib/ai-text-detector-v1.01'); pipeline('text-classification', model='akshayvkt/detect-ai-text'); pipeline('text-classification', model='sabaridsnfuji/arabic-ai-text-detector')"
-# Copy the application code
-COPY --chown=myuser:myuser . .
-# Run the FastAPI app with Uvicorn
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+FROM python:3.11-slim
 WORKDIR /app
+# Copy requirements.txt on its own first so the dependency layer is reused
+# until the requirements change.
+COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+EXPOSE 7860
+# The FastAPI instance is defined in app.py, so the import string is "app:app"
+# ("main:app" would fail at start-up because there is no main module).
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,9 +1,30 @@
 ---
-title: AI Text Detector
-emoji: 🤖
-colorFrom: blue
-colorTo: green
-sdk: docker
-app_port: 7860
 ---
-A FastAPI app using akshayvkt/detect-ai-text to classify text as AI-generated or human-written for IUG.

+# 🤖 AI Text Detector (Improved)
+This project provides a **FastAPI app** for detecting whether a given text is **AI-generated** or **human-written**.
+Unlike the original version, this detector uses **two signals**:
+1. **Classifier score** from [`Hello-SimpleAI/chatgpt-detector-roberta`](https://huggingface.co/Hello-SimpleAI/chatgpt-detector-roberta)
+   → Probability that the text is AI-generated.
+2. **Perplexity score** using **GPT-2**
+   → Measures how “predictable” the text is. Lower perplexity often indicates AI-like fluency.
+The app then combines both scores into a **final label**: `AI`, `Human`, or `Uncertain`.
 ---
+## 🔧 Tech Stack
+- Python 3.11
+- FastAPI
+- Hugging Face Transformers
+- PyTorch
+- Uvicorn
+- SciPy (installed via `requirements.txt`; the perplexity math itself is done with PyTorch)
 ---
+## 🚀 Running the App
+### 1. Build the Docker image
+```bash
+docker build -t ai-detector .

app.py CHANGED Viewed

@@ -1,181 +1,81 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel, validator
-import re
 import torch
-from transformers import pipeline
-from collections import Counter
-import logging
-import numpy as np
-# Configure logging with more detail
-logging.basicConfig(filename="predictions.log", level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
-app = FastAPI()
-# Enable GPU if available, else use CPU
-device = 0 if torch.cuda.is_available() else -1
-torch.manual_seed(42)
-# Load AI detection models
-english_detectors = [
-    pipeline("text-classification", model="desklib/ai-text-detector-v1.01", truncation=True, max_length=512)
-    pipeline("text-classification", model="akshayvkt/detect-ai-text", device=device, truncation=True, max_length=512),
-]
-arabic_detector = pipeline("text-classification", model="sabaridsnfuji/arabic-ai-text-detector", device=device, truncation=True, max_length=512)
-def detect_language(text: str) -> str:
-    """Detect if text is Arabic or English based on Unicode character ranges."""
-    arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
-    latin_chars = len(re.findall(r'[A-Za-z]', text))
-    total_chars = arabic_chars + latin_chars
-    if total_chars == 0:
-        return 'en'
-    arabic_ratio = arabic_chars / total_chars
-    return 'ar' if arabic_ratio > 0.5 else 'en'
-def calculate_burstiness(text: str) -> float:
-    """Calculate burstiness (std/mean of sentence lengths) to bias toward human text."""
-    sentences = re.split(r'[.!?]', text)
-    lengths = [len(s.split()) for s in sentences if s]
-    return np.std(lengths) / (np.mean(lengths) + 1e-6) if lengths else 0
-def clean_text(text: str, language: str) -> str:
-    """Clean text by removing special characters and normalizing spaces. Skip lowercase for Arabic."""
-    text = re.sub(r'\s+', ' ', text)
-    text = re.sub(r'[^\w\s.,!?]', '', text)
-    text = text.strip()
-    if language == 'en':
-        text = text.lower()
-    return text
-def split_text(text: str, max_chars: int = 5000) -> list:
-    """Split text into chunks of max_chars, preserving sentence boundaries."""
-    sentences = re.split(r'(?<=[.!?])\s+', text)
-    chunks = []
-    current_chunk = ""
-    for sentence in sentences:
-        if len(current_chunk) + len(sentence) <= max_chars:
-            current_chunk += sentence + " "
-        else:
-            if current_chunk:
-                chunks.append(current_chunk.strip())
-            current_chunk = sentence + " "
-    if current_chunk:
-        chunks.append(current_chunk.strip())
-    return chunks
-class TextInput(BaseModel):
     text: str
-    @validator("text")
-    def validate_text(cls, value):
-        """Validate input text for minimum length and content."""
-        word_count = len(value.split())
-        if word_count < 50:
-            raise ValueError(f"Text too short ({word_count} words). Minimum 50 words required.")
-        if not re.search(r'[\w]', value):
-            raise ValueError("Text must contain alphabetic characters.")
-        return value
-@app.post("/predict")
-def predict(input: TextInput):
-    detected_lang = detect_language(input.text)
-    note_lang = f"Detected language: {'Arabic' if detected_lang == 'ar' else 'English'}"
-    cleaned_text = clean_text(input.text, detected_lang)
-    burstiness = calculate_burstiness(cleaned_text)
-    note_burst = f"Burstiness: {burstiness:.2f} (high suggests human)"
-    if detected_lang == 'ar':
-        detector = arabic_detector
-        is_ensemble = False
-    else:
-        detector = english_detectors
-        is_ensemble = True
-    if len(cleaned_text) > 10000:
-        chunks = split_text(cleaned_text, max_chars=5000)
-        if is_ensemble:
-            all_results = [det(chunks, truncation=True, max_length=512) for det in detector]
-            labels = []
-            scores = []
-            for chunk_idx in range(len(chunks)):
-                chunk_labels = []
-                chunk_scores = []
-                for det_idx, det_results in enumerate(all_results):
-                    score = det_results[chunk_idx]['score']
-                    label = "AI" if score >= 0.98 else "Human" if score < 0.55 else "Uncertain"
-                    chunk_labels.append(label)
-                    chunk_scores.append(score)
-                    logging.debug(f"Chunk {chunk_idx}, Model {det_idx}: Label={label}, Score={score:.4f}")
-                chunk_final_label = Counter(chunk_labels).most_common(1)[0][0]
-                if chunk_final_label == "Uncertain" or len(set(chunk_labels)) == len(detector) or any(l == "Human" for l in chunk_labels):  # Prioritize Human if any model predicts it
-                    chunk_final_label = "Human" if burstiness > 1.5 else "Uncertain"
-                labels.append(chunk_final_label)
-                scores.append(np.mean(chunk_scores))
-                logging.debug(f"Chunk {chunk_idx} Final: Label={chunk_final_label}, Avg Score={np.mean(chunk_scores):.4f}, Burstiness={burstiness:.2f}")
-            label_counts = Counter(labels)
-            final_label = label_counts.most_common(1)[0][0]
-            if final_label == "Uncertain" or len(set(labels)) == len(detector) or any(l == "Human" for l in labels):
-                final_label = "Human" if burstiness > 1.5 else "Uncertain"
-            avg_score = sum(scores) / len(scores) if scores else 0.0
-            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Chunks: {len(chunks)} | Prediction: {final_label} | Score: {avg_score:.4f} | Burstiness: {burstiness:.2f}")
-            return {
-                "prediction": final_label,
-                "score": avg_score,
-                "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_burst}.",
-                "chunk_results": [
-                    {"chunk": chunk[:50] + "...", "label": labels[i], "score": scores[i], "burstiness": burstiness}
-                    for i, chunk in enumerate(chunks)
-                ]
-            }
-        else:
-            results = detector(chunks, truncation=True, max_length=512)
-            labels = ["AI" if res['score'] >= 0.95 else "Human" if res['score'] < 0.60 else "Uncertain" for res in results]
-            if any(l == "Uncertain" for l in labels) or any(l == "Human" for l in labels):
-                labels = ["Human" if l == "Uncertain" or l == "Human" else l for l in labels if burstiness > 1.0]
-            label_counts = Counter(labels)
-            final_label = label_counts.most_common(1)[0][0]
-            scores = [res['score'] for res, label in zip(results, labels) if label == final_label]
-            avg_score = sum(scores) / len(scores) if scores else 0.0
-            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Chunks: {len(chunks)} | Prediction: {final_label} | Score: {avg_score:.4f} | Burstiness: {burstiness:.2f}")
-            return {
-                "prediction": final_label,
-                "score": avg_score,
-                "note": f"{note_lang}. Text was split into {len(chunks)} chunks due to length > 10,000 characters. {note_burst}.",
-                "chunk_results": [
-                    {"chunk": chunk[:50] + "...", "label": labels[i], "score": results[i]['score'], "burstiness": burstiness}
-                    for i, chunk in enumerate(chunks)
-                ]
-            }
     else:
-        if is_ensemble:
-            results = [det(cleaned_text, truncation=True, max_length=512)[0] for det in detector]
-            labels = []
-            scores = []
-            for det_idx, result in enumerate(results):
-                score = result['score']
-                label = "AI" if score >= 0.98 else "Human" if score < 0.55 else "Uncertain"
-                labels.append(label)
-                scores.append(score)
-                logging.debug(f"Model {det_idx}: Label={label}, Score={score:.4f}")
-            label_counts = Counter(labels)
-            final_label = label_counts.most_common(1)[0][0]
-            if final_label == "Uncertain" or len(set(labels)) == len(detector) or any(l == "Human" for l in labels):
-                final_label = "Human" if burstiness > 1.5 else "Uncertain"
-            avg_score = sum(scores) / len(scores) if scores else 0.0
-            note = f"{note_lang}. Ensemble used: {len(detector)} models. {note_burst}."
-            if 0.55 <= avg_score < 0.98:
-                note += " Warning: Close to threshold, result may be uncertain."
-            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Score: {avg_score:.4f} | Burstiness: {burstiness:.2f} | Model Scores: {scores}")
-        else:
-            result = detector(cleaned_text, truncation=True, max_length=512)[0]
-            score = result['score']
-            final_label = "AI" if score >= 0.95 else "Human" if score < 0.60 else "Uncertain"
-            if final_label == "Uncertain" or final_label == "Human":
-                final_label = "Human" if burstiness > 1.0 else "Uncertain"
-            avg_score = score
-            note = f"{note_lang}. {note_burst}."
-            if 0.60 <= score < 0.95:
-                note += " Warning: Close to threshold, result may be uncertain."
-            logging.info(f"Language: {detected_lang} | Text Length: {len(cleaned_text)} | Prediction: {final_label} | Score: {avg_score:.4f} | Burstiness: {burstiness:.2f}")
-        return {"prediction": final_label, "score": avg_score, "note": note}

+import math
+
 import torch
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
+# ASGI application object; the route handler below is registered on it.
+app = FastAPI(title="Improved AI Text Detector")
+# 1. Classifier model (better than akshayvkt)
+#    Tokenizer and sequence-classification weights are loaded once at import
+#    time and reused by get_classifier_score() on every request.
+clf_model_name = "Hello-SimpleAI/chatgpt-detector-roberta"
+clf_tokenizer = AutoTokenizer.from_pretrained(clf_model_name)
+clf_model = AutoModelForSequenceClassification.from_pretrained(clf_model_name)
+# 2. Perplexity model (GPT-2)
+#    Causal language model, also loaded once at import time, used by
+#    get_perplexity() to score the text.
+ppl_model_name = "gpt2"
+ppl_tokenizer = AutoTokenizer.from_pretrained(ppl_model_name)
+ppl_model = AutoModelForCausalLM.from_pretrained(ppl_model_name)
+class InputText(BaseModel):
+    """Request body accepted by the ``/detect`` endpoint."""
+    # Raw text to analyse; the endpoint strips surrounding whitespace itself.
     text: str
+def get_classifier_score(text: str) -> float:
+    inputs = clf_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = clf_model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=-1)
+        ai_prob = probs[0][1].item()  # label 1 = AI
+    return ai_prob
+def get_perplexity(text: str) -> float:
+    encodings = ppl_tokenizer(text, return_tensors="pt")
+    max_length = ppl_model.config.n_positions
+    stride = 512
+    seq_len = encodings.input_ids.size(1)
+    nlls = []
+    prev_end_loc = 0
+    for begin_loc in range(0, seq_len, stride):
+        end_loc = min(begin_loc + stride, seq_len)
+        trg_len = end_loc - prev_end_loc
+        input_ids = encodings.input_ids[:, begin_loc:end_loc]
+        target_ids = input_ids.clone()
+        target_ids[:, :-trg_len] = -100
+        with torch.no_grad():
+            outputs = ppl_model(input_ids, labels=target_ids)
+            neg_log_likelihood = outputs.loss * trg_len
+        nlls.append(neg_log_likelihood)
+        prev_end_loc = end_loc
+        if end_loc == seq_len:
+            break
+    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
+    return ppl.item()
+@app.post("/detect")
+def detect(input_text: InputText):
+    text = input_text.text.strip()
+    # Run classifier
+    clf_score = get_classifier_score(text)
+    # Run perplexity
+    ppl = get_perplexity(text)
+    # Decision rule: combine both
+    # Lower perplexity (<50) + high classifier_score (>0.7) = AI
+    if clf_score > 0.7 and ppl < 50:
+        final = "AI"
+    elif clf_score < 0.3 and ppl > 80:
+        final = "Human"
     else:
+        final = "Uncertain"
+    return {
+        "classifier_score": round(clf_score, 4),
+        "perplexity": round(ppl, 2),
+        "final_label": final
+    }

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ torch==2.4.1
 fastapi==0.115.2
 uvicorn==0.32.0
 pydantic==2.9.2
-numpy==2.0.2

 fastapi==0.115.2
 uvicorn==0.32.0
 pydantic==2.9.2
+numpy==2.0.2
+scipy==1.14.1