Spaces:

Pratham0409
/

ai-text-detector

Sleeping

App Files Files Community

Pratham0409 committed on Aug 12

Commit

2ef2e08

verified ·

1 Parent(s): 9aebba0

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -15

app.py CHANGED Viewed

@@ -1,28 +1,69 @@
 import gradio as gr
-from transformers import pipeline
# Build the Hugging Face text-classification pipeline once, at import time,
# so every request reuses the same loaded RoBERTa-based detector model.
pipe = pipeline(
    task="text-classification",
    model="openai-community/roberta-base-openai-detector",
)
 def detect_ai_text(text):
     """
     Classify ``text`` and return the model's score for every label.

     Args:
         text: The passage to analyze.

     Returns:
         A dict mapping each label name reported by the pipeline to its score.
         The label names are whatever the loaded model's configuration defines.
     """
     # top_k=None asks for the scores of all labels; without it the pipeline
     # reports only the single best label. truncation=True clips inputs that
     # exceed the model's maximum length instead of letting the call raise,
     # which means only the leading part of a very long text is scored.
     results = pipe(text, top_k=None, truncation=True)
     # Depending on the transformers version, the result for a single input
     # may be wrapped in an outer list; unwrap it so both shapes are handled.
     if results and isinstance(results[0], list):
         results = results[0]
     return {item['label']: item['score'] for item in results}
# Wire the detector into a Gradio UI: one multi-line text input, JSON output.
iface = gr.Interface(
    title="AI Content Detector",
    description=(
        "A simple API to detect AI-generated text. "
        "Powered by roberta-base-openai-detector."
    ),
    fn=detect_ai_text,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Paste the text you want to analyze here...",
    ),
    outputs="json",
)
# Start serving the interface with Gradio's default launch settings.
iface.launch()

 import gradio as gr
+from transformers import pipeline, AutoTokenizer
# --- MODEL LOADING ---
# Load the tokenizer once and hand that same instance to the pipeline.
# detect_ai_text uses the tokenizer to measure and split long inputs, so it
# must be the exact tokenizer the pipeline applies; sharing one instance also
# avoids loading the tokenizer files twice.
model_name = "openai-community/roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(model_name)
pipe = pipeline("text-classification", model=model_name, tokenizer=tokenizer)
 def _label_scores(text):
     """Run the classifier once on ``text`` and return ``{label: score}`` for all labels."""
     # top_k=None requests every label's score (the default reports only the
     # best label, which would bias any per-label average). truncation=True is
     # a safety net: decoding a chunk and re-tokenizing it is not guaranteed to
     # reproduce the same number of tokens, so a chunk could otherwise overflow.
     results = pipe(text, top_k=None, truncation=True)
     # Depending on the transformers version, a single input's result may be
     # wrapped in an outer list; unwrap it so both shapes are handled.
     if results and isinstance(results[0], list):
         results = results[0]
     return {item['label']: item['score'] for item in results}


 def detect_ai_text(text):
     """
     Analyze ``text`` for AI generation, chunking inputs that exceed the model limit.

     Short inputs are classified in a single pass. Longer inputs are split into
     overlapping token windows, each window is classified separately, and the
     per-label scores are averaged (unweighted) across windows.

     Args:
         text: The passage to analyze.

     Returns:
         A dict mapping each label name reported by the pipeline to its score,
         ordered from highest to lowest score. For chunked inputs the scores are
         averages and an extra ``'note'`` key states how many chunks were used.
         If no chunk produced any score, ``{"error": "Could not process text."}``
         is returned.
     """
     chunk_overlap = 50  # tokens shared between consecutive windows
     # Tokenizers that do not declare a limit report a huge sentinel value,
     # which would disable chunking entirely, so cap the limit.
     # NOTE(review): 512 is assumed to be this model's input limit - confirm.
     fallback_max_tokens = 512
     # Reserve two positions for the special tokens the pipeline adds around
     # every input it tokenizes.
     max_length = min(tokenizer.model_max_length, fallback_max_tokens) - 2

     # Tokenize without special tokens so they neither count against the budget
     # nor end up as literal text inside decoded chunks.
     tokens = tokenizer.encode(text, add_special_tokens=False)

     # If the text is short enough, process it in one go.
     if len(tokens) <= max_length:
         return _label_scores(text)

     # --- CHUNKING LOGIC FOR LONG TEXT ---
     stride = max(1, max_length - chunk_overlap)
     totals = {}
     chunk_count = 0
     for start in range(0, len(tokens), stride):
         chunk_text = tokenizer.decode(tokens[start:start + max_length])
         for label, score in _label_scores(chunk_text).items():
             totals[label] = totals.get(label, 0.0) + score
         chunk_count += 1
         # Stop once a window reaches the end of the text; a further window
         # would contain only tokens that were already scored.
         if start + max_length >= len(tokens):
             break

     # If for some reason no scores were collected, return an error state.
     if not totals:
         return {"error": "Could not process text."}

     # Average each label over all chunks and list the strongest label first,
     # using the same label names as the single-pass result.
     ranked = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
     aggregated = {label: total / chunk_count for label, total in ranked}
     aggregated['note'] = f'Result aggregated from {chunk_count} chunks.'
     return aggregated
# --- GRADIO INTERFACE ---
# One multi-line text input feeding detect_ai_text; the result dict is shown as JSON.
iface = gr.Interface(
    title="AI Content Detector (Robust Version)",
    description=(
        "This version handles long texts by breaking them into chunks. "
        "It analyzes text for AI generation using the "
        "roberta-base-openai-detector model."
    ),
    fn=detect_ai_text,
    inputs=gr.Textbox(
        lines=15,
        placeholder="Paste the text you want to analyze here...",
    ),
    outputs="json",
)
# Start serving the interface with Gradio's default launch settings.
iface.launch()