Spaces:

VOIDER
/

image-evaluation-tool

Running

App Files Files Community

VOIDER committed on Jul 19

Commit

51aec78

verified ·

1 Parent(s): 83b7522

Upload 3 files

Browse files

Files changed (3) hide show

app.py +85 -139
requirements.txt +13 -14
scoring.py +77 -0

app.py CHANGED Viewed

@@ -8,8 +8,6 @@ import base64
 from typing import List, Dict, Tuple, Optional
 import logging
 from pathlib import Path
-import tempfile
-import os
 import random
 # Simplified imports for testing
@@ -34,9 +32,17 @@ except ImportError as e:
     class MockEvaluator:
         def __init__(self):
             pass
-        def evaluate(self, *args, **kwargs):
-            return random.uniform(5.0, 9.0)
     QualityEvaluator = MockEvaluator
     AestheticsEvaluator = MockEvaluator
     PromptEvaluator = MockEvaluator
@@ -45,11 +51,8 @@ except ImportError as e:
     def extract_png_metadata(path):
         return None
-    def calculate_final_score(quality, aesthetics, prompt, ai_detection, has_prompt=True):
-        if has_prompt:
-            return (quality * 0.25 + aesthetics * 0.35 + prompt * 0.25 + (1-ai_detection) * 0.15)
-        else:
-            return (quality * 0.375 + aesthetics * 0.475 + (1-ai_detection) * 0.15)
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -101,24 +104,11 @@ class ImageEvaluationApp:
     ) -> Tuple[pd.DataFrame, str]:
         """
         Evaluate uploaded images and return results
-        Args:
-            images: List of image file paths
-            enable_quality: Whether to evaluate image quality
-            enable_aesthetics: Whether to evaluate aesthetics
-            enable_prompt: Whether to evaluate prompt following
-            enable_ai_detection: Whether to detect AI generation
-            anime_mode: Whether to use anime-specific models
-            progress: Gradio progress tracker
-        Returns:
-            Tuple of (results_dataframe, status_message)
         """
         if not images:
             return pd.DataFrame(), "No images uploaded."
         try:
-            # Load models based on selection
             selected_models = {
                 'quality': enable_quality,
                 'aesthetics': enable_aesthetics,
@@ -137,42 +127,33 @@ class ImageEvaluationApp:
                         desc=f"Evaluating image {i+1}/{total_images}")
                 try:
-                    # Load image
                     image = Image.open(image_path).convert('RGB')
                     filename = Path(image_path).name
-                    # Extract metadata
                     metadata = extract_png_metadata(image_path)
                     prompt = metadata.get('prompt', '') if metadata else ''
-                    # Initialize scores
                     scores = {
                         'filename': filename,
                         'quality_score': 0.0,
                         'aesthetics_score': 0.0,
                         'prompt_score': 0.0,
                         'ai_detection_score': 0.0,
-                        'has_prompt': bool(prompt),
-                        'prompt_text': prompt[:100] + '...' if len(prompt) > 100 else prompt
                     }
-                    # Evaluate quality
                     if enable_quality and self.quality_evaluator:
-                        scores['quality_score'] = self.quality_evaluator.evaluate(image, anime_mode)
-                    # Evaluate aesthetics
                     if enable_aesthetics and self.aesthetics_evaluator:
-                        scores['aesthetics_score'] = self.aesthetics_evaluator.evaluate(image, anime_mode)
-                    # Evaluate prompt following (only if prompt available)
                     if enable_prompt and self.prompt_evaluator and prompt:
                         scores['prompt_score'] = self.prompt_evaluator.evaluate(image, prompt)
-                    # Evaluate AI detection
                     if enable_ai_detection and self.ai_detection_evaluator:
                         scores['ai_detection_score'] = self.ai_detection_evaluator.evaluate(image)
-                    # Calculate final score
                     scores['final_score'] = calculate_final_score(
                         scores['quality_score'],
                         scores['aesthetics_score'],
@@ -181,177 +162,142 @@ class ImageEvaluationApp:
                         scores['has_prompt']
                     )
-                    # Create thumbnail for display
                     thumbnail = image.copy()
-                    thumbnail.thumbnail((150, 150), Image.Resampling.LANCZOS)
-                    # Convert thumbnail to base64 for display
                     buffer = io.BytesIO()
                     thumbnail.save(buffer, format='PNG')
                     thumbnail_b64 = base64.b64encode(buffer.getvalue()).decode()
-                    scores['thumbnail'] = f"data:image/png;base64,{thumbnail_b64}"
                     results.append(scores)
                 except Exception as e:
                     logger.error(f"Error evaluating {image_path}: {str(e)}")
-                    # Add error entry
                     results.append({
                         'filename': Path(image_path).name,
-                        'quality_score': 0.0,
-                        'aesthetics_score': 0.0,
-                        'prompt_score': 0.0,
-                        'ai_detection_score': 0.0,
-                        'final_score': 0.0,
-                        'has_prompt': False,
-                        'prompt_text': f"Error: {str(e)}",
-                        'thumbnail': ""
                     })
-            # Create DataFrame and sort by final score
             df = pd.DataFrame(results)
-            if not df.empty:
-                df = df.sort_values('final_score', ascending=False).reset_index(drop=True)
-                df.index = df.index + 1  # Start ranking from 1
-                df.index.name = 'Rank'
-            progress(1.0, desc="Evaluation complete!")
-            status_msg = f"Successfully evaluated {len(results)} images."
-            if any('Error:' in str(r.get('prompt_text', '')) for r in results):
-                error_count = sum(1 for r in results if 'Error:' in str(r.get('prompt_text', '')))
                 status_msg += f" {error_count} images had evaluation errors."
-            return df, status_msg
         except Exception as e:
             logger.error(f"Error in evaluate_images: {str(e)}")
             return pd.DataFrame(), f"Error during evaluation: {str(e)}"
 def create_interface():
-    """Create and configure the Gradio interface"""
     app = ImageEvaluationApp()
-    # Custom CSS for better styling
     css = """
-    .gradio-container {
-        max-width: 1200px !important;
-    }
-    .results-table {
-        font-size: 12px;
-    }
-    .thumbnail-cell img {
-        max-width: 100px;
-        max-height: 100px;
-        object-fit: cover;
-    }
     """
     with gr.Blocks(css=css, title="AI Image Evaluation Tool") as interface:
-        gr.Markdown("""
-        # 🎨 AI Image Evaluation Tool
-        Upload your AI-generated images to evaluate their quality, aesthetics, prompt following, and detect AI generation.
-        Supports realistic, anime, and art styles with multiple SOTA models.
-        """)
         with gr.Row():
             with gr.Column(scale=1):
-                # File upload
-                images_input = gr.File(
-                    label="Upload Images",
-                    file_count="multiple",
-                    file_types=["image"],
-                    height=200
-                )
-                # Model selection
                 gr.Markdown("### Model Selection")
                 with gr.Row():
                     enable_quality = gr.Checkbox(label="Image Quality", value=True)
                     enable_aesthetics = gr.Checkbox(label="Aesthetics", value=True)
                 with gr.Row():
                     enable_prompt = gr.Checkbox(label="Prompt Following", value=True)
                     enable_ai_detection = gr.Checkbox(label="AI Detection", value=True)
-                # Additional options
                 gr.Markdown("### Options")
                 anime_mode = gr.Checkbox(label="Anime/Art Mode", value=False)
-                # Evaluate button
                 evaluate_btn = gr.Button("🚀 Evaluate Images", variant="primary", size="lg")
-                # Status
                 status_output = gr.Textbox(label="Status", interactive=False)
-            with gr.Column(scale=2):
-                # Results display
                 gr.Markdown("### 📊 Evaluation Results")
                 results_output = gr.Dataframe(
-                    headers=["Rank", "Filename", "Quality", "Aesthetics", "Prompt", "AI Detection", "Final Score", "Thumbnail"],
-                    datatype=["number", "str", "number", "number", "number", "number", "number", "str"],
                     label="Results",
                     interactive=False,
                     wrap=True,
                     elem_classes=["results-table"]
                 )
-        # Event handlers
         evaluate_btn.click(
             fn=app.evaluate_images,
-            inputs=[
-                images_input,
-                enable_quality,
-                enable_aesthetics,
-                enable_prompt,
-                enable_ai_detection,
-                anime_mode
-            ],
-            outputs=[results_output, status_output],
-            show_progress=True
         )
-        # Examples and help
         with gr.Accordion("ℹ️ Help & Information", open=False):
             gr.Markdown("""
             ### How to Use
-            1. **Upload Images**: Select multiple PNG/JPG images (max 50MB each)
-            2. **Select Models**: Choose which evaluation metrics to use
-            3. **Anime Mode**: Enable for better evaluation of anime/art style images
-            4. **Evaluate**: Click the button to start evaluation
             ### Scoring System
-            - **Quality Score**: Technical image quality (0-10)
-            - **Aesthetics Score**: Visual appeal and composition (0-10)
-            - **Prompt Score**: How well the image follows the text prompt (0-10, requires metadata)
-            - **AI Detection**: Probability of being AI-generated (0-1, lower is better)
-            - **Final Score**: Weighted combination of all metrics (0-10)
-            ### Supported Formats
-            - PNG files with A1111/ComfyUI metadata (for prompt evaluation)
-            - JPG, PNG, WebP images (for other evaluations)
-            - Batch processing of 10-100+ images
-            ### Models Used
-            - **Quality**: LAR-IQA, DGIQA
-            - **Aesthetics**: UNIAA, MUSIQ
-            - **Prompt Following**: CLIP, BLIP-2
-            - **AI Detection**: Sentry-Image, Custom ensemble
             """)
     return interface
 if __name__ == "__main__":
-    # Create the interface
     interface = create_interface()
-    # Launch the app
-    interface.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False,
-        show_error=True
-    )

 from typing import List, Dict, Tuple, Optional
 import logging
 from pathlib import Path
 import random
 # Simplified imports for testing
     class MockEvaluator:
         def __init__(self):
             pass
+        # FIX: Make mock evaluation deterministic based on image content
+        def evaluate(self, image: Image.Image, *args, **kwargs):
+            try:
+                img_bytes = image.tobytes()
+                img_hash = hash(img_bytes)
+                random.seed(img_hash)
+                # Return a consistent score for the same image
+                return random.uniform(5.0, 9.5)
+            except Exception:
+                return random.uniform(5.0, 9.5) # Fallback for any error
     QualityEvaluator = MockEvaluator
     AestheticsEvaluator = MockEvaluator
     PromptEvaluator = MockEvaluator
     def extract_png_metadata(path):
         return None
+    # Use the corrected scoring logic from scoring.py
+    from scoring import calculate_final_score
 # Configure logging
 logging.basicConfig(level=logging.INFO)
     ) -> Tuple[pd.DataFrame, str]:
         """
         Evaluate uploaded images and return results
         """
         if not images:
             return pd.DataFrame(), "No images uploaded."
         try:
             selected_models = {
                 'quality': enable_quality,
                 'aesthetics': enable_aesthetics,
                         desc=f"Evaluating image {i+1}/{total_images}")
                 try:
                     image = Image.open(image_path).convert('RGB')
                     filename = Path(image_path).name
                     metadata = extract_png_metadata(image_path)
                     prompt = metadata.get('prompt', '') if metadata else ''
                     scores = {
                         'filename': filename,
                         'quality_score': 0.0,
                         'aesthetics_score': 0.0,
                         'prompt_score': 0.0,
                         'ai_detection_score': 0.0,
+                        'has_prompt': bool(prompt)
                     }
                     if enable_quality and self.quality_evaluator:
+                        scores['quality_score'] = self.quality_evaluator.evaluate(image, anime_mode=anime_mode)
                     if enable_aesthetics and self.aesthetics_evaluator:
+                        scores['aesthetics_score'] = self.aesthetics_evaluator.evaluate(image, anime_mode=anime_mode)
                     if enable_prompt and self.prompt_evaluator and prompt:
                         scores['prompt_score'] = self.prompt_evaluator.evaluate(image, prompt)
                     if enable_ai_detection and self.ai_detection_evaluator:
                         scores['ai_detection_score'] = self.ai_detection_evaluator.evaluate(image)
                     scores['final_score'] = calculate_final_score(
                         scores['quality_score'],
                         scores['aesthetics_score'],
                         scores['has_prompt']
                     )
                     thumbnail = image.copy()
+                    thumbnail.thumbnail((100, 100), Image.Resampling.LANCZOS)
                     buffer = io.BytesIO()
                     thumbnail.save(buffer, format='PNG')
                     thumbnail_b64 = base64.b64encode(buffer.getvalue()).decode()
+                    # FIX: Use markdown format for Gradio dataframe image display
+                    scores['thumbnail'] = f"![{filename}](data:image/png;base64,{thumbnail_b64})"
                     results.append(scores)
                 except Exception as e:
                     logger.error(f"Error evaluating {image_path}: {str(e)}")
                     results.append({
                         'filename': Path(image_path).name,
+                        'error': str(e),
+                        'thumbnail': ''
                     })
+            if not results:
+                 return pd.DataFrame(), "Evaluation failed for all images."
             df = pd.DataFrame(results)
+            # FIX: Create a display-ready dataframe with proper formatting and column names
+            if not df.empty:
+                # Separate error rows
+                error_df = df[df['final_score'].isna()]
+                valid_df = df.dropna(subset=['final_score'])
+                if not valid_df.empty:
+                    valid_df = valid_df.sort_values('final_score', ascending=False).reset_index(drop=True)
+                    valid_df.index = valid_df.index + 1
+                    valid_df = valid_df.reset_index().rename(columns={'index': 'Rank'})
+                    # Format columns for display
+                    display_cols = {
+                        'Rank': 'Rank',
+                        'thumbnail': 'Thumbnail',
+                        'filename': 'Filename',
+                        'final_score': 'Final Score',
+                        'quality_score': 'Quality',
+                        'aesthetics_score': 'Aesthetics',
+                        'prompt_score': 'Prompt',
+                        'ai_detection_score': 'AI Detection'
+                    }
+                    display_df = valid_df[list(display_cols.keys())]
+                    display_df = display_df.rename(columns=display_cols)
+                    # Apply formatting
+                    for col in ['Final Score', 'Quality', 'Aesthetics', 'Prompt']:
+                        display_df[col] = display_df[col].map('{:.2f}'.format)
+                    display_df['AI Detection'] = display_df['AI Detection'].map('{:.1%}'.format)
+                else:
+                    display_df = pd.DataFrame()
+            status_msg = f"Successfully evaluated {len(df[df['final_score'].notna()])} images."
+            error_count = len(df[df['final_score'].isna()])
+            if error_count > 0:
                 status_msg += f" {error_count} images had evaluation errors."
+            return display_df, status_msg
         except Exception as e:
             logger.error(f"Error in evaluate_images: {str(e)}")
             return pd.DataFrame(), f"Error during evaluation: {str(e)}"
 def create_interface():
     app = ImageEvaluationApp()
     css = """
+    .gradio-container { max-width: 1400px !important; }
+    .results-table { font-size: 14px; }
+    .results-table .thumbnail-cell img { max-width: 100px; max-height: 100px; object-fit: cover; }
     """
     with gr.Blocks(css=css, title="AI Image Evaluation Tool") as interface:
+        gr.Markdown("# 🎨 AI Image Evaluation Tool")
+        gr.Markdown("Upload your AI-generated images to evaluate their quality, aesthetics, prompt following, and detect AI generation.")
         with gr.Row():
             with gr.Column(scale=1):
+                images_input = gr.File(label="Upload Images", file_count="multiple", file_types=["image"], height=200)
                 gr.Markdown("### Model Selection")
                 with gr.Row():
                     enable_quality = gr.Checkbox(label="Image Quality", value=True)
                     enable_aesthetics = gr.Checkbox(label="Aesthetics", value=True)
                 with gr.Row():
                     enable_prompt = gr.Checkbox(label="Prompt Following", value=True)
                     enable_ai_detection = gr.Checkbox(label="AI Detection", value=True)
                 gr.Markdown("### Options")
                 anime_mode = gr.Checkbox(label="Anime/Art Mode", value=False)
                 evaluate_btn = gr.Button("🚀 Evaluate Images", variant="primary", size="lg")
                 status_output = gr.Textbox(label="Status", interactive=False)
+            with gr.Column(scale=3):
                 gr.Markdown("### 📊 Evaluation Results")
+                # FIX: Update headers and datatypes to match the new formatted DataFrame
                 results_output = gr.Dataframe(
+                    headers=["Rank", "Thumbnail", "Filename", "Final Score", "Quality", "Aesthetics", "Prompt", "AI Detection"],
+                    datatype=["number", "markdown", "str", "str", "str", "str", "str", "str"],
                     label="Results",
                     interactive=False,
                     wrap=True,
                     elem_classes=["results-table"]
                 )
         evaluate_btn.click(
             fn=app.evaluate_images,
+            inputs=[images_input, enable_quality, enable_aesthetics, enable_prompt, enable_ai_detection, anime_mode],
+            outputs=[results_output, status_output]
         )
         with gr.Accordion("ℹ️ Help & Information", open=False):
+            # Help text remains the same as it describes the intended functionality
             gr.Markdown("""
             ### How to Use
+            1. **Upload Images**: Select multiple PNG/JPG images.
+            2. **Select Models**: Choose which evaluation metrics to use.
+            3. **Anime Mode**: Enable for better evaluation of anime/art style images.
+            4. **Evaluate**: Click the button to start evaluation.
             ### Scoring System
+            - **Quality Score**: Technical image quality (0-10).
+            - **Aesthetics Score**: Visual appeal and composition (0-10).
+            - **Prompt Score**: How well the image follows the text prompt (0-10, requires metadata).
+            - **AI Detection**: Probability of being AI-generated (0-1, lower is better for the final score).
+            - **Final Score**: Weighted combination of all metrics (0-10).
             """)
     return interface
 if __name__ == "__main__":
     interface = create_interface()
+    interface.launch(server_name="0.0.0.0", server_port=7860, show_error=True)

requirements.txt CHANGED Viewed

@@ -2,18 +2,17 @@ gradio>=4.0.0
 Pillow>=9.0.0
 numpy>=1.21.0
 pandas>=1.3.0
-scipy>=1.9.0
-# Optional dependencies for full functionality
-# Uncomment these for production deployment with real models
-# torch>=2.0.0
-# torchvision>=0.15.0
-# transformers>=4.30.0
-# opencv-python>=4.5.0
-# scikit-image>=0.19.0
-# huggingface-hub>=0.15.0
-# accelerate>=0.20.0
-# timm>=0.9.0
-# sentence-transformers>=2.2.0
-# git+https://github.com/openai/CLIP.git

 Pillow>=9.0.0
 numpy>=1.21.0
 pandas>=1.3.0
+# Optional dependencies for full functionality
+# Uncomment these for production deployment with real models
+torch>=2.0.0
+torchvision>=0.15.0
+transformers>=4.30.0
+opencv-python>=4.5.0
+scikit-image>=0.19.0
+huggingface-hub>=0.15.0
+accelerate>=0.20.0
+timm>=0.9.0
+sentence-transformers>=2.2.0
+git+https://github.com/openai/CLIP.git
+scipy>=1.9.0

scoring.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import numpy as np
+import logging
+logger = logging.getLogger(__name__)
# Component weights. Each table sums to 1.0, so the weighted sum stays on the
# same 0-10 scale as the individual component scores.
_WEIGHTS_WITH_PROMPT = {
    'quality': 0.25,       # 25% - Technical quality
    'aesthetics': 0.35,    # 35% - Visual appeal (highest weight)
    'prompt': 0.25,        # 25% - Prompt following
    'ai_detection': 0.15,  # 15% - Authenticity (inverted detection score)
}
# Without a prompt, the prompt's 25% is split evenly between quality and
# aesthetics (12.5% each).
_WEIGHTS_WITHOUT_PROMPT = {
    'quality': 0.375,
    'aesthetics': 0.475,
    'ai_detection': 0.15,
}


def _clamp_score(value: float, upper: float) -> float:
    """Clamp *value* into ``[0.0, upper]``, raising ``ValueError`` for NaN."""
    # NaN is the only value that is unequal to itself. It has to be rejected
    # explicitly: every ordering comparison with NaN is False, so
    # ``min(upper, nan)`` returns ``upper`` and a failed measurement would
    # silently be scored as the best possible value.
    if value != value:
        raise ValueError("score is NaN")
    return max(0.0, min(upper, value))


def calculate_final_score(
    quality_score: float,
    aesthetics_score: float,
    prompt_score: float,
    ai_detection_score: float,
    has_prompt: bool = True
) -> float:
    """
    Calculate weighted composite score for image evaluation.

    Out-of-range inputs are clamped to their valid range. The AI detection
    probability is inverted and rescaled to 0-10 so that a low probability
    contributes a high score.

    Args:
        quality_score: Technical image quality (0-10)
        aesthetics_score: Visual appeal score (0-10)
        prompt_score: Prompt adherence score (0-10). Ignored (and not
            validated) when ``has_prompt`` is False.
        ai_detection_score: AI generation probability (0-1)
        has_prompt: Whether prompt metadata is available

    Returns:
        Final composite score (0-10). Returns 0.0 when any score that is
        actually used is invalid (NaN, None, or otherwise not comparable to
        a number); the error is logged instead of being raised.
    """
    try:
        quality = _clamp_score(quality_score, 10.0)
        aesthetics = _clamp_score(aesthetics_score, 10.0)
        ai_probability = _clamp_score(ai_detection_score, 1.0)

        # A low AI detection probability (good) results in a high score.
        inverted_ai_score = (1 - ai_probability) * 10

        if has_prompt:
            weights = _WEIGHTS_WITH_PROMPT
            score = (
                quality * weights['quality'] +
                aesthetics * weights['aesthetics'] +
                _clamp_score(prompt_score, 10.0) * weights['prompt'] +
                inverted_ai_score * weights['ai_detection']
            )
        else:
            # prompt_score is deliberately left untouched here: it does not
            # contribute, so a placeholder value must not fail the scoring.
            weights = _WEIGHTS_WITHOUT_PROMPT
            score = (
                quality * weights['quality'] +
                aesthetics * weights['aesthetics'] +
                inverted_ai_score * weights['ai_detection']
            )

        # Guard against floating-point drift just outside the 0-10 range.
        final_score = max(0.0, min(10.0, score))
        logger.debug("Score calculation - Final: %.2f", final_score)
        return final_score
    except Exception as e:
        # Deliberately broad: this function never raises, callers rely on
        # 0.0 as the failure value.
        logger.error("Error calculating final score: %s", e)
        return 0.0  # Return 0.0 on error to clearly indicate failure