Spaces:

VOIDER
/

rsinema-aesthetic-scorer

Sleeping

App Files Community

VOIDER committed on Jul 28

Commit

d4c04db

verified ·

1 Parent(s): 9d90ffe

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -22

app.py CHANGED Viewed

@@ -9,14 +9,10 @@ import os
 from collections import OrderedDict
 # --- 1. Define the Full Model Architecture ---
-# This class's structure is matched to the keys in the downloaded state_dict.
 class AestheticScorerModel(nn.Module):
     def __init__(self, clip_model_id, embedding_dim):
         super().__init__()
-        # Use the base CLIPVisionModel, which provides the correct pooler_output dimension
         self.backbone = CLIPVisionModel.from_pretrained(clip_model_id)
-        # Define the seven prediction heads
         self.aesthetic_head = nn.Sequential(nn.Linear(embedding_dim, 1))
         self.quality_head = nn.Sequential(nn.Linear(embedding_dim, 1))
         self.composition_head = nn.Sequential(nn.Linear(embedding_dim, 1))
@@ -26,11 +22,8 @@ class AestheticScorerModel(nn.Module):
         self.content_head = nn.Sequential(nn.Linear(embedding_dim, 1))
     def forward(self, pixel_values):
-        # Use the un-projected 'pooler_output' which has dimension 768
         outputs = self.backbone(pixel_values=pixel_values)
         embedding = outputs.pooler_output
-        # Calculate and concatenate scores from each head
         scores = torch.cat([
             self.aesthetic_head(embedding), self.quality_head(embedding),
             self.composition_head(embedding), self.light_head(embedding),
@@ -44,14 +37,13 @@ print("Loading model and processor...")
 CACHE_DIR = "hf_cache"
 MODEL_REPO_ID = "rsinema/aesthetic-scorer"
 CLIP_MODEL_ID = "openai/clip-vit-base-patch32"
-EMBEDDING_DIM = 768 # This is the hidden_size of the base model
 processor = CLIPProcessor.from_pretrained(CLIP_MODEL_ID, cache_dir=CACHE_DIR)
 model = AestheticScorerModel(clip_model_id=CLIP_MODEL_ID, embedding_dim=EMBEDDING_DIM)
 model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="model.pt", cache_dir=CACHE_DIR)
 state_dict = torch.load(model_path, map_location=torch.device('cpu'))
-# Key renaming logic to align saved weights with the model's structure
 corrected_state_dict = OrderedDict()
 for key, value in state_dict.items():
     if key.startswith('backbone.'):
@@ -64,15 +56,11 @@ model.load_state_dict(corrected_state_dict, strict=False)
 model.eval()
 print("Model and processor loaded successfully.")
-# Define the aesthetic categories in the correct order for table headers
 AESTHETIC_CATEGORIES = ["Overall", "Quality", "Composition", "Lighting", "Color", "Depth of Field", "Content"]
 TABLE_HEADERS = ["Preview", "File"] + AESTHETIC_CATEGORIES
 # --- 3. Core Processing Function ---
 def score_images(files):
-    """
-    Processes uploaded images, scores them, and returns a DataFrame with image previews.
-    """
     if not files:
         return pd.DataFrame(columns=TABLE_HEADERS)
@@ -86,8 +74,9 @@ def score_images(files):
                 inputs = processor(images=image, return_tensors="pt")
                 scores = model(**inputs)[0]
-            # Create a dictionary for the current image's scores, including the preview path
-            image_scores = {"Preview": file_path, "File": filename}
             for category, score in zip(AESTHETIC_CATEGORIES, scores):
                 image_scores[category] = f"{score.item():.2f} / 5"
             results_list.append(image_scores)
@@ -97,11 +86,9 @@ def score_images(files):
             error_row = {"Preview": None, "File": filename, **{cat: "Processing Error" for cat in AESTHETIC_CATEGORIES}}
             results_list.append(error_row)
-    # Create DataFrame with specified column order
     return pd.DataFrame(results_list, columns=TABLE_HEADERS)
 def clear_all():
-    """Returns None to clear all specified components."""
     return None, None
 # --- 4. Gradio Interface using Blocks for Layout Control ---
@@ -110,7 +97,6 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}")
     gr.Markdown("Upload one or more images to compare their aesthetic scores across seven distinct categories. This application uses the **rsinema/aesthetic-scorer** model.")
     with gr.Column():
-        # Input section
         file_uploader = gr.File(
             label="Upload Images",
             file_count="multiple",
@@ -120,16 +106,15 @@ with gr.Blocks(theme=gr.themes.Soft(), css="footer {display: none !important}")
             clear_button = gr.Button("Clear")
             submit_button = gr.Button("Submit", variant="primary")
-        # Output section is now below the inputs
         output_df = gr.Dataframe(
             headers=TABLE_HEADERS,
             label="Aesthetic Scores Comparison",
             interactive=False,
-            # Set column widths to make the Preview column larger
-            column_widths=[20, 30] + [10] * len(AESTHETIC_CATEGORIES)
         )
-    # Event listeners
     submit_button.click(fn=score_images, inputs=file_uploader, outputs=output_df)
     clear_button.click(fn=clear_all, inputs=None, outputs=[file_uploader, output_df])

 from collections import OrderedDict
 # --- 1. Define the Full Model Architecture ---
 class AestheticScorerModel(nn.Module):
     def __init__(self, clip_model_id, embedding_dim):
         super().__init__()
         self.backbone = CLIPVisionModel.from_pretrained(clip_model_id)
         self.aesthetic_head = nn.Sequential(nn.Linear(embedding_dim, 1))
         self.quality_head = nn.Sequential(nn.Linear(embedding_dim, 1))
         self.composition_head = nn.Sequential(nn.Linear(embedding_dim, 1))
         self.content_head = nn.Sequential(nn.Linear(embedding_dim, 1))
     def forward(self, pixel_values):
         outputs = self.backbone(pixel_values=pixel_values)
         embedding = outputs.pooler_output
         scores = torch.cat([
             self.aesthetic_head(embedding), self.quality_head(embedding),
             self.composition_head(embedding), self.light_head(embedding),
 CACHE_DIR = "hf_cache"
 MODEL_REPO_ID = "rsinema/aesthetic-scorer"
 CLIP_MODEL_ID = "openai/clip-vit-base-patch32"
+EMBEDDING_DIM = 768
 processor = CLIPProcessor.from_pretrained(CLIP_MODEL_ID, cache_dir=CACHE_DIR)
 model = AestheticScorerModel(clip_model_id=CLIP_MODEL_ID, embedding_dim=EMBEDDING_DIM)
 model_path = hf_hub_download(repo_id=MODEL_REPO_ID, filename="model.pt", cache_dir=CACHE_DIR)
 state_dict = torch.load(model_path, map_location=torch.device('cpu'))
 corrected_state_dict = OrderedDict()
 for key, value in state_dict.items():
     if key.startswith('backbone.'):
 model.eval()
 print("Model and processor loaded successfully.")
 AESTHETIC_CATEGORIES = ["Overall", "Quality", "Composition", "Lighting", "Color", "Depth of Field", "Content"]
 TABLE_HEADERS = ["Preview", "File"] + AESTHETIC_CATEGORIES
 # --- 3. Core Processing Function ---
 def score_images(files):
     if not files:
         return pd.DataFrame(columns=TABLE_HEADERS)
                 inputs = processor(images=image, return_tensors="pt")
                 scores = model(**inputs)[0]
+            # --- THIS IS THE CORRECTED LINE ---
+            # Provide the image data as a (filepath, alt_text) tuple for rendering.
+            image_scores = {"Preview": (file_path, filename), "File": filename}
             for category, score in zip(AESTHETIC_CATEGORIES, scores):
                 image_scores[category] = f"{score.item():.2f} / 5"
             results_list.append(image_scores)
             error_row = {"Preview": None, "File": filename, **{cat: "Processing Error" for cat in AESTHETIC_CATEGORIES}}
             results_list.append(error_row)
     return pd.DataFrame(results_list, columns=TABLE_HEADERS)
 def clear_all():
     return None, None
 # --- 4. Gradio Interface using Blocks for Layout Control ---
     gr.Markdown("Upload one or more images to compare their aesthetic scores across seven distinct categories. This application uses the **rsinema/aesthetic-scorer** model.")
     with gr.Column():
         file_uploader = gr.File(
             label="Upload Images",
             file_count="multiple",
             clear_button = gr.Button("Clear")
             submit_button = gr.Button("Submit", variant="primary")
         output_df = gr.Dataframe(
             headers=TABLE_HEADERS,
             label="Aesthetic Scores Comparison",
             interactive=False,
+            # Specify the data type for the Preview column as 'image'
+            datatype=["image", "str"] + ["str"] * len(AESTHETIC_CATEGORIES),
+            column_widths=[15, 35] + [10] * len(AESTHETIC_CATEGORIES)
         )
     submit_button.click(fn=score_images, inputs=file_uploader, outputs=output_df)
     clear_button.click(fn=clear_all, inputs=None, outputs=[file_uploader, output_df])