Flux_Prompt_Optimizer

Running on Zero

App Files Community

Malaji71 committed on Jun 12

Commit

9abf097

verified ·

1 Parent(s): e53d7f7

Update models.py

Browse files

Files changed (1) hide show

models.py +137 -49

models.py CHANGED Viewed

@@ -7,6 +7,7 @@ import spaces
 import logging
 import tempfile
 import os
 from typing import Optional, Dict, Any, Tuple
 from PIL import Image
 from gradio_client import Client, handle_file
@@ -82,38 +83,121 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
             return False
     def _extract_camera_setup(self, description: str) -> Optional[str]:
-        """Extract camera setup recommendation from BAGEL response"""
         try:
-            # Look for CAMERA_SETUP section
             if "CAMERA_SETUP:" in description:
                 parts = description.split("CAMERA_SETUP:")
                 if len(parts) > 1:
-                    camera_part = parts[1].strip()
-                    # Clean up any additional formatting
-                    camera_part = camera_part.replace("\n", " ").strip()
-                    return camera_part
-            # Alternative patterns for camera recommendations
             camera_patterns = [
-                "Shot on ",
-                "Camera: ",
-                "Equipment: ",
-                "Recommended camera:",
-                "Camera setup:"
             ]
             for pattern in camera_patterns:
-                if pattern in description:
-                    # Extract text after the pattern
-                    idx = description.find(pattern)
-                    camera_text = description[idx:].split('.')[0]  # Take first sentence
-                    if len(camera_text) > len(pattern) + 10:  # Ensure meaningful content
-                        return camera_text.strip()
             return None
         except Exception as e:
-            logger.warning(f"Failed to extract camera setup: {e}")
             return None
     def _save_temp_image(self, image: Image.Image) -> str:
@@ -165,22 +249,24 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
         }
         try:
-            # Default prompt for detailed image analysis
             if prompt is None:
-                prompt = """You are analyzing a photograph for FLUX image generation. Provide a detailed analysis in two sections:
-1. DESCRIPTION: Start directly with the subject (e.g., "A color photograph showing..." or "A black and white photograph depicting..."). First, determine if this is a photograph, illustration, or artwork. Then describe the visual elements, composition, lighting, colors (be specific about the color palette - warm tones, cool tones, monochrome, etc.), artistic style, mood, and atmosphere. Also mention the image format/aspect ratio (square, portrait, landscape, widescreen, etc.) and how the composition uses this format. Write as a flowing paragraph without numbered lists.
-2. CAMERA_SETUP: Based on the photographic characteristics, scene type, and aspect ratio you observe, recommend the specific camera system and lens that would realistically capture this type of scene:
-- For street/documentary photography: suggest cameras like Canon EOS R6, Sony A7 IV, Leica Q2 with 35mm or 24-70mm lenses
-- For portraits: suggest cameras like Canon EOS R5, Sony A7R V with 85mm or 135mm lenses
-- For landscapes/widescreen: suggest cameras like Phase One XT, Fujifilm GFX with wide-angle lenses (16-35mm, 24-70mm)
-- For sports/action: suggest cameras like Canon EOS-1D X, Sony A9 III with telephoto lenses
-- For macro: suggest specialized macro lenses
-- For cinematic/widescreen formats: suggest cinema cameras or full-frame with appropriate aspect ratios
-Be specific about focal length, aperture, and shooting style based on what you actually see in the image dimensions and content.
-Analyze carefully and be accurate about colors, image type, and proportions."""
             # Save image to temporary file
             temp_path = self._save_temp_image(image)
@@ -195,7 +281,7 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
                 prompt=prompt,
                 show_thinking=False,
                 do_sample=False,
-                text_temperature=0.3,
                 max_new_tokens=512,
                 api_name=self.api_endpoint
             )
@@ -206,17 +292,19 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
             else:
                 description = str(result)
-            # Clean up the description and extract camera setup if present
             if isinstance(description, str) and description.strip():
                 description = description.strip()
-                # Store camera setup separately if found
                 camera_setup = self._extract_camera_setup(description)
                 if camera_setup:
                     metadata["camera_setup"] = camera_setup
                     metadata["has_camera_suggestion"] = True
                 else:
                     metadata["has_camera_suggestion"] = False
             else:
                 description = "Detailed image analysis completed successfully"
                 metadata["has_camera_suggestion"] = False
@@ -226,7 +314,7 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
                 "response_length": len(description)
             })
-            logger.info(f"BAGEL API analysis complete: {len(description)} characters")
             return description, metadata
         except Exception as e:
@@ -240,22 +328,22 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
     def analyze_for_flux_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
         """Analyze image specifically for FLUX prompt generation"""
-        flux_prompt = """You are analyzing a photograph for professional FLUX generation. Provide two sections:
-1. DESCRIPTION: Determine first if this is a real photograph, digital artwork, or illustration. Then create a detailed, flowing description starting directly with the subject. Be precise about:
-- Image type (photograph, illustration, artwork)
-- Color palette (specify if color or black/white, warm/cool tones, specific colors)
-- Photographic style (street, portrait, landscape, documentary, artistic, etc.)
-- Composition, lighting, mood, and atmosphere
-Write as a single coherent paragraph.
-2. CAMERA_SETUP: Recommend specific professional equipment that would realistically capture this exact scene:
-- Street/urban scenes: Canon EOS R6, Sony A7 IV, Leica Q2 with 24-70mm f/2.8 or 35mm f/1.4
-- Portraits: Canon EOS R5, Sony A7R V, Hasselblad X2D with 85mm f/1.4 or 135mm f/2
-- Landscapes: Phase One XT, Fujifilm GFX 100S with 16-35mm f/2.8 or 40mm f/4
-- Documentary: Canon EOS-1D X, Sony A9 III with 24-105mm f/4 or 70-200mm f/2.8
-- Action/Sports: Canon EOS R3, Sony A1 with 300mm f/2.8 or 400mm f/2.8
-Match the equipment to what you actually observe in the scene type and shooting conditions."""
         return self.analyze_image(image, flux_prompt)

 import logging
 import tempfile
 import os
+import re
 from typing import Optional, Dict, Any, Tuple
 from PIL import Image
 from gradio_client import Client, handle_file
             return False
     def _extract_camera_setup(self, description: str) -> Optional[str]:
+        """Extract camera setup recommendation from BAGEL response with improved parsing"""
         try:
+            # Look for CAMERA_SETUP section first
             if "CAMERA_SETUP:" in description:
                 parts = description.split("CAMERA_SETUP:")
                 if len(parts) > 1:
+                    camera_section = parts[1].strip()
+                    # Take the first meaningful sentence from camera setup
+                    camera_text = camera_section.split('\n')[0].strip()
+                    if len(camera_text) > 20:  # Ensure meaningful content
+                        return self._parse_camera_recommendation(camera_text)
+            # Look for "2. CAMERA_SETUP" pattern
+            if "2. CAMERA_SETUP" in description:
+                parts = description.split("2. CAMERA_SETUP")
+                if len(parts) > 1:
+                    camera_section = parts[1].strip()
+                    camera_text = camera_section.split('\n')[0].strip()
+                    if len(camera_text) > 20:
+                        return self._parse_camera_recommendation(camera_text)
+            # Look for camera recommendations within the text
+            camera_recommendation = self._find_camera_recommendation(description)
+            if camera_recommendation:
+                return camera_recommendation
+            return None
+        except Exception as e:
+            logger.warning(f"Failed to extract camera setup: {e}")
+            return None
+    def _parse_camera_recommendation(self, camera_text: str) -> Optional[str]:
+        """Parse and extract specific camera and lens information"""
+        try:
+            # Remove common prefixes and clean text
+            camera_text = re.sub(r'^(Based on.*?recommend|I would recommend|For this.*?recommend)\s*', '', camera_text, flags=re.IGNORECASE)
+            camera_text = re.sub(r'^(using a|use a|cameras? like)\s*', '', camera_text, flags=re.IGNORECASE)
+            # Extract camera model with specific patterns
             camera_patterns = [
+                r'(Canon EOS [R\d]+[^\s,]*(?:\s+[IVX]+)?)',
+                r'(Sony A[^\s,]+(?:\s+[IVX]+)?)',
+                r'(Leica [^\s,]+)',
+                r'(Hasselblad [^\s,]+)',
+                r'(Phase One [^\s,]+)',
+                r'(Fujifilm [^\s,]+)'
             ]
+            camera_model = None
             for pattern in camera_patterns:
+                match = re.search(pattern, camera_text, re.IGNORECASE)
+                if match:
+                    camera_model = match.group(1).strip()
+                    break
+            # Extract lens information with improved patterns
+            lens_patterns = [
+                r'(\d+mm\s*f/[\d.]+(?:\s*lens)?)',
+                r'(\d+-\d+mm\s*f/[\d.]+(?:\s*lens)?)',
+                r'(with\s+(?:a\s+)?(\d+mm[^,.]*))',
+                r'(paired with.*?(\d+mm[^,.]*))'
+            ]
+            lens_info = None
+            for pattern in lens_patterns:
+                match = re.search(pattern, camera_text, re.IGNORECASE)
+                if match:
+                    lens_info = match.group(1).strip()
+                    lens_info = re.sub(r'^(with\s+(?:a\s+)?|paired with\s+)', '', lens_info, flags=re.IGNORECASE)
+                    break
+            # Extract aperture if not in lens info
+            if not lens_info or 'f/' not in lens_info:
+                aperture_match = re.search(r'(f/[\d.]+)', camera_text)
+                aperture = aperture_match.group(1) if aperture_match else None
+                if aperture and lens_info:
+                    lens_info = f"{lens_info} {aperture}"
+            # Build clean recommendation
+            parts = []
+            if camera_model:
+                parts.append(camera_model)
+            if lens_info:
+                parts.append(lens_info)
+            if parts:
+                result = ', '.join(parts)
+                logger.info(f"Parsed camera recommendation: {result}")
+                return result
             return None
         except Exception as e:
+            logger.warning(f"Failed to parse camera recommendation: {e}")
+            return None
+    def _find_camera_recommendation(self, text: str) -> Optional[str]:
+        """Find camera recommendations anywhere in the text"""
+        try:
+            # Look for sentences containing camera info
+            sentences = re.split(r'[.!?]', text)
+            for sentence in sentences:
+                # Check if sentence contains camera info
+                if any(brand in sentence.lower() for brand in ['canon', 'sony', 'leica', 'hasselblad', 'phase one', 'fujifilm']):
+                    if any(term in sentence.lower() for term in ['recommend', 'suggest', 'would use', 'camera', 'lens']):
+                        parsed = self._parse_camera_recommendation(sentence.strip())
+                        if parsed:
+                            return parsed
+            return None
+        except Exception as e:
+            logger.warning(f"Failed to find camera recommendation: {e}")
             return None
     def _save_temp_image(self, image: Image.Image) -> str:
         }
         try:
+            # Enhanced prompt for better structured output
             if prompt is None:
+                prompt = """Analyze this image for professional photography reproduction. Provide exactly two sections:
+1. DESCRIPTION: Write a single flowing paragraph describing what you see. Start directly with the subject (e.g., "A color photograph showing..." or "A black and white image depicting..."). Include:
+- Image type (photograph, illustration, artwork)
+- Subject and composition
+- Color palette and lighting conditions
+- Mood and atmosphere
+- Photographic style and format
+2. CAMERA_SETUP: Based on the scene type you observe, recommend ONE specific professional camera and lens combination:
+- For street/documentary scenes: Canon EOS R6 with 35mm f/1.4 lens
+- For portrait photography: Canon EOS R5 with 85mm f/1.4 lens
+- For landscape photography: Phase One XT with 24-70mm f/4 lens
+- For action/sports: Sony A1 with 70-200mm f/2.8 lens
+Give only the camera model and lens specification, nothing else."""
             # Save image to temporary file
             temp_path = self._save_temp_image(image)
                 prompt=prompt,
                 show_thinking=False,
                 do_sample=False,
+                text_temperature=0.2,
                 max_new_tokens=512,
                 api_name=self.api_endpoint
             )
             else:
                 description = str(result)
+            # Process the description and extract camera setup
             if isinstance(description, str) and description.strip():
                 description = description.strip()
+                # Extract camera setup with improved parsing
                 camera_setup = self._extract_camera_setup(description)
                 if camera_setup:
                     metadata["camera_setup"] = camera_setup
                     metadata["has_camera_suggestion"] = True
+                    logger.info(f"Extracted camera setup: {camera_setup}")
                 else:
                     metadata["has_camera_suggestion"] = False
+                    logger.warning("No valid camera setup found in BAGEL response")
             else:
                 description = "Detailed image analysis completed successfully"
                 metadata["has_camera_suggestion"] = False
                 "response_length": len(description)
             })
+            logger.info(f"BAGEL API analysis complete: {len(description)} characters, Camera: {metadata.get('has_camera_suggestion', False)}")
             return description, metadata
         except Exception as e:
     def analyze_for_flux_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
         """Analyze image specifically for FLUX prompt generation"""
+        flux_prompt = """Analyze this image for professional FLUX generation. Provide exactly two sections:
+1. DESCRIPTION: Create a single flowing paragraph starting directly with the subject. Be precise about:
+- Image type (photograph, illustration, artwork)
+- Subject matter and composition
+- Color palette (specific colors, warm/cool tones, monochrome)
+- Lighting conditions and photographic style
+- Mood, atmosphere, and artistic elements
+2. CAMERA_SETUP: Recommend ONE specific professional camera and lens for this scene type:
+- Street/urban/documentary: Canon EOS R6 with 35mm f/1.4 lens
+- Portrait photography: Canon EOS R5 with 85mm f/1.4 lens
+- Landscape photography: Phase One XT with 24-70mm f/4 lens
+- Action/sports: Sony A1 with 70-200mm f/2.8 lens
+Give only the camera model and exact lens specification."""
         return self.analyze_image(image, flux_prompt)