Spaces:

skallewag
/

SEEM

Build error

App Files Files Community

skallewag committed on May 27

Commit

35fadf1

verified ·

1 Parent(s): ca69ff4

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -452

app.py CHANGED Viewed

@@ -6,474 +6,119 @@
 # Written by Xueyan Zou ([email protected]), Jianwei Yang ([email protected])
 # --------------------------------------------------------
-# This file is specifically adapted for Hugging Face Spaces deployment
 import os
 import sys
 import subprocess
-import warnings
-import traceback
-from pathlib import Path
-# Log all operations for debugging
-print("Starting SEEM HF Space setup...")
-print(f"Current directory: {os.getcwd()}")
-print(f"Python version: {sys.version}")
-# Create mock detectron2 structures to prevent import errors
-print("Setting up mock detectron2 module")
-class Boxes:
-    def __init__(self, *args, **kwargs):
-        pass
-class ImageList:
-    def __init__(self, *args, **kwargs):
-        pass
-    @staticmethod
-    def from_tensors(*args, **kwargs):
-        return ImageList()
-class Instances:
-    def __init__(self, *args, **kwargs):
-        pass
-class BitMasks:
-    def __init__(self, *args, **kwargs):
-        pass
-    @staticmethod
-    def from_polygon_masks(*args, **kwargs):
-        return BitMasks()
-class BoxMode:
-    XYXY_ABS = 0
-    XYWH_ABS = 1
-# Add mock detectron2 to sys.modules as a proper package
-if 'detectron2' not in sys.modules:
-    import types
-    detectron2_module = types.ModuleType('detectron2')
-    structures_module = types.ModuleType('detectron2.structures')
-    sys.modules['detectron2'] = detectron2_module
-    sys.modules['detectron2.structures'] = structures_module
-    # Add classes to structures module
-    structures_module.Boxes = Boxes
-    structures_module.ImageList = ImageList
-    structures_module.Instances = Instances
-    structures_module.BitMasks = BitMasks
-    structures_module.BoxMode = BoxMode
-    # Set structures as an attribute of detectron2
-    detectron2_module.structures = structures_module
-    print("Mock detectron2 module created")
-# Make sure utils directory exists
-os.makedirs('utils', exist_ok=True)
-print("Created utils directory if it didn't exist")
-# Create a custom distributed.py without mpi4py dependency
-with open('utils/distributed.py', 'w') as f:
-    f.write("""# Custom distributed.py without mpi4py dependency
-import os
-import torch
-import torch.distributed as dist
-class MPI:
-    class COMM_WORLD:
-        @staticmethod
-        def Get_rank():
-            return 0
-        @staticmethod
-        def Get_size():
-            return 1
-        @staticmethod
-        def bcast(data, root=0):
-            return data
-        @staticmethod
-        def barrier():
-            pass
-def apply_distributed(opt):
-    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    opt.rank = 0
-    opt.world_size = 1
-    opt.gpu = 0
-    return opt
-def init_distributed(opt=None):
-    if opt is not None:
-        opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        opt.rank = 0
-        opt.world_size = 1
-        opt.gpu = 0
-        return opt
-    return None
-def get_rank():
-    return 0
-def get_world_size():
-    return 1
-def is_main_process():
-    return True
-def synchronize():
-    pass
-def all_gather(data):
-    return [data]
-def reduce_dict(input_dict, average=True):
-    return input_dict
 """)
-print("Created custom distributed.py")
-# Ensure examples directory exists
-os.makedirs('examples', exist_ok=True)
-print("Created examples directory if it didn't exist")
-# Create a minimal interactive.py in tasks directory
-os.makedirs('tasks', exist_ok=True)
-with open('tasks/interactive.py', 'w') as f:
-    f.write("""
-import numpy as np
-from PIL import Image, ImageDraw
-def interactive_infer_image(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
-    # Get image dimensions
-    img = image['image']
-    h, w = img.size[1], img.size[0]
-    # Display a message and create a simple mask for demonstration
-    print("Called interactive_infer_image with tasks:", tasks)
-    print("Image size:", img.size)
-    if refimg is not None:
-        print("Referring image size:", refimg['image'].size)
-    if reftxt:
-        print("Text:", reftxt)
-    if audio_pth:
-        print("Audio path:", audio_pth)
-    # Create a simple circle mask in the center
-    mask = np.zeros((h, w), dtype=np.uint8)
-    center_x, center_y = w//2, h//2
-    radius = min(w, h) // 4
-    for y in range(h):
-        for x in range(w):
-            if ((x - center_x)**2 + (y - center_y)**2) < radius**2:
-                mask[y, x] = 255
-    return Image.fromarray(mask), None
-def interactive_infer_video(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
-    # Just return the input video for demonstration
-    print("Called interactive_infer_video with tasks:", tasks)
-    if video_pth:
-        print("Video path:", video_pth)
-    return None, video_pth
-""")
-print("Created simplified interactive.py")
-# Create some example placeholder files
-example_files = [
-    'corgi1.webp', 'corgi2.jpg', 'river1.png', 'river2.png',
-    'zebras1.jpg', 'zebras2.jpg', 'fries1.png', 'fries2.png',
-    'placeholder.png', 'ref_vase.JPG'
-]
-placeholder_img = None
-try:
-    from PIL import Image, ImageDraw
-    placeholder_img = Image.new('RGB', (400, 300), color=(240, 240, 240))
-    d = ImageDraw.Draw(placeholder_img)
-    d.text((150, 150), "Placeholder", fill=(0, 0, 0))
-except Exception as e:
-    print(f"Error creating placeholder image: {e}")
-for file_name in example_files:
-    file_path = os.path.join('examples', file_name)
-    if not os.path.exists(file_path) and placeholder_img is not None:
-        try:
-            placeholder_img.save(file_path)
-            print(f"Created {file_path}")
-        except Exception as e:
-            print(f"Error creating {file_path}: {e}")
-# Create dummy audio/video files if needed
-if not os.path.exists('examples/river1.wav'):
-    try:
-        with open('examples/river1.wav', 'wb') as f:
-            f.write(b'RIFF$\x00\x00\x00WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x00\x04\x00\x00\x00\x04\x00\x00\x01\x00\x08\x00data\x00\x00\x00\x00')
-        print("Created dummy audio file")
-    except Exception as e:
-        print(f"Error creating dummy audio file: {e}")
-if not os.path.exists('examples/vasedeck.mp4'):
-    try:
-        with open('examples/vasedeck.mp4', 'wb') as f:
-            f.write(b'\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00mp42mp41\x00\x00\x00\x00')
-        print("Created dummy video file")
-    except Exception as e:
-        print(f"Error creating dummy video file: {e}")
-# Continue with regular imports
-print("Importing required libraries...")
-try:
-    import PIL
-    from PIL import Image, ImageDraw
-    import gradio as gr
-    import torch
-    import argparse
-    import numpy as np
-    from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
-    from gradio import processing_utils
-    print("Basic imports successful")
-except Exception as e:
-    print(f"Error importing basic libraries: {e}")
-    traceback.print_exc()
-    sys.exit(1)
-# Try to import specialized libraries but handle their absence gracefully
-try:
-    import whisper
-    audio_loaded = True
-    print("Whisper loaded successfully")
-except Exception as e:
-    print(f"Error loading whisper: {e}")
-    audio_loaded = False
-# Global flags for model status
-model_loaded = False
-audio_loaded = audio_loaded if 'audio_loaded' in locals() else False
-interactive_functions_imported = False
-# Dummy constants if not available
-try:
-    from utils.constants import COCO_PANOPTIC_CLASSES
-    print("Loaded COCO_PANOPTIC_CLASSES")
-except ImportError:
-    print("Creating dummy COCO_PANOPTIC_CLASSES")
-    COCO_PANOPTIC_CLASSES = ["person", "cat", "dog", "car", "bicycle", "umbrella", "tree", "sky", "building"]
-# Try to import the model but handle failures gracefully
-try:
-    # Attempt to import specialized modules but don't fail if they're not available
-    try:
-        from modeling.BaseModel import BaseModel
-        from modeling import build_model
-        from utils.distributed import init_distributed
-        from utils.arguments import load_opt_from_config_files
-        print("Model imports successful")
-        # Try to import interactive functions
-        try:
-            from tasks.interactive import interactive_infer_image, interactive_infer_video
-            print("Successfully imported interactive functions from tasks.interactive")
-            interactive_functions_imported = True
-        except ImportError as e:
-            print(f"Error importing interactive functions: {e}")
-            interactive_functions_imported = False
-        # Try to set up the model
-        try:
-            parser = argparse.ArgumentParser('SEEM Demo', add_help=False)
-            parser.add_argument('--conf_files', default="configs/seem/focall_unicl_lang_demo.yaml", metavar="FILE", help='path to config file')
-            cfg = parser.parse_args()
-            opt = load_opt_from_config_files([cfg.conf_files])
-            opt = init_distributed(opt)
-            # META DATA
-            cur_model = 'None'
-            pretrained_pth = None
-            if 'focalt' in cfg.conf_files:
-                pretrained_pth = os.path.join("seem_focalt_v0.pt")
-                if not os.path.exists(pretrained_pth):
-                    print(f"Downloading model file {pretrained_pth}...")
-                    os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focalt_v0.pt"))
-                cur_model = 'Focal-T'
-            elif 'focal' in cfg.conf_files:
-                pretrained_pth = os.path.join("seem_focall_v0.pt")
-                if not os.path.exists(pretrained_pth):
-                    print(f"Downloading model file {pretrained_pth}...")
-                    os.system("wget {}".format("https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v0.pt"))
-                cur_model = 'Focal-L'
-            if pretrained_pth and os.path.exists(pretrained_pth):
-                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-                print(f"Using device: {device}")
-                model = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth).eval().to(device)
-                with torch.no_grad():
-                    model.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(COCO_PANOPTIC_CLASSES + ["background"], is_eval=True)
-                print("Model loaded successfully")
-                model_loaded = True
-            else:
-                print(f"Model file not found: {pretrained_pth}")
-                model = None
-                model_loaded = False
-        except Exception as e:
-            print(f"Error setting up model: {e}")
-            traceback.print_exc()
-            model = None
-            model_loaded = False
-    except Exception as e:
-        print(f"Error during model import: {e}")
-        traceback.print_exc()
-        model = None
-        model_loaded = False
-except Exception as e:
-    print(f"Error during model setup: {e}")
-    traceback.print_exc()
-    model = None
-    model_loaded = False
-# If interactive functions weren't imported, define dummy versions
-if not interactive_functions_imported:
-    print("Creating dummy interactive functions")
-    def interactive_infer_image(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
-        # Create a simple circle mask in the center
-        img = image['image']
-        h, w = img.size[1], img.size[0]
-        mask = np.zeros((h, w), dtype=np.uint8)
-        center_x, center_y = w//2, h//2
-        radius = min(w, h) // 4
-        for y in range(h):
-            for x in range(w):
-                if ((x - center_x)**2 + (y - center_y)**2) < radius**2:
-                    mask[y, x] = 255
-        return Image.fromarray(mask), None
-    def interactive_infer_video(model, audio_model, image, tasks, refimg=None, reftxt=None, audio_pth=None, video_pth=None):
-        return None, video_pth
-# Inference function
-@torch.no_grad()
-def inference(image, task, *args, **kwargs):
-    if not model_loaded:
-        # Return a placeholder image with an informative message
-        print("Model not loaded, returning placeholder image")
-        # Generate a simple mask based on the image size
-        if image is not None:
-            try:
-                h, w = image.size[1], image.size[0]
-                mask = np.zeros((h, w), dtype=np.uint8)
-                # Add a simple shape to the mask for demonstration
-                center_x, center_y = w//2, h//2
-                radius = min(w, h) // 4
-                for y in range(h):
-                    for x in range(w):
-                        if ((x - center_x)**2 + (y - center_y)**2) < radius**2:
-                            mask[y, x] = 255
-                return Image.fromarray(mask), None
-            except Exception as e:
-                print(f"Error creating demo mask: {e}")
-                warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
-                d = ImageDraw.Draw(warning_img)
-                d.text((50, 150), "Model could not be loaded.", fill=(255, 0, 0))
-                d.text((50, 200), "Using simplified interface for demonstration.", fill=(255, 0, 0))
-                return warning_img, None
-        warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
-        d = ImageDraw.Draw(warning_img)
-        d.text((50, 150), "Model could not be loaded.", fill=(255, 0, 0))
-        d.text((50, 200), "Using simplified interface for demonstration.", fill=(255, 0, 0))
-        return warning_img, None
-    # Prepare input parameters for the interactive functions
-    image_input = {"image": image, "mask": kwargs.get("mask", None)}
-    referring_image = kwargs.get("referring_image", None)
-    # If referring image is provided, prepare it in the expected format
-    refimg = None
-    if referring_image is not None:
-        refimg = {"image": referring_image, "mask": kwargs.get("referring_mask", None)}
-    # Get text and audio parameters
-    reftxt = kwargs.get("referring_text", "")
-    audio_pth = kwargs.get("referring_audio", None)
-    video_pth = kwargs.get("video", None)
-    # Call the appropriate interactive function
     try:
-        if 'Video' in task:
-            return interactive_infer_video(model, audio, image_input, task, refimg, reftxt, audio_pth, video_pth)
-        else:
-            return interactive_infer_image(model, audio, image_input, task, refimg, reftxt, audio_pth, video_pth)
     except Exception as e:
-        print(f"Error during inference: {e}")
         traceback.print_exc()
-        warning_img = Image.new('RGB', (600, 400), color=(240, 240, 240))
-        d = ImageDraw.Draw(warning_img)
-        d.text((50, 150), f"Error: {str(e)}", fill=(255, 0, 0))
-        d.text((50, 200), "Please check logs for details.", fill=(255, 0, 0))
-        return warning_img, None
-'''
-launch app
-'''
-title = "SEEM: Segment Everything Everywhere All At Once"
-# Update description based on model loading status
-if model_loaded:
-    model_status = f"<span style=\"color:green;\">✓ Model loaded successfully</span> (SEEM {cur_model})"
-else:
-    model_status = "<span style=\"color:orange;\">⚠ Running in demonstration mode</span> (model not loaded)"
-description = f"""
-<div style="text-align: center; font-weight: bold;">
-    <span style="font-size: 18px" id="paper-info">
-        [<a href="https://github.com/UX-Decoder/Segment-Everything-Everywhere-All-At-Once" target="_blank">GitHub</a>]
-        [<a href="https://arxiv.org/pdf/2304.06718.pdf" target="_blank">arXiv</a>]
-    </span>
-</div>
-<div style="text-align: left; font-weight: bold;">
-    <br>
-    &#x1F32A Status: {model_status}
-    </p>
-</div>
-"""
-article = "SEEM Demo" + (" (Simplified Interface)" if not model_loaded else "")
-inputs = [
-    gr.Image(label="[Stroke] Draw on Image", type="pil"),
-    gr.CheckboxGroup(choices=["Stroke", "Example", "Text", "Audio", "Video", "Panoptic"], label="Interactive Mode"),
-    gr.Image(label="[Example] Draw on Referring Image", type="pil"),
-    gr.Textbox(label="[Text] Referring Text"),
-    gr.Audio(label="[Audio] Referring Audio", source="microphone", type="filepath"),
-    gr.Video(label="[Video] Referring Video Segmentation", format="mp4")
-]
-outputs = [
-    gr.outputs.Image(type="pil", label="Segmentation Results (COCO classes as label)"),
-    gr.outputs.Video(label="Video Segmentation Results (COCO classes as label)")
-]
-gr.Interface(
-    fn=inference,
-    inputs=inputs,
-    outputs=outputs,
-    examples=[
-    ["examples/corgi1.webp", ["Text"], "examples/corgi2.jpg", "The corgi.", None, None],
-    ["examples/river1.png", ["Text", "Audio"], "examples/river2.png", "The green trees.", "examples/river1.wav", None],
-    ["examples/zebras1.jpg", ["Example"], "examples/zebras2.jpg", "", None, None],
-    ["examples/fries1.png", ["Example"], "examples/fries2.png", "", None, None],
-    ["examples/placeholder.png", ["Video"], "examples/ref_vase.JPG", "", None, "examples/vasedeck.mp4"],
-    ],
-    title=title,
-    description=description,
-    article=article,
-    allow_flagging='never',
-    cache_examples=False,
-).launch()

 # Written by Xueyan Zou ([email protected]), Jianwei Yang ([email protected])
 # --------------------------------------------------------
+# Hugging Face Spaces Launcher
 import os
 import sys
 import subprocess
+import importlib.util
+import logging
+import time
+# Send INFO-and-above records to the root logger with a timestamped format,
+# then create the named logger used by every function in this launcher.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+)
+logger = logging.getLogger("SEEM-HF")
+def run_command(cmd, description=None):
+    """Run a shell command and stream its combined output to the logger.
+
+    Args:
+        cmd: Command line passed to the shell (``shell=True``), so shell
+            syntax such as ``&&`` works. Nothing here escapes the string;
+            only pass trusted, hard-coded commands.
+        description: Optional human-readable label logged before the command.
+
+    Returns:
+        True if the command exited with status 0; False if it exited with a
+        non-zero status or an exception occurred while starting or reading it.
+    """
+    if description:
+        logger.info(f"Running: {description}")
+    logger.info(f"Command: {cmd}")
+    try:
+        # The context manager closes the stdout pipe and waits for the child
+        # even if logging a line raises, so no pipe or zombie is left behind.
+        with subprocess.Popen(
+            cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            universal_newlines=True
+        ) as process:
+            # stderr is merged into stdout above; log each line as it arrives
+            for line in process.stdout:
+                logger.info(line.rstrip())
+        # Leaving the with-block has already waited, so returncode is set
+        return process.returncode == 0
+    except Exception as e:
+        logger.error(f"Error executing command: {e}")
+        return False
+def install_dependencies():
+    """Install ffmpeg (if missing) and the Python requirements.
+
+    Uses ``assets/requirements/requirements.txt`` when it exists; otherwise
+    writes a minimal ``requirements.txt`` in the current directory and
+    installs from that. ``assets/requirements/requirements_custom.txt`` is
+    installed afterwards when present. Failures are logged as warnings and do
+    not raise; the function always returns None.
+    """
+    # Imported locally so this function does not depend on extra file-level imports
+    import shlex
+    import shutil
+    # Check if ffmpeg is installed (PATH lookup, no subprocess needed)
+    logger.info("Checking for ffmpeg...")
+    if shutil.which("ffmpeg") is None:
+        logger.info("Installing ffmpeg...")
+        if not run_command("apt-get update && apt-get install -y ffmpeg", "Installing ffmpeg"):
+            logger.warning("ffmpeg installation failed; continuing without it")
+    # Invoke pip through the running interpreter so packages land in the
+    # environment that will import them; fall back to bare pip only when the
+    # interpreter path is unavailable.
+    pip = f"{shlex.quote(sys.executable)} -m pip" if sys.executable else "pip"
+    # Install Python dependencies
+    logger.info("Installing Python dependencies...")
+    if os.path.exists("assets/requirements/requirements.txt"):
+        requirement_jobs = [("assets/requirements/requirements.txt", "Installing base requirements")]
+    else:
+        logger.warning("Base requirements file not found, creating minimal requirements")
+        with open("requirements.txt", "w") as f:
+            f.write("""torch>=1.12.0
+torchvision>=0.13.0
+opencv-python-headless>=4.5.0
+numpy>=1.23.5
+gradio>=3.13.0
+Pillow>=9.0.0
+openai-whisper
 """)
+        requirement_jobs = [("requirements.txt", "Installing minimal requirements")]
+    if os.path.exists("assets/requirements/requirements_custom.txt"):
+        requirement_jobs.append(("assets/requirements/requirements_custom.txt", "Installing custom requirements"))
+    for requirements_path, description in requirement_jobs:
+        # run_command reports failure via its return value; surface it
+        if not run_command(f"{pip} install -r {requirements_path}", description):
+            logger.warning(f"{description} failed for {requirements_path}")
+def setup_environment():
+    """Create the working directories and check that the demo sources exist.
+
+    Returns:
+        True when ``demo/seem`` exists; False otherwise, after logging an error.
+    """
+    # exist_ok=True makes each call a no-op when the directory is already there
+    for directory in ('utils', 'modeling', 'modeling/architectures', 'tasks', 'examples'):
+        os.makedirs(directory, exist_ok=True)
+    logger.info("Created required directories")
+    # Report whether the demo/seem directory is available
+    demo_present = os.path.exists("demo/seem")
+    if not demo_present:
+        logger.error("demo/seem directory not found!")
+    return demo_present
+def main():
+    """Install dependencies, prepare the workspace and load the SEEM demo app.
+
+    Every failure is logged rather than raised, and the function returns None
+    in all cases. The demo module is executed from ``demo/seem/app.py`` under
+    the module name ``seem_app``.
+    """
+    logger.info("Starting SEEM Hugging Face Space")
+    # Install dependencies
+    install_dependencies()
+    # Setup environment
+    if not setup_environment():
+        return
+    # Prepare to run the actual app
+    app_path = "demo/seem/app.py"
+    if not os.path.exists(app_path):
+        logger.error(f"Application file not found at {app_path}!")
+        return
+    logger.info(f"Loading application from {app_path}")
+    # Add the demo directory to Python path
+    sys.path.insert(0, os.path.abspath('demo'))
+    # Load and run the app module
+    module_name = "seem_app"
     try:
+        spec = importlib.util.spec_from_file_location(module_name, app_path)
+        # A missing spec or loader would otherwise surface as an opaque
+        # AttributeError on the lines below
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Cannot build an import spec for {app_path}")
+        seem_app = importlib.util.module_from_spec(spec)
+        # Register before executing, as the importlib recipe does, so code
+        # that looks the module up in sys.modules during import can find it
+        sys.modules[module_name] = seem_app
+        spec.loader.exec_module(seem_app)
+        logger.info("SEEM application loaded successfully")
     except Exception as e:
+        # Do not leave a half-initialised module registered
+        sys.modules.pop(module_name, None)
+        logger.error(f"Error loading application: {e}")
+        import traceback
         traceback.print_exc()
+if __name__ == "__main__":
+    main()