ginipick committed · verified · Commit ec38b03 · 1 Parent(s): 6a103fa

Update app.py

Files changed (1):
  1. app.py +160 -101

app.py CHANGED
@@ -1,15 +1,7 @@
- # --- Patch: apply before loading the model ---
- from transformers import PretrainedConfig
- PretrainedConfig.get_text_config = lambda self, decoder=True: type("DummyTextConfig", (), {"tie_word_embeddings": False})()
-
- # Override tie_weights with a no-op on every image segmentation model class
- from transformers.models.auto.modeling_auto import MODEL_FOR_IMAGE_SEGMENTATION_MAPPING
- for model_class in MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.values():
-     model_class.tie_weights = lambda self: None
- # --- End of patch ---
-
- from transformers import AutoModelForImageSegmentation
- from transformers import PreTrainedModel  # (for reference)
  import os
  import cv2
  import numpy as np
@@ -23,18 +15,50 @@ from typing import Tuple, Optional
  from PIL import Image
  from gradio_imageslider import ImageSlider
  from torchvision import transforms
-
  import requests
  from io import BytesIO
  import zipfile
  import random

- torch.set_float32_matmul_precision('high')
- torch.jit.script = lambda f: f

  device = "cuda" if torch.cuda.is_available() else "cpu"

- ### Image post-processing functions ###
  def refine_foreground(image, mask, r=90):
      if mask.size != image.size:
          mask = mask.resize(image.size)
@@ -61,6 +85,7 @@ def FB_blur_fusion_foreground_estimator(image, F, B, alpha, r=90):
      F = np.clip(F, 0, 1)
      return F, blurred_B

  class ImagePreprocessor():
      def __init__(self, resolution: Tuple[int, int] = (1024, 1024)) -> None:
          self.transform_image = transforms.Compose([
@@ -72,6 +97,11 @@ class ImagePreprocessor():
          image = self.transform_image(image)
          return image

  usage_to_weights_file = {
      'General': 'BiRefNet',
      'General-HR': 'BiRefNet_HR',
@@ -86,105 +116,113 @@ usage_to_weights_file = {
      'General-legacy': 'BiRefNet-legacy'
  }

- # Initial model loading (default: General)
- birefnet = AutoModelForImageSegmentation.from_pretrained(
-     '/'.join(('zhengpeng7', usage_to_weights_file['General'])),
-     trust_remote_code=True
  )
- birefnet.to(device)
- birefnet.eval(); birefnet.half()

  @spaces.GPU
  def predict(images, resolution, weights_file):
      assert images is not None, 'Images cannot be None.'
-     global birefnet
-     # Reload the model with the selected weights
-     _weights_file = '/'.join(('zhengpeng7', usage_to_weights_file[weights_file] if weights_file is not None else usage_to_weights_file['General']))
-     print('Using weights: {}.'.format(_weights_file))
-     birefnet = AutoModelForImageSegmentation.from_pretrained(_weights_file, trust_remote_code=True)
-     birefnet.to(device)
-     birefnet.eval(); birefnet.half()

      try:
-         resolution_list = [int(int(reso)//32*32) for reso in resolution.strip().split('x')]
      except:
-         if weights_file == 'General-HR':
-             resolution_list = [2048, 2048]
-         elif weights_file == 'General-Lite-2K':
-             resolution_list = [2560, 1440]
-         else:
-             resolution_list = [1024, 1024]
-         print('Invalid resolution input. Automatically changed to default.')

-     # Check whether the input is a single image or a list (a batch)
      if isinstance(images, list):
-         tab_is_batch = True
      else:
          images = [images]
-         tab_is_batch = False
-
-     save_paths = []
-     save_dir = 'preds-BiRefNet'
-     if tab_is_batch and not os.path.exists(save_dir):
-         os.makedirs(save_dir)
-
-     outputs = []
      for idx, image_src in enumerate(images):
          if isinstance(image_src, str):
              if os.path.isfile(image_src):
                  image_ori = Image.open(image_src)
              else:
-                 response = requests.get(image_src)
-                 image_data = BytesIO(response.content)
-                 image_ori = Image.open(image_data)
          else:
-             if isinstance(image_src, np.ndarray):
-                 image_ori = Image.fromarray(image_src)
-             else:
-                 image_ori = image_src.convert('RGB')
          image = image_ori.convert('RGB')
-         preprocessor = ImagePreprocessor(resolution=tuple(resolution_list))
-         image_proc = preprocessor.proc(image).unsqueeze(0)
-         with torch.no_grad():
-             preds = birefnet(image_proc.to(device).half())[-1].sigmoid().cpu()
-         pred = preds[0].squeeze()
-         pred_pil = transforms.ToPILImage()(pred)
          image_masked = refine_foreground(image, pred_pil)
          image_masked.putalpha(pred_pil.resize(image.size))
-         torch.cuda.empty_cache()
-         if tab_is_batch:
-             file_path = os.path.join(save_dir, "{}.png".format(
-                 os.path.splitext(os.path.basename(image_src))[0] if isinstance(image_src, str) else f"img_{idx}"
-             ))
-             image_masked.save(file_path)
-             save_paths.append(file_path)
              outputs.append(image_masked)
          else:
              outputs = [image_masked, image_ori]
-
-     if tab_is_batch:
-         zip_file_path = os.path.join(save_dir, "{}.zip".format(save_dir))
-         with zipfile.ZipFile(zip_file_path, 'w') as zipf:
-             for file in save_paths:
-                 zipf.write(file, os.path.basename(file))
-         return save_paths, zip_file_path
      else:
          return outputs

- # Example data (images, URLs, batches)
- examples_image = [[path, "1024x1024", "General"] for path in glob('examples/*')]
- examples_text = [[url, "1024x1024", "General"] for url in ["https://hips.hearstapps.com/hmg-prod/images/gettyimages-1229892983-square.jpg"]]
- examples_batch = [[file, "1024x1024", "General"] for file in glob('examples/*')]

- descriptions = (
-     "Upload a picture, our model will extract a highly accurate segmentation of the subject in it.\n"
-     "The resolution used in our training was `1024x1024`, which is suggested for good results! "
-     "`2048x2048` is suggested for BiRefNet_HR.\n"
-     "Our codes can be found at https://github.com/ZhengPeng7/BiRefNet.\n"
-     "We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access."
- )

- # CSS for a nicer UI
  css = """
  body {
      background: linear-gradient(135deg, #667eea, #764ba2);
@@ -239,16 +277,17 @@ button:hover, .btn:hover {
  }
  """

- title = """
- <h1 align="center" style="margin-bottom: 0.2em;">BiRefNet Demo for Subject Extraction</h1>
  <p align="center" style="font-size:1.1em; color:#555;">
- Upload an image or provide an image URL to extract the subject with high-precision segmentation.
  </p>
  """

  with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
-     gr.Markdown(title)
      with gr.Tabs():
          with gr.Tab("Image"):
              with gr.Row():
                  with gr.Column(scale=1):
@@ -257,8 +296,14 @@ with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
                  weights_radio = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                  predict_btn = gr.Button("Predict")
              with gr.Column(scale=2):
-                 output_slider = ImageSlider(label="BiRefNet's Prediction", type="pil")
-             gr.Examples(examples=examples_image, inputs=[image_input, resolution_input, weights_radio], label="Examples")
          with gr.Tab("Text"):
              with gr.Row():
                  with gr.Column(scale=1):
@@ -267,23 +312,37 @@ with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
                  weights_radio_text = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                  predict_btn_text = gr.Button("Predict")
              with gr.Column(scale=2):
-                 output_slider_text = ImageSlider(label="BiRefNet's Prediction", type="pil")
-             gr.Examples(examples=examples_text, inputs=[image_url, resolution_input_text, weights_radio_text], label="Examples")
          with gr.Tab("Batch"):
              with gr.Row():
                  with gr.Column(scale=1):
-                     file_input = gr.File(label="Upload Multiple Images", type="filepath", file_count="multiple")
                      resolution_input_batch = gr.Textbox(lines=1, placeholder="e.g., 1024x1024", label="Resolution")
                      weights_radio_batch = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                      predict_btn_batch = gr.Button("Predict")
                  with gr.Column(scale=2):
-                     output_gallery = gr.Gallery(label="BiRefNet's Predictions", scale=1)
-                     zip_output = gr.File(label="Download Masked Images")
-             gr.Examples(examples=examples_batch, inputs=[file_input, resolution_input_batch, weights_radio_batch], label="Examples")
-     with gr.Row():
-         gr.Markdown("<p align='center'>Model by <a href='https://huggingface.co/ZhengPeng7/BiRefNet'>ZhengPeng7/BiRefNet</a></p>")

-     # Wire each tab's Predict button to the predict function
      predict_btn.click(
          fn=predict,
          inputs=[image_input, resolution_input, weights_radio],
+ ##########################################################
+ # 0. Environment setup and library imports
+ ##########################################################
+
  import os
  import cv2
  import numpy as np

  from PIL import Image
  from gradio_imageslider import ImageSlider
  from torchvision import transforms

  import requests
  from io import BytesIO
  import zipfile
  import random

+ # Transformers
+ from transformers import (
+     AutoConfig,
+     AutoModelForImageSegmentation,
+ )
+ from safetensors.torch import load_file  # .safetensors files need this; torch.load cannot parse them
+
+ # 1) Load the config first to avoid the tie_weights conflict
+ config = AutoConfig.from_pretrained(
+     "zhengpeng7/BiRefNet",  # 👉 the desired Hugging Face model repo
+     trust_remote_code=True
+ )
+
+ # 2) Attach a dummy get_text_config method to the config (tie_word_embeddings=False)
+ def dummy_get_text_config(decoder=True):
+     return type("DummyTextConfig", (), {"tie_word_embeddings": False})()

+ config.get_text_config = dummy_get_text_config
+
+ # 3) Build only the model structure (from_config) -> tie_weights is not invoked automatically
+ birefnet = AutoModelForImageSegmentation.from_config(config, trust_remote_code=True)
+ birefnet.eval()
  device = "cuda" if torch.cuda.is_available() else "cpu"
+ birefnet.to(device)
+ birefnet.half()
+
+ # 4) Load the state_dict (weights) - example using a local file
+ # In practice, fetch "model.safetensors" first via hf_hub_download / snapshot_download
+ print("Loading BiRefNet weights from local file: model.safetensors")
+ state_dict = load_file("model.safetensors")  # example; safetensors format requires load_file
+ missing, unexpected = birefnet.load_state_dict(state_dict, strict=False)
+ print("[Info] Missing keys:", missing)
+ print("[Info] Unexpected keys:", unexpected)
+ torch.cuda.empty_cache()
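# A minimal sketch (not part of this commit) of how the weights file mentioned in
# the step-4 comment could be fetched first; it assumes "model.safetensors" is
# hosted in the zhengpeng7/BiRefNet repo and that huggingface_hub is installed:
#
#     from huggingface_hub import hf_hub_download
#     weights_path = hf_hub_download(repo_id="zhengpeng7/BiRefNet",
#                                    filename="model.safetensors")
#     state_dict = load_file(weights_path)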
+
+
+ ##########################################################
+ # 1. Image post-processing functions
+ ##########################################################

  def refine_foreground(image, mask, r=90):
      if mask.size != image.size:
          mask = mask.resize(image.size)

      F = np.clip(F, 0, 1)
      return F, blurred_B

+
  class ImagePreprocessor():
      def __init__(self, resolution: Tuple[int, int] = (1024, 1024)) -> None:
          self.transform_image = transforms.Compose([

          image = self.transform_image(image)
          return image

+
+ ##########################################################
+ # 2. Example setup and utilities
+ ##########################################################
+
  usage_to_weights_file = {
      'General': 'BiRefNet',
      'General-HR': 'BiRefNet_HR',

      'General-legacy': 'BiRefNet-legacy'
  }

+ examples_image = [[path, "1024x1024", "General"] for path in glob('examples/*')]
+ examples_text = [[url, "1024x1024", "General"] for url in [
+     "https://hips.hearstapps.com/hmg-prod/images/gettyimages-1229892983-square.jpg"
+ ]]
+ examples_batch = [[file, "1024x1024", "General"] for file in glob('examples/*')]
+
+ descriptions = (
+     "Upload a picture, our model will extract a highly accurate segmentation of the subject in it.\n"
+     "The resolution used in our training was `1024x1024`, which is suggested for good results! "
+     "`2048x2048` is suggested for BiRefNet_HR.\n"
+     "Our codes can be found at https://github.com/ZhengPeng7/BiRefNet.\n"
+     "We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access."
  )
+
+
+ ##########################################################
+ # 3. Inference function (uses the already-loaded birefnet model)
+ ##########################################################

  @spaces.GPU
  def predict(images, resolution, weights_file):
+     """
+     Only a single birefnet model is kept here; even when weights_file changes,
+     the already-loaded 'birefnet' model is what actually runs.
+     (To load different weights, a local state_dict swap like the sketch below
+     could be added.)
+     """
      assert images is not None, 'Images cannot be None.'

+     # Parse the resolution; floor each side to a multiple of 32
      try:
+         w, h = resolution.strip().split('x')
+         w, h = int(int(w)//32*32), int(int(h)//32*32)
+         resolution_list = (w, h)
      except:
+         print('[WARN] Invalid resolution input. Fallback to 1024x1024.')
+         resolution_list = (1024, 1024)
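    # (The flooring maps e.g. "1000x600" to (992, 576); "1024x1024" passes through unchanged.)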
 
 
 
 
 
+     # The input may contain multiple images, so handle it as a list
      if isinstance(images, list):
+         is_batch = True
+         outputs, save_paths = [], []
+         save_dir = 'preds-BiRefNet'
+         os.makedirs(save_dir, exist_ok=True)
      else:
          images = [images]
+         is_batch = False
+
      for idx, image_src in enumerate(images):
+         # A str is either a file path or a URL
          if isinstance(image_src, str):
              if os.path.isfile(image_src):
                  image_ori = Image.open(image_src)
              else:
+                 resp = requests.get(image_src)
+                 image_ori = Image.open(BytesIO(resp.content))
+         # A numpy array is converted via Pillow
+         elif isinstance(image_src, np.ndarray):
+             image_ori = Image.fromarray(image_src)
          else:
+             image_ori = image_src.convert('RGB')
+
          image = image_ori.convert('RGB')
+         preproc = ImagePreprocessor(resolution_list)
+         image_proc = preproc.proc(image).unsqueeze(0).to(device).half()
+
+         # Run inference
+         with torch.inference_mode():
+             # preds comes from the model's final output layer
+             preds = birefnet(image_proc)[-1].sigmoid().cpu()
+         pred_mask = preds[0].squeeze()
+
+         # Post-processing
+         pred_pil = transforms.ToPILImage()(pred_mask)
          image_masked = refine_foreground(image, pred_pil)
          image_masked.putalpha(pred_pil.resize(image.size))
+
+         if is_batch:
+             file_name = (
+                 os.path.splitext(os.path.basename(image_src))[0]
+                 if isinstance(image_src, str)
+                 else f"img_{idx}"
+             )
+             out_path = os.path.join(save_dir, f"{file_name}.png")
+             image_masked.save(out_path)
+             save_paths.append(out_path)
              outputs.append(image_masked)
          else:
              outputs = [image_masked, image_ori]
+
+     torch.cuda.empty_cache()
+
+     # For a batch, return the gallery paths plus a ZIP archive
+     if is_batch:
+         zip_path = os.path.join(save_dir, f"{save_dir}.zip")
+         with zipfile.ZipFile(zip_path, 'w') as zipf:
+             for fpath in save_paths:
+                 zipf.write(fpath, os.path.basename(fpath))
+         return (save_paths, zip_path)
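          # (The tuple above feeds the Batch tab's output_gallery and zip_output components below.)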
      else:
          return outputs


+ ##########################################################
+ # 4. Gradio UI
+ ##########################################################

+ # Custom CSS
  css = """
  body {
      background: linear-gradient(135deg, #667eea, #764ba2);

  }
  """

+ title_html = """
+ <h1 align="center" style="margin-bottom: 0.2em;">BiRefNet Demo (No Tie-Weights Crash)</h1>
  <p align="center" style="font-size:1.1em; color:#555;">
+ Using <code>from_config()</code> + a local <code>state_dict</code> to bypass tie_weights issues
  </p>
  """

  with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
+     gr.Markdown(title_html)
      with gr.Tabs():
+         # Tab 1: Image
          with gr.Tab("Image"):
              with gr.Row():
                  with gr.Column(scale=1):

                  weights_radio = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                  predict_btn = gr.Button("Predict")
              with gr.Column(scale=2):
+                 output_slider = ImageSlider(label="Result", type="pil")
+             gr.Examples(
+                 examples=examples_image,
+                 inputs=[image_input, resolution_input, weights_radio],
+                 label="Examples"
+             )
+
+         # Tab 2: Text (URL)
          with gr.Tab("Text"):
              with gr.Row():
                  with gr.Column(scale=1):

                  weights_radio_text = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                  predict_btn_text = gr.Button("Predict")
              with gr.Column(scale=2):
+                 output_slider_text = ImageSlider(label="Result", type="pil")
+             gr.Examples(
+                 examples=examples_text,
+                 inputs=[image_url, resolution_input_text, weights_radio_text],
+                 label="Examples"
+             )
+
+         # Tab 3: Batch
          with gr.Tab("Batch"):
              with gr.Row():
                  with gr.Column(scale=1):
+                     file_input = gr.File(
+                         label="Upload Multiple Images",
+                         type="filepath",
+                         file_count="multiple"
+                     )
                      resolution_input_batch = gr.Textbox(lines=1, placeholder="e.g., 1024x1024", label="Resolution")
                      weights_radio_batch = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
                      predict_btn_batch = gr.Button("Predict")
                  with gr.Column(scale=2):
+                     output_gallery = gr.Gallery(label="Results", scale=1)
+                     zip_output = gr.File(label="Zip Download")
+             gr.Examples(
+                 examples=examples_batch,
+                 inputs=[file_input, resolution_input_batch, weights_radio_batch],
+                 label="Examples"
+             )
+
+     gr.Markdown("<p align='center'>Model by <a href='https://huggingface.co/ZhengPeng7/BiRefNet'>ZhengPeng7/BiRefNet</a></p>")

+     # Wire up the button events
      predict_btn.click(
          fn=predict,
          inputs=[image_input, resolution_input, weights_radio],