Spaces: Runtime error
Ubuntu committed on
Commit e5efca7 · 1 Parent(s): c071a86
Update Inpainting Demo
Browse files
- .gitignore +1 -0
- .log/log.txt +6 -0
- SegFormer +1 -0
- output.png +0 -0
- requirements.txt +2 -2
- test.png +0 -0
- test.py +168 -76
.gitignore CHANGED
@@ -1,4 +1,5 @@
 __pycache__
 *.pyc
 checkpoints/
+I2SB/
 *.pth
.log/log.txt ADDED
@@ -0,0 +1,6 @@
+[19:02:29] INFO (0:00:00) Loaded options from opt_pkl_path=PosixPath('I2SB/results/inpaint-freeform2030/options.pkl')!
+INFO (0:00:00) [Diffusion] Built I2SB diffusion: steps=1000!
+[19:02:33] INFO (0:00:03) [Net] Initialized network from ckpt_pkl='I2SB/data/256x256_diffusion_uncond_fixedsigma.pkl'! Size=552807171!
+[19:02:44] INFO (0:00:14) [Net] Loaded pretrained adm ckpt_pt='I2SB/data/256x256_diffusion_uncond_fixedsigma.pt'!
+[19:02:49] INFO (0:00:19) [Net] Loaded network ckpt: I2SB/results/inpaint-freeform2030/latest.pt!
+[19:02:50] INFO (0:00:20) [Ema] Loaded ema ckpt: I2SB/results/inpaint-freeform2030/latest.pt!
SegFormer ADDED
@@ -0,0 +1 @@
+Subproject commit 64ab11278eb30b8e2d8ea1d10a777fc5b1563948
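The SegFormer entry is a gitlink (submodule pointer), so a fresh clone of the Space only records the commit hash above until the submodule is fetched with git submodule update --init SegFormer. A minimal startup guard is sketched below; the check itself is an assumption for illustration, not part of this commit.

# Minimal sketch: fail early if the SegFormer submodule has not been checked out.
from pathlib import Path

seg_dir = Path("SegFormer")
if not seg_dir.is_dir() or not any(seg_dir.iterdir()):
    raise RuntimeError("SegFormer submodule is empty; run: git submodule update --init SegFormer")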
output.png ADDED
requirements.txt CHANGED
@@ -18,8 +18,8 @@ timm
 # torch==2.0.0
 # torchvision==0.15.1
 
-torch==2.2.1
-torchvision==0.17.1
+# torch==2.2.1
+# torchvision==0.17.1
 
 gevent
 yapf
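With both pins now commented out, torch and torchvision come from whatever the Space's runtime image already provides. A quick sanity check is sketched below; running such a check at startup is an assumption, not part of this commit.

# Minimal sketch: confirm the environment-provided torch/torchvision versions and CUDA availability,
# since requirements.txt no longer pins them.
import torch
import torchvision

print("torch", torch.__version__, "torchvision", torchvision.__version__, "cuda", torch.cuda.is_available())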
test.png ADDED
test.py CHANGED
@@ -36,6 +36,34 @@ from GroundingDINO.groundingdino.util import box_ops
 from GroundingDINO.groundingdino.util.slconfig import SLConfig
 from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
 
+# I2SB
+import sys
+
+sys.path.insert(0, "/home/ubuntu/Thesis-Demo/I2SB")
+
+import numpy as np
+import torch
+import torch.distributed as dist
+import torchvision.transforms as transforms
+import torchvision.utils as tu
+from easydict import EasyDict as edict
+from fastapi import (Body, Depends, FastAPI, File, Form, HTTPException, Query,
+                     UploadFile)
+from ipdb import set_trace as debug
+from PIL import Image
+from torch.multiprocessing import Process
+from torch.utils.data import DataLoader, Subset
+from torch_ema import ExponentialMovingAverage
+
+import I2SB.distributed_util as dist_util
+from I2SB.corruption import build_corruption
+from I2SB.dataset import air_liquide
+from I2SB.i2sb import Runner, ckpt_util, download_ckpt
+from I2SB.logger import Logger
+from I2SB.sample import *
+
+
+
 import cv2
 import numpy as np
 import matplotlib
@@ -126,6 +154,30 @@ kosmos_processor = None
 colors = [(255, 0, 0), (0, 255, 0)]
 markers = [1, 5]
 
+i2sb_opt = edict(
+    distributed=False,
+    device="cuda",
+    batch_size=1,
+    nfe=10,
+    dataset="sample",
+    dataset_dir=Path(f"dataset/sample"),
+    n_gpu_per_node=1,
+    use_fp16=False,
+    ckpt="inpaint-freeform2030",
+    image_size=256,
+    partition=None,
+    global_size=1,
+    global_rank=0,
+    clip_denoise=True
+)
+
+i2sb_transforms = transforms.Compose([
+    transforms.Resize(i2sb_opt.image_size),
+    transforms.CenterCrop(i2sb_opt.image_size),
+    transforms.ToTensor(),
+    transforms.Lambda(lambda t: (t * 2) - 1)  # [0,1] --> [-1, 1]
+])
+
 def get_point(img, sel_pix, evt: gr.SelectData):
     img = np.array(img, dtype=np.uint8)
     sel_pix.append(evt.index)
@@ -146,6 +198,10 @@ def undo_button(orig_img, sel_pix):
     for point in sel_pix:
         cv2.drawMarker(temp, point, colors[0], markerType=markers[0], markerSize=6, thickness=2)
     return Image.fromarray(temp).convert("RGB")
+
+def clear_button(orig_img):
+
+    return orig_img, []
 
 def toggle_button(orig_img, task_type):
     print(task_type)
@@ -173,6 +229,37 @@ def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     _ = model.eval()
     return model
 
+def load_i2sb_model():
+    RESULT_DIR = Path("I2SB/results")
+    global i2sb_model
+    global ckpt_opt
+    global corrupt_type
+    global nfe
+
+    s = time.time()
+
+    # main from here
+    log = Logger(0, ".log")
+
+    # get (default) ckpt option
+    ckpt_opt = ckpt_util.build_ckpt_option(i2sb_opt, log, RESULT_DIR / i2sb_opt.ckpt)
+    corrupt_type = ckpt_opt.corrupt
+    nfe = i2sb_opt.nfe or ckpt_opt.interval-1
+
+    # build corruption method
+    # corrupt_method = build_corruption(i2sb_opt, log, corrupt_type=cor
+    # rupt_type)
+    runner = Runner(ckpt_opt, log, save_opt=False)
+    if i2sb_opt.use_fp16:
+        runner.ema.copy_to()  # copy weight from ema to net
+        runner.net.diffusion_model.convert_to_fp16()
+        runner.ema = ExponentialMovingAverage(
+            runner.net.parameters(), decay=0.99)  # re-init ema with fp16 weight
+
+    print("Loading time:", (time.time()-s)*1e3, "ms.")
+    i2sb_model = runner
+    return runner
+
 def plot_boxes_to_image(image_pil, tgt):
     H, W = tgt["size"]
     boxes = tgt["boxes"]
@@ -238,6 +325,8 @@ def load_image(image_path):
     image, _ = transform(image_pil, None)  # 3, h, w
     return image_pil, image
 
+
+
 def get_grounding_output(model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu"):
     caption = caption.lower()
     caption = caption.strip()
@@ -357,6 +446,24 @@ def load_sd_model(device):
         torch_dtype=torch.float16,
     )
     sd_model = sd_model.to(device)
+
+def forward_i2sb(img, mask):
+    print(np.unique(img), mask.shape)
+    mask = np.where(mask > 0, 1, 0)
+    img_tensor = i2sb_transforms(img).to(
+        i2sb_opt.device).unsqueeze(0)
+
+    mask_tensor = torch.from_numpy(np.resize(np.array(mask), (256,256))).to(
+        i2sb_opt.device).unsqueeze(0).unsqueeze(0)
+    print("POST PROCESSING\t", torch.unique(img_tensor))
+    # corrupt_tensor = img_tensor * (1. - mask_tensor) + mask_tensor
+    f = time.time()
+    xs, _ = i2sb_model.ddpm_sampling(
+        ckpt_opt, img_tensor, mask=mask_tensor, cond=None, clip_denoise=i2sb_opt.clip_denoise, nfe=nfe, verbose=i2sb_opt.n_gpu_per_node == 1)
+    recon_img = xs[:, 0, ...].to(i2sb_opt.device)
+    tu.save_image((recon_img+1)/2, "output.png")
+    print(recon_img.shape)
+    return transforms.ToPILImage()(((recon_img+1)/2)[0])
 
 def lama_cleaner_process(image, mask, cleaner_size_limit=1080):
     try:
@@ -511,7 +618,7 @@ def concatenate_images_vertical(image1, image2):
     return new_image
 
 mask_source_draw = "draw a mask on input image"
-mask_source_segment = "
+mask_source_segment = "upload a mask"
 
 def get_time_cost(run_task_time, time_cost_str):
     now_time = int(time.time()*1000)
@@ -524,11 +631,8 @@ def get_time_cost(run_task_time, time_cost_str):
     run_task_time = now_time
     return run_task_time, time_cost_str
 
-def run_anything_task(input_image, input_points, origin_image,
-
-
-    text_prompt = getTextTrans(text_prompt, source='zh', target='en')
-    inpaint_prompt = getTextTrans(inpaint_prompt, source='zh', target='en')
+def run_anything_task(input_image, input_points, origin_image, task_type,
+                      mask_source_radio, cleaner_size_limit=1080):
 
     run_task_time = 0
     time_cost_str = ''
@@ -543,27 +647,19 @@ def run_anything_task(input_image, input_points, origin_image, text_prompt, task
         image_pil, image = load_image(input_image.convert("RGB"))
         input_img = input_image
 
-        kosmos_image, kosmos_text, kosmos_entities = kosmos_generate_predictions(image_pil,
+        kosmos_image, kosmos_text, kosmos_entities = kosmos_generate_predictions(image_pil, kosmos_model, kosmos_processor)
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         return None, None, time_cost_str, kosmos_image, gr.Textbox.update(visible=(time_cost_str !='')), kosmos_text, kosmos_entities
 
-    text_prompt = text_prompt.strip()
-    # if not ((task_type in ['inpainting', 'outpainting'] or task_type == 'remove') and mask_source_radio == mask_source_draw):
-    #     if text_prompt == '':
-    #         return [], gr.Gallery.update(label='Detection prompt is not found!ππππ'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
-
     if input_image is None:
         return [], gr.Gallery.update(label='Please upload a image!ππππ'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
 
     file_temp = int(time.time())
-    logger.info(f'run_anything_task_002/{device}_[{file_temp}]_{task_type}/
+    logger.info(f'run_anything_task_002/{device}_[{file_temp}]_{task_type}/[{mask_source_radio}]_1_')
 
     output_images = []
 
     # load image
-    if mask_source_radio == mask_source_draw:
-        input_mask_pil = input_image['mask']
-        input_mask = np.array(input_mask_pil.convert("L"))
 
     if isinstance(input_image, dict):
         image_pil, image = load_image(input_image['image'].convert("RGB"))
@@ -626,17 +722,17 @@ def run_anything_task(input_image, input_points, origin_image, text_prompt, task
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
         return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     elif task_type in ['inpainting', 'outpainting'] or task_type == 'remove':
-        if
+        if mask_source_radio == mask_source_segment:
             task_type = 'remove'
 
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_4_')
         if mask_source_radio == mask_source_draw:
+            input_mask_pil = input_image['mask']
+            input_mask = np.array(input_mask_pil.convert("L"))
             mask_pil = input_mask_pil
             mask = input_mask
         else:
             masks_ori = copy.deepcopy(masks)
-            if inpaint_mode == 'merge':
-                masks = torch.sum(masks, dim=0).unsqueeze(0)
             masks = torch.where(masks > 0, True, False)
             mask = masks[0][0].cpu().numpy()
             mask_pil = Image.fromarray(mask)
@@ -644,18 +740,11 @@ def run_anything_task(input_image, input_points, origin_image, text_prompt, task
     run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
 
     if task_type in ['inpainting', 'outpainting']:
-        #
-
-
-
-
-        img_arr = np.array(image_mask_for_inpaint)
-        img_arr = np.where(img_arr > 0, 1, img_arr)
-        img_arr = 1 - img_arr
-        image_mask_for_inpaint = Image.fromarray(255*img_arr.astype('uint8'))
-        output_images.append(image_mask_for_inpaint.convert("RGB"))
-        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
-        image_inpainting = sd_model(prompt=inpaint_prompt, image=image_source_for_inpaint, mask_image=image_mask_for_inpaint).images[0]
+        # image_inpainting = sd_model(prompt = "", image=image_source_for_inpaint, mask_image=image_mask_for_inpaint).images[0]
+        input_img.save("test.png")
+        image_inpainting = forward_i2sb(input_img, mask)
+
+        print("RESULT\t", np.array(image_inpainting))
     else:
         # remove from mask
         aasds = 1
@@ -681,8 +770,6 @@ def run_anything_task(input_image, input_points, origin_image, text_prompt, task
     return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
 
 def change_radio_display(task_type, mask_source_radio, orig_img):
-    text_prompt_visible = True
-    inpaint_prompt_visible = False
     mask_source_radio_visible = False
     num_relation_visible = False
 
@@ -693,35 +780,29 @@ def change_radio_display(task_type, mask_source_radio, orig_img):
     print(task_type)
     if task_type == "Kosmos-2":
         if kosmos_enable:
-            text_prompt_visible = False
            image_gallery_visible = False
            kosmos_input_visible = True
            kosmos_output_visible = True
            kosmos_text_output_visible = True
 
-    if task_type in ['inpainting', 'outpainting']:
-        inpaint_prompt_visible = False
     if task_type in ['inpainting', 'outpainting'] or task_type == "remove":
         mask_source_radio_visible = True
-        if mask_source_radio == mask_source_draw:
-            text_prompt_visible = False
     if task_type == "relate anything":
-        text_prompt_visible = False
         num_relation_visible = True
     if task_type == "segment":
         ret = gr.Image(value= orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "editor")# tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
     elif task_type == "inpainting":
         ret = gr.Image(value = orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
 
-    return (gr.
-            gr.Textbox.update(visible=inpaint_prompt_visible),
-            gr.Radio.update(visible=mask_source_radio_visible),
+    return (gr.Radio.update(visible=mask_source_radio_visible),
             gr.Slider.update(visible=num_relation_visible),
             gr.Gallery.update(visible=image_gallery_visible),
             gr.Radio.update(visible=kosmos_input_visible),
            gr.Image.update(visible=kosmos_output_visible),
            gr.HighlightedText.update(visible=kosmos_text_output_visible),
-            ret, [],
+            ret, [],
+            gr.Button("Undo point", visible = task_type == "segment"),
+            gr.Button("Clear point", visible = task_type == "segment"),)
 
 def get_model_device(module):
     try:
@@ -770,42 +851,52 @@ def main_gradio(args):
                 [input_image, selected_points],
                 [input_image]
             )
-
-
-
-
-
-
-
+            with gr.Row():
+                with gr.Column():
+
+                    undo_point_button = gr.Button("Undo point")
+                    undo_point_button.click(
+                        fn= undo_button,
+                        inputs=[original_image, selected_points],
+                        outputs=[input_image]
+                    )
+
+                with gr.Column():
+
+                    clear_point_button = gr.Button("Clear point")
+                    clear_point_button.click(
+                        fn= clear_button,
+                        inputs=[original_image],
+                        outputs=[input_image, selected_points]
+                    )
+
             print(dir(input_image))
             task_type = gr.Radio(task_types, value="segment",
                                  label='Task type', visible=True)
             mask_source_radio = gr.Radio([mask_source_draw, mask_source_segment],
-                                         value=
+                                         value=mask_source_draw, label="Mask from",
                                          visible=False)
-            text_prompt = gr.Textbox(label="Detection", placeholder="Cannot be empty")
-            inpaint_prompt = gr.Textbox(label="Inpaint Prompt (if this is empty, then remove)", visible=False)
             num_relation = gr.Slider(label="How many relations do you want to see", minimum=1, maximum=20, value=5, step=1, visible=False)
 
             kosmos_input = gr.Radio(["Brief", "Detailed"], label="Kosmos Description Type", value="Brief", visible=False)
 
             run_button = gr.Button(label="Run", visible=True)
-            with gr.Accordion("Advanced options", open=False) as advanced_options:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # with gr.Accordion("Advanced options", open=False) as advanced_options:
+            #     box_threshold = gr.Slider(
+            #         label="Box Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.001
+            #     )
+            #     text_threshold = gr.Slider(
+            #         label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
+            #     )
+            #     iou_threshold = gr.Slider(
+            #         label="IOU Threshold", minimum=0.0, maximum=1.0, value=0.8, step=0.001
+            #     )
+            #     inpaint_mode = gr.Radio(["merge", "first"], value="merge", label="inpaint_mode")
+            #     with gr.Row():
+            #         with gr.Column(scale=1):
+            #             remove_mode = gr.Radio(["segment", "rectangle"], value="segment", label='remove mode')
+            #         with gr.Column(scale=1):
+            #             remove_mask_extend = gr.Textbox(label="remove_mask_extend", value='10')
 
         with gr.Column():
             image_gallery = gr.Gallery(label="result images", show_label=True, elem_id="gallery", height=512, visible=True
@@ -841,15 +932,15 @@ def main_gradio(args):
         selected.change(update_output_image, [kosmos_output, kosmos_output, entity_output, selected], [kosmos_output])
 
         run_button.click(fn=run_anything_task, inputs=[
-            input_image, selected_points, original_image,
-
+            input_image, selected_points, original_image, task_type,
+            mask_source_radio],
             outputs=[image_gallery, image_gallery, time_cost, time_cost, kosmos_output, kosmos_text_output, entity_output], show_progress=True, queue=True)
 
         mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
-                                 outputs=[
+                                 outputs=[mask_source_radio, num_relation])
         task_type.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
-                         outputs=[
-                         image_gallery, kosmos_input, kosmos_output, kosmos_text_output, input_image, selected_points, undo_point_button
+                         outputs=[mask_source_radio, num_relation,
+                         image_gallery, kosmos_input, kosmos_output, kosmos_text_output, input_image, selected_points, undo_point_button, clear_point_button
                          ])
 
         # DESCRIPTION = f'### This demo from [Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything). <br>'
@@ -895,8 +986,9 @@ if __name__ == "__main__":
     if sam_enable:
         load_sam_model(device)
 
-
-
+    if inpainting_enable:
+        load_sd_model(device)
+        load_i2sb_model()
 
     # if lama_cleaner_enable:
     #     load_lama_cleaner_model(device)