Spaces: Running on Zero
likunchang committed · commit 717846a
Parent(s): cdfd9c4
init

app.py CHANGED
@@ -26,12 +26,13 @@ from modeling.qwen2 import Qwen2Tokenizer
 
 from huggingface_hub import snapshot_download
 
-
+save_dir = "./model_weights"
 repo_id = "ByteDance-Seed/BAGEL-7B-MoT"
-cache_dir =
+cache_dir = save_dir + "/cache"
 
-snapshot_download(cache_dir=cache_dir,
-
+snapshot_download(
+    cache_dir=cache_dir,
+    local_dir=save_dir,
     repo_id=repo_id,
     local_dir_use_symlinks=False,
     resume_download=True,
@@ -39,6 +40,7 @@ snapshot_download(cache_dir=cache_dir,
 )
 
 # Model Initialization
+model_path = save_dir
 
 llm_config = Qwen2Config.from_json_file(os.path.join(model_path, "llm_config.json"))
 llm_config.qk_norm = True
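Taken together, these two hunks pin the checkpoint to a local folder: the weights are materialized under ./model_weights, the Hub cache sits beside them, and model_path then simply points at save_dir. A standalone sketch of the new download logic, assuming a recent huggingface_hub release (where resume_download and local_dir_use_symlinks are still accepted but deprecated; they are kept here only to mirror the commit):

from huggingface_hub import snapshot_download

save_dir = "./model_weights"
snapshot_download(
    repo_id="ByteDance-Seed/BAGEL-7B-MoT",
    cache_dir=save_dir + "/cache",    # keep the Hub cache next to the weights
    local_dir=save_dir,               # materialize real files under ./model_weights
    local_dir_use_symlinks=False,     # copy files instead of symlinking into the cache
    resume_download=True,             # continue partially downloaded shards
)
model_path = save_dir                 # configs like llm_config.json are read from here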
@@ -173,10 +175,17 @@ def text_to_image(prompt, show_thinking=False, cfg_text_scale=4.0, cfg_interval=
         cfg_renorm_type=cfg_renorm_type,
         image_shapes=image_shapes,
     )
-
+
+    result = {"text": "", "image": None}
     # Call inferencer with or without think parameter based on user choice
-
-
+    for i in inferencer(text=prompt, think=show_thinking, understanding_output=False, **inference_hyper):
+        print(type(i))
+        if type(i) == str:
+            result["text"] += i
+        else:
+            result["image"] = i
+
+        yield result["image"], result.get("text", None)
 
 
 # Image Understanding function with thinking option and hyperparameters
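text_to_image is now a generator rather than a plain function: it accumulates streamed text chunks, keeps the last non-string item as the generated image, and yields the running (image, text) pair so the UI can refresh mid-inference. A minimal self-contained sketch of that pattern, using a hypothetical stand-in for the BAGEL inferencer (assumed to yield str chunks first and a PIL image last):

from PIL import Image

def fake_inferencer():
    # hypothetical stand-in: text chunks first, then the image
    yield "thinking... "
    yield "done. "
    yield Image.new("RGB", (64, 64))

def generate():
    result = {"text": "", "image": None}
    for item in fake_inferencer():
        if isinstance(item, str):
            result["text"] += item      # accumulate streamed text
        else:
            result["image"] = item      # the image arrives as the final item
        yield result["image"], result["text"]

for image, text in generate():
    print(type(image).__name__, repr(text))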
@@ -198,10 +207,15 @@ def image_understanding(image: Image.Image, prompt: str, show_thinking=False,
         max_think_token_n=max_new_tokens, # Set max_length
     )
 
+    result = {"text": "", "image": None}
     # Use show_thinking parameter to control thinking process
-
-                        understanding_output=True, **inference_hyper)
-
+    for i in inferencer(image=image, text=prompt, think=show_thinking,
+                        understanding_output=True, **inference_hyper):
+        if type(i) == str:
+            result["text"] += i
+        else:
+            result["image"] = i
+        yield result["text"]
 
 
 # Image Editing function with thinking option and hyperparameters
@@ -237,9 +251,14 @@ def edit_image(image: Image.Image, prompt: str, show_thinking=False, cfg_text_sc
     )
 
     # Include thinking parameter based on user choice
-    result =
-
+    result = {"text": "", "image": None}
+    for i in inferencer(image=image, text=prompt, think=show_thinking, understanding_output=False, **inference_hyper):
+        if type(i) == str:
+            result["text"] += i
+        else:
+            result["image"] = i
 
+        yield result["image"], result.get("text", "")
 
 # Helper function to load example images
 def load_example_image(image_path):
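At this point the same accumulate-and-yield loop appears in all three inference paths (text_to_image, image_understanding, edit_image). A sketch of how it could be factored into one shared helper; this refactor is hypothetical and not part of the commit:

def stream_outputs(gen):
    # yield the running (image, text) state while consuming an inferencer generator
    image, text = None, ""
    for item in gen:
        if isinstance(item, str):
            text += item
        else:
            image = item
        yield image, text

# usage inside edit_image would then reduce to:
#     yield from stream_outputs(inferencer(image=image, text=prompt,
#                                          think=show_thinking, **inference_hyper))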
@@ -321,22 +340,9 @@ with gr.Blocks() as demo:
                 outputs=[thinking_output, thinking_params]
             )
 
-            # Process function based on thinking option and hyperparameters
-            def process_text_to_image(prompt, show_thinking, cfg_text_scale,
-                                      cfg_interval, timestep_shift,
-                                      num_timesteps, cfg_renorm_min, cfg_renorm_type,
-                                      max_think_token_n, do_sample, text_temperature, seed, image_ratio):
-                image, thinking = text_to_image(
-                    prompt, show_thinking, cfg_text_scale, cfg_interval,
-                    timestep_shift, num_timesteps,
-                    cfg_renorm_min, cfg_renorm_type,
-                    max_think_token_n, do_sample, text_temperature, seed, image_ratio
-                )
-                return image, thinking if thinking else ""
-
             gr.on(
                 triggers=[gen_btn.click, txt_input.submit],
-                fn=process_text_to_image,
+                fn=text_to_image,
                 inputs=[
                     txt_input, show_thinking, cfg_text_scale,
                     cfg_interval, timestep_shift,
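With the process_text_to_image wrapper gone, gr.on binds its triggers straight to the generator function, and Gradio streams every yield to the listed outputs. A toy, self-contained sketch of that mechanism (assumes Gradio 4.x, where gr.on exists; the components here are hypothetical):

import time
import gradio as gr

def echo_slowly(prompt):
    out = ""
    for ch in prompt:
        out += ch
        time.sleep(0.05)
        yield out                     # each yield repaints the output textbox

with gr.Blocks() as demo:
    txt = gr.Textbox(label="Prompt")
    btn = gr.Button("Run")
    result = gr.Textbox(label="Streamed output")
    gr.on(triggers=[btn.click, txt.submit], fn=echo_slowly,
          inputs=[txt], outputs=[result])

demo.launch()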
@@ -413,24 +419,9 @@ with gr.Blocks() as demo:
                 outputs=[edit_thinking_output, edit_thinking_params]
             )
 
-            # Process editing with thinking option and hyperparameters
-            def process_edit_image(image, prompt, show_thinking, cfg_text_scale,
-                                   cfg_img_scale, cfg_interval,
-                                   timestep_shift, num_timesteps, cfg_renorm_min,
-                                   cfg_renorm_type, max_think_token_n, do_sample,
-                                   text_temperature, seed):
-                edited_image, thinking = edit_image(
-                    image, prompt, show_thinking, cfg_text_scale, cfg_img_scale,
-                    cfg_interval, timestep_shift,
-                    num_timesteps, cfg_renorm_min, cfg_renorm_type,
-                    max_think_token_n, do_sample, text_temperature, seed
-                )
-
-                return edited_image, thinking if thinking else ""
-
             gr.on(
                 triggers=[edit_btn.click, edit_prompt.submit],
-                fn=process_edit_image,
+                fn=edit_image,
                 inputs=[
                     edit_image_input, edit_prompt, edit_show_thinking,
                     edit_cfg_text_scale, edit_cfg_img_scale, edit_cfg_interval,
@@ -467,18 +458,9 @@ with gr.Blocks() as demo:
 
             img_understand_btn = gr.Button("Submit", variant="primary")
 
-            # Process understanding with thinking option and hyperparameters
-            def process_understanding(image, prompt, show_thinking, do_sample,
-                                      text_temperature, max_new_tokens):
-                result = image_understanding(
-                    image, prompt, show_thinking, do_sample,
-                    text_temperature, max_new_tokens
-                )
-                return result
-
             gr.on(
                 triggers=[img_understand_btn.click, understand_prompt.submit],
-                fn=process_understanding,
+                fn=image_understanding,
                 inputs=[
                     img_input, understand_prompt, understand_show_thinking,
                     understand_do_sample, understand_text_temperature, understand_max_new_tokens