Spaces:

matteomarjanovic
/

draptic-demo

Running

App Files Files Community

matteomarjanovic committed on Feb 13

Commit

21e5fc0

1 Parent(s): a15d459

add prompt generation

Browse files

Files changed (2) hide show

app.py +44 -3
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -6,9 +6,11 @@ import spaces #[uncomment to use ZeroGPU]
 from diffusers import DiffusionPipeline
 import torch
 import subprocess
 subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "black-forest-labs/FLUX.1-schnell"  # Replace with the model you would like to use
 lora_path = "matteomarjanovic/flatsketcher"
@@ -26,6 +28,11 @@ pipe.load_lora_weights(lora_path, weight_name=weigths_file)
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 @spaces.GPU #[uncomment to use ZeroGPU]
 def infer(
@@ -56,6 +63,28 @@ def infer(
     return image, seed
 examples = [
     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
@@ -70,11 +99,15 @@ css = """
 }
 """
 with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column(elem_id="col-input-image"):
             gr.Markdown(" # Drop your image here")
-            gr.Image()
         with gr.Column(elem_id="col-container"):
             gr.Markdown(" # Text-to-Image Gradio Template")
@@ -148,8 +181,16 @@ with gr.Blocks(css=css) as demo:
             triggers=[run_button.click, prompt.submit],
             fn=infer,
             inputs=[
-                prompt,
-                negative_prompt,
                 seed,
                 randomize_seed,
                 width,

 from diffusers import DiffusionPipeline
 import torch
 import subprocess
+from transformers import IdeficsForVisionText2Text, AutoProcessor
 subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
+# Load FLUX image generator
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_repo_id = "black-forest-labs/FLUX.1-schnell"  # Replace with the model you would like to use
 lora_path = "matteomarjanovic/flatsketcher"
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
+# Load the IDEFICS model used to generate the prompt
+checkpoint = "HuggingFaceM4/idefics-9b"
+processor = AutoProcessor.from_pretrained(checkpoint)
+idefics_model = IdeficsForVisionText2Text.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map="auto")
 @spaces.GPU #[uncomment to use ZeroGPU]
 def infer(
     return image, seed
+@spaces.GPU #[uncomment to use ZeroGPU]
+def generate_description_fn(
+    image,
+    progress=gr.Progress(track_tqdm=True),
+):
+    """Generate a short text description of ``image`` with the IDEFICS model.
+
+    Args:
+        image: The image to describe, as delivered by the Gradio image input.
+            ``None`` means no image was provided.
+        progress: Gradio progress tracker configured to follow tqdm bars.
+
+    Returns:
+        The decoded text of the first generated sequence, or an empty string
+        when no image was provided.
+    """
+    # Nothing to describe: skip the model call instead of failing inside it.
+    if image is None:
+        return ""
+    # Describe the caller's image. The previous version ignored ``image`` and
+    # always sent a fixed sample URL to the model.
+    # NOTE(review): gr.Image() hands over a NumPy array by default; confirm the
+    # installed IDEFICS processor accepts that (otherwise convert to PIL first).
+    prompt = [
+        image,
+    ]
+    # No seed handling here: ``seed`` / ``randomize_seed`` are not parameters of
+    # this function, and the generator built from them was never passed on.
+    inputs = processor(prompt, return_tensors="pt").to("cuda")
+    # Keep the image placeholder tokens out of the generated text.
+    bad_words_ids = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
+    generated_ids = idefics_model.generate(**inputs, max_new_tokens=10, bad_words_ids=bad_words_ids)
+    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
+    return generated_text[0]
 examples = [
     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
 }
 """
+generated_prompt = ""
 with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column(elem_id="col-input-image"):
             gr.Markdown(" # Drop your image here")
+            input_image = gr.Image()
+            generate_button = gr.Button("Generate", scale=0, variant="primary")
+            generated_prompt_md = gr.Markdown(generated_prompt)
         with gr.Column(elem_id="col-container"):
             gr.Markdown(" # Text-to-Image Gradio Template")
             triggers=[run_button.click, prompt.submit],
             fn=infer,
             inputs=[
+                input_image
+            ],
+            outputs=[generated_prompt],
+        )
+        gr.on(
+            triggers=[generate_button.click],
+            fn=generate_description_fn,
+            inputs=[
+                input_image,
                 seed,
                 randomize_seed,
                 width,

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ torch
 transformers
 xformers
 sentencepiece
-peft

 transformers
 xformers
 sentencepiece
+peft
+bitsandbytes