Pedro Cuenca committed · commit 85eab14 · parent 8944cc5

Integrate current UI in demo app.

Note that the port number has been removed. I suppose HF Spaces will
forward requests to the default service port.

Former-commit-id: acb4488ee9887246a28a5c2358bafbda0e29355d
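
For reference, the launch call this note refers to now omits the port entirely, as in the sketch below. This is a minimal sketch, not the file's exact code; the specific fallback (Gradio picking port 7860, which Spaces then exposes) is an assumption based on Gradio's usual default behavior.

import gradio as gr

def run_inference(prompt):
    # hypothetical stand-in for the real generation pipeline
    return f"(images for: {prompt})"

# Without server_port, Gradio binds to its default port (7860 in the
# releases of this era); the commit note assumes Spaces forwards traffic
# to that default. The diff keeps the old port only as a comment.
gr.Interface(run_inference,
    inputs=[gr.inputs.Textbox(label='Prompt')],
    outputs=[gr.outputs.Textbox(label='')],
).launch()  # no explicit server_port=8999 anymore
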
- app/app_gradio.py +49 -13
- app/ui_gradio.py +2 -2
app/app_gradio.py CHANGED

@@ -163,7 +163,7 @@ def clip_top_k(prompt, images, k=8):
     scores = np.array(logits[0]).argsort()[-k:][::-1]
     return [images[score] for score in scores]
 
-def captioned_strip(images, caption):
+def compose_predictions(images, caption=None):
     increased_h = 0 if caption is None else 48
     w, h = images[0].size[0], images[0].size[1]
     img = Image.new("RGB", (len(images)*w, h + increased_h))

@@ -176,19 +176,55 @@ def captioned_strip(images, caption):
     draw.text((20, 3), caption, (255,255,255), font=font)
     return img
 
-def run_inference(…):
-    images = hallucinate(prompt, num_images=…)
-    images = clip_top_k(prompt, images, k=…)
-    …
-    …
-    …
+def top_k_predictions(prompt, num_candidates=32, k=8):
+    images = hallucinate(prompt, num_images=num_candidates)
+    images = clip_top_k(prompt, images, k=k)
+    return images
+
+def run_inference(prompt, num_images=32, num_preds=8):
+    images = top_k_predictions(prompt, num_candidates=num_images, k=num_preds)
+    predictions = compose_predictions(images)
+    output_title = f"""
+    <p style="font-size:22px; font-style:bold">Best predictions</p>
+    <p>We asked our model to generate 32 candidates for your prompt:</p>
+
+    <pre>
+
+    <b>{prompt}</b>
+    </pre>
+    <p>We then used a pre-trained <a href="https://huggingface.co/openai/clip-vit-base-patch32">CLIP model</a> to score them according to the
+    similarity of the text and the image representations.</p>
+
+    <p>This is the result:</p>
+    """
+    output_description = """
+    <p>Read more about the process <a href="https://wandb.ai/dalle-mini/dalle-mini/reports/DALL-E-mini--Vmlldzo4NjIxODA">in our report</a>.<p>
+    <p style='text-align: center'>Created with <a href="https://github.com/borisdayma/dalle-mini">DALLE·mini</a></p>
+    """
+    return (output_title, predictions, output_description)
+
+outputs = [
+    gr.outputs.HTML(label=""),   # To be used as title
+    gr.outputs.Image(label=''),
+    gr.outputs.HTML(label=""),   # Additional text that appears in the screenshot
+]
+
+description = """
+Welcome to our demo of DALL·E-mini. This project was created on TPU v3-8s during the 🤗 Flax / JAX Community Week.
+It reproduces the essential characteristics of OpenAI's DALL·E, at a fraction of the size.
+
+Please, write what you would like the model to generate, or select one of the examples below.
+"""
 gr.Interface(run_inference,
     inputs=[gr.inputs.Textbox(label='Prompt')], #, gr.inputs.Slider(1,64,1,8, label='Candidates to generate'), gr.inputs.Slider(1,8,1,1, label='Best predictions to show')],
-    outputs=…
-    title='…
-    description=…
-    article="<p style='text-align: center'> DALLE…
+    outputs=outputs,
+    title='DALL·E mini',
+    description=description,
+    article="<p style='text-align: center'> DALLE·mini by Boris Dayma et al. | <a href='https://github.com/borisdayma/dalle-mini'>GitHub</a></p>",
     layout='vertical',
     theme='huggingface',
-    examples=[['an armchair in the shape of an avocado']],
-    …
+    examples=[['an armchair in the shape of an avocado'], ['snowy mountains by the sea']],
+    allow_flagging=False,
+    live=False,
+    # server_port=8999
+).launch()
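
Taken together, the new app_gradio.py code wires generation, ranking, and presentation into one pipeline. The sketch below traces that flow with hypothetical stubs in place of the repo's real hallucinate and clip_top_k helpers; only the call order, signatures, and the strip layout visible in the hunks come from the diff, and the paste loop is an assumption to make the fragment self-contained.

from PIL import Image

# Hypothetical stubs; the real helpers sample from DALL·E-mini and rank with CLIP.
def hallucinate(prompt, num_images=32):
    return [Image.new("RGB", (256, 256)) for _ in range(num_images)]

def clip_top_k(prompt, images, k=8):
    return images[:k]

def compose_predictions(images, caption=None):
    # As in the diff: paste the candidates side by side into a single strip,
    # reserving a 48-pixel caption band only when a caption is given.
    increased_h = 0 if caption is None else 48
    w, h = images[0].size[0], images[0].size[1]
    strip = Image.new("RGB", (len(images) * w, h + increased_h))
    for i, im in enumerate(images):  # paste loop assumed, not shown in the hunk
        strip.paste(im, (i * w, increased_h))
    return strip

# Flow added by the commit: 32 candidates -> best 8 -> one composed strip,
# which run_inference returns together with the HTML title and description
# so that Gradio can map the tuple onto the three declared outputs.
candidates = hallucinate("an armchair in the shape of an avocado", num_images=32)
best = clip_top_k("an armchair in the shape of an avocado", candidates, k=8)
strip = compose_predictions(best)
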
app/ui_gradio.py CHANGED

@@ -51,8 +51,8 @@ def run_inference(prompt, num_images=32, num_preds=8):
 
     <b>{prompt}</b>
     </pre>
-    <p>We then used a pre-trained CLIP model to score them according to the
-    similarity of …
+    <p>We then used a pre-trained <a href="https://huggingface.co/openai/clip-vit-base-patch32">CLIP model</a> to score them according to the
+    similarity of the text and the image representations.</p>
 
     <p>This is the result:</p>
     """
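
Finally, the CLIP ranking that both files now link to can be approximated with the referenced openai/clip-vit-base-patch32 checkpoint. This is a sketch under the assumption of a PyTorch CLIPModel; the repo itself may use the Flax variant, but the argsort ranking mirrors the clip_top_k context shown in the first hunk.

import numpy as np
import torch
from transformers import CLIPModel, CLIPProcessor

# Checkpoint linked from the demo text; the wrapper below is an assumption.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def clip_top_k(prompt, images, k=8):
    inputs = processor(text=[prompt], images=images, return_tensors="pt", padding=True)
    with torch.no_grad():
        # Similarity of the single prompt to each candidate image.
        logits = model(**inputs).logits_per_text[0]
    # Same ranking as the diff context: ascending argsort, keep the k largest, best first.
    scores = np.array(logits).argsort()[-k:][::-1]
    return [images[i] for i in scores]
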