BLIP2

Sleeping

App Files Files Community

Dongxu Li committed on Feb 1, 2023

Commit

f7f5be8

1 Parent(s): 8f68280

fix missing rep_penalty.

Browse files

Files changed (1) hide show

app.py +31 -36

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ from io import BytesIO
 import string
 import gradio as gr
 import requests
-from PIL import Image
 from utils import Endpoint
@@ -15,7 +14,10 @@ def encode_image(image):
     return buffered
-def query_api(image, prompt, decoding_method, temperature, len_penalty, repetition_penalty):
     url = endpoint.url
     headers = {"User-Agent": "BLIP-2 HuggingFace Space"}
@@ -60,8 +62,11 @@ def inference(
     history.append(text_input)
     prompt = " ".join(history)
-    output = query_api(image, prompt, decoding_method, temperature, length_penalty, repetition_penalty)
     output = postprocess_output(output)
     history += output
@@ -69,37 +74,23 @@ def inference(
         (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)
     ]  # convert to tuples of list
-    return chat, history
-# image source: https://m.facebook.com/112483753737319/photos/112489593736735/
-endpoint = Endpoint()
-examples = [
-    ["house.png", "How could someone get out of the house?"],
-    [
-        "sunset.png",
-        "Write a romantic message that goes along this photo.",
-    ],
-]
-# outputs = ["chatbot", "state"]
 title = """<h1 align="center">BLIP-2</h1>"""
 description = """Gradio demo for BLIP-2, a multimodal chatbot from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Please visit our <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>project webpage</a>.</p>
 <p> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected. </p>"""
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.12086' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>"
-# iface = gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples)
-def reset_all(text_input, image_input, chatbot, history):
-    return "", None, None, []
-def reset_chatbot(chatbot, history):
-    return None, []
 with gr.Blocks() as iface:
     state = gr.State([])
@@ -139,25 +130,30 @@ with gr.Blocks() as iface:
                 rep_penalty = gr.Slider(
                     minimum=1.0,
-                    maximum=10.0,
-                    value=1.0,
                     step=0.5,
                     interactive=True,
                     label="Repetition Penalty",
                 )
         with gr.Column():
-            chatbot = gr.Chatbot()
             with gr.Row():
                 clear_button = gr.Button(value="Clear", interactive=True)
                 clear_button.click(
-                    reset_all,
-                    [text_input, image_input, chatbot, state],
                     [text_input, image_input, chatbot, state],
                 )
-                submit_button = gr.Button(value="Submit", interactive=True, variant="primary")
                 submit_button.click(
                     inference,
                     [
@@ -166,17 +162,16 @@ with gr.Blocks() as iface:
                         sampling,
                         temperature,
                         len_penalty,
                         state,
                     ],
                     [chatbot, state],
                 )
-    image_input.change(reset_chatbot, [chatbot, state], [chatbot, state])
     examples = gr.Examples(
         examples=examples,
         inputs=[image_input, text_input],
     )
-iface.queue(concurrency_count=1)
-iface.launch(enable_queue=True, debug=True)

 import string
 import gradio as gr
 import requests
 from utils import Endpoint
     return buffered
+def query_api(
+    image, prompt, decoding_method, temperature, len_penalty, repetition_penalty
+):
     url = endpoint.url
     headers = {"User-Agent": "BLIP-2 HuggingFace Space"}
     history.append(text_input)
     prompt = " ".join(history)
+    print(prompt)
+    output = query_api(
+        image, prompt, decoding_method, temperature, length_penalty, repetition_penalty
+    )
     output = postprocess_output(output)
     history += output
         (history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)
     ]  # convert to tuples of list
+    return {chatbot: chat, state: history}
 title = """<h1 align="center">BLIP-2</h1>"""
 description = """Gradio demo for BLIP-2, a multimodal chatbot from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them. Please visit our <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'>project webpage</a>.</p>
 <p> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected. </p>"""
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2201.12086' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>"
+endpoint = Endpoint()
+examples = [
+    ["house.png", "How could someone get out of the house?"],
+    # [
+    #     "sunset.png",
+    #     "Write a romantic message that goes along this photo.",
+    # ],
+]
 with gr.Blocks() as iface:
     state = gr.State([])
                 rep_penalty = gr.Slider(
                     minimum=1.0,
+                    maximum=20.0,
+                    value=10.0,
                     step=0.5,
                     interactive=True,
                     label="Repetition Penalty",
                 )
         with gr.Column():
+            with gr.Row():
+                chatbot = gr.Chatbot()
+                image_input.change(lambda: (None, []), [], [chatbot, state])
             with gr.Row():
                 clear_button = gr.Button(value="Clear", interactive=True)
                 clear_button.click(
+                    lambda: ("", None, [], []),
+                    [],
                     [text_input, image_input, chatbot, state],
                 )
+                submit_button = gr.Button(
+                    value="Submit", interactive=True, variant="primary"
+                )
                 submit_button.click(
                     inference,
                     [
                         sampling,
                         temperature,
                         len_penalty,
+                        rep_penalty,
                         state,
                     ],
                     [chatbot, state],
                 )
     examples = gr.Examples(
         examples=examples,
         inputs=[image_input, text_input],
     )
+iface.queue(concurrency_count=1, api_open=False, max_size=20)
+iface.launch(enable_queue=True)