Spaces:

zai-org
/

CogVLM-CogAgent

Running on CPU Upgrade

App Files Community

lykeven committed on May 21, 2024

Commit

d5e8a13

1 Parent(s): ba58453

update model and gradio version

Browse files

Files changed (9) hide show

app.py +19 -49
examples/chemistry.png +0 -0
examples/example_inputs.jsonl +6 -6
examples/guicai.jpeg +0 -0
examples/poem.jpeg +0 -0
examples/sota.jpeg +0 -0
examples/triangle.jpeg +0 -0
examples/watermelon.jpeg +0 -0
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -8,16 +8,12 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 from utils import is_chinese, process_image_without_resize, parse_response, templates_agent_cogagent, template_grounding_cogvlm, postprocess_text
-DESCRIPTION = '''<h2 style='text-align: center'> <a href="https://github.com/THUDM/CogVLM2"> CogVLM2 </a>& <a href="https://github.com/THUDM/CogVLM">CogAgent Chat Demo</a> </h2>'''
-NOTES = 'This app is adapted from <a href="https://github.com/THUDM/CogVLM">https://github.com/THUDM/CogVLM2</a> and <a href="https://github.com/THUDM/CogVLM">https://github.com/THUDM/CogVLM</a>. It would be recommended to check out the repo if you want to see the detail of our model.\n\n该demo仅作为测试使用，不支持批量请求。如有大批量需求，欢迎联系[智谱AI](mailto:[email protected])。\n\n请注意该Demo目前仅支持英文，<a href="http://36.103.203.44:7861/">备用网页</a>支持中文。'
 MAINTENANCE_NOTICE1 = 'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.<br>Hint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. Please be patient and wait.'
-GROUNDING_NOTICE = 'Hint: When you check "Grounding", please use the <a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L344">corresponding prompt</a> or the examples below.'
-AGENT_NOTICE = 'Hint: When you check "CogAgent", please use the <a href="https://github.com/THUDM/CogVLM/blob/main/utils/utils/template.py#L761C1-L761C17">corresponding prompt</a> or the examples below.'
 default_chatbox = [("", "Hi, What do you want to know about this image?")]
@@ -36,10 +32,7 @@ def post(
         image_prompt,
         result_previous,
         hidden_image,
-        grounding,
-        cogagent,
-        grounding_template,
-        agent_template
         ):
     result_text = [(ele[0], ele[1]) for ele in result_previous]
     for i in range(len(result_text)-1, -1, -1):
@@ -47,7 +40,7 @@ def post(
             del result_text[i]
     print(f"history {result_text}")
-    is_zh = is_chinese(input_text)
     if image_prompt is None:
         print("Image empty")
@@ -77,24 +70,13 @@ def post(
         encoded_img = None
     model_use = "vlm_chat"
-    if not cogagent and grounding:
-        model_use = "vlm_grounding"
-        if grounding_template:
-            input_text = postprocess_text(grounding_template, input_text)
-    elif cogagent:
-        model_use = "agent_chat"
-        if agent_template is not None and agent_template != "do not use template":
-            input_text = postprocess_text(agent_template, input_text)
     prompt = input_text
-    if grounding:
-        prompt += "(with grounding)"
-    print(f'request {model_use} model... with prompt {prompt}, grounding_template {grounding_template}, agent_template {agent_template}')
     data = json.dumps({
         'model_use': model_use,
-        'is_grounding': grounding,
         'text': prompt,
         'history': result_text,
         'image': encoded_img,
@@ -121,13 +103,7 @@ def post(
     # response = {'result':input_text}
     answer = str(response['result'])
-    if grounding:
-        parse_response(pil_img, answer, image_path_grounding)
-        new_answer = answer.replace(input_text, "")
-        result_text.append((input_text, new_answer))
-        result_text.append((None, (image_path_grounding,)))
-    else:
-        result_text.append((input_text, answer))
     print(result_text)
     print('finished')
     return "", result_text, hidden_image
@@ -164,34 +140,28 @@ def main():
                     image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
                 with gr.Row():
-                    grounding = gr.Checkbox(label="Grounding")
-                    cogagent = gr.Checkbox(label="CogAgent")
-                with gr.Row():
-                    # grounding_notice = gr.Markdown(GROUNDING_NOTICE)
-                    grounding_template = gr.Dropdown(choices=template_grounding_cogvlm, label="Grounding Template", value=template_grounding_cogvlm[0])
-                    # agent_notice = gr.Markdown(AGENT_NOTICE)
-                    agent_template = gr.Dropdown(choices=templates_agent_cogagent, label="Agent Template", value=templates_agent_cogagent[0])
                 with gr.Row():
-                    temperature = gr.Slider(maximum=1, value=0.9, minimum=0, label='Temperature')
-                    top_p = gr.Slider(maximum=1, value=0.8, minimum=0, label='Top P')
-                    top_k = gr.Slider(maximum=50, value=5, minimum=1, step=1, label='Top K')
             with gr.Column(scale=5.5):
                 result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[("", "Hi, What do you want to know about this image?")], height=550)
                 hidden_image_hash = gr.Textbox(visible=False)
-        gr_examples = gr.Examples(examples=[[example["text"], example["image"], example["grounding"], example["cogagent"]] for example in examples],
-                                  inputs=[input_text, image_prompt, grounding, cogagent],
                                   label="Example Inputs (Click to insert an examplet into the input box)",
                                   examples_per_page=6)
         gr.Markdown(MAINTENANCE_NOTICE1)
         print(gr.__version__)
-        run_button.click(fn=post,inputs=[input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, grounding, cogagent, grounding_template, agent_template],
                          outputs=[input_text, result_text, hidden_image_hash])
-        input_text.submit(fn=post,inputs=[input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, grounding, cogagent, grounding_template, agent_template],
                          outputs=[input_text, result_text, hidden_image_hash])
         clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
         image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
@@ -199,8 +169,8 @@ def main():
         print(gr.__version__)
-    demo.queue(concurrency_count=10)
-    demo.launch()
 if __name__ == '__main__':
     main()

 from concurrent.futures import ThreadPoolExecutor
 from utils import is_chinese, process_image_without_resize, parse_response, templates_agent_cogagent, template_grounding_cogvlm, postprocess_text
+DESCRIPTION = '''<h2 style='text-align: center'> <a href="https://github.com/THUDM/CogVLM2"> CogVLM2 </a></h2>'''
+NOTES = 'This app is adapted from <a href="https://github.com/THUDM/CogVLM">https://github.com/THUDM/CogVLM2</a> . It would be recommended to check out the repo if you want to see the detail of our model.\n\n该demo仅作为测试使用，不支持批量请求。如有大批量需求，欢迎联系[智谱AI](mailto:[email protected])。\n<a href="http://36.103.203.44:7861/">备用链接</a>'
 MAINTENANCE_NOTICE1 = 'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.<br>Hint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. Please be patient and wait.'
 default_chatbox = [("", "Hi, What do you want to know about this image?")]
         image_prompt,
         result_previous,
         hidden_image,
+        is_english,
         ):
     result_text = [(ele[0], ele[1]) for ele in result_previous]
     for i in range(len(result_text)-1, -1, -1):
             del result_text[i]
     print(f"history {result_text}")
+    is_zh = not is_english
     if image_prompt is None:
         print("Image empty")
         encoded_img = None
     model_use = "vlm_chat"
+    if not is_english:
+        model_use = "vlm_chat_zh"
     prompt = input_text
+    print(f'request {model_use} model... with prompt {prompt}')
     data = json.dumps({
         'model_use': model_use,
         'text': prompt,
         'history': result_text,
         'image': encoded_img,
     # response = {'result':input_text}
     answer = str(response['result'])
+    result_text.append((input_text, answer))
     print(result_text)
     print('finished')
     return "", result_text, hidden_image
                     image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
                 with gr.Row():
+                    is_english = gr.Checkbox(label="Use English Model")
                 with gr.Row():
+                    temperature = gr.Slider(maximum=1, value=0.8, minimum=0, label='Temperature')
+                    top_p = gr.Slider(maximum=1, value=0.4, minimum=0, label='Top P')
+                    top_k = gr.Slider(maximum=50, value=1, minimum=1, step=1, label='Top K')
             with gr.Column(scale=5.5):
                 result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[("", "Hi, What do you want to know about this image?")], height=550)
                 hidden_image_hash = gr.Textbox(visible=False)
+        gr_examples = gr.Examples(examples=[[example["text"], example["image"], example["is_english"]] for example in examples],
+                                  inputs=[input_text, image_prompt, is_english],
                                   label="Example Inputs (Click to insert an examplet into the input box)",
                                   examples_per_page=6)
         gr.Markdown(MAINTENANCE_NOTICE1)
         print(gr.__version__)
+        run_button.click(fn=post,inputs=[input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, is_english],
                          outputs=[input_text, result_text, hidden_image_hash])
+        input_text.submit(fn=post,inputs=[input_text, temperature, top_p, top_k, image_prompt, result_text, hidden_image_hash, is_english],
                          outputs=[input_text, result_text, hidden_image_hash])
         clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
         image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
         print(gr.__version__)
+    demo.launch(max_threads=10)
 if __name__ == '__main__':
     main()

examples/chemistry.png ADDED Viewed

examples/example_inputs.jsonl CHANGED Viewed

@@ -1,6 +1,6 @@
-{"id":1, "text": "Describe this image", "image": "examples/1.png", "grounding": false, "cogagent": false}
-{"id":2, "text": "What is written in the image?", "image": "examples/2.jpg", "grounding": false, "cogagent": false}
-{"id":3, "text": "the tree closer to the sun", "image": "examples/3.jpg", "grounding": true, "cogagent": false}
-{"id":4, "text": "What color are the clothes of the girl whose hands are holding flowers? Let's think step by step", "image": "examples/4.jpg", "grounding": true, "cogagent": false}
-{"id":5, "text": "search CogVLM", "image": "examples/5.jpeg", "grounding": true, "cogagent": true}
-{"id":6, "text": "Insert a new slide named 'In-context learning: Details' with a Two Content layout after the current slide.", "image": "examples/6.jpeg", "grounding": false, "cogagent": true}

+{"id":1, "text": "请详细描述这张图片。", "image": "examples/guicai.jpeg", "is_english": false}
+{"id":2, "text": "输出这段文本。", "image": "examples/poem.jpeg", "is_english": false}
+{"id":3, "text": "用列表形式描述图中的关键步骤。", "image": "examples/chemistry.png", "is_english": false}
+{"id":4, "text": "这道题怎么做？step by step.", "image": "examples/triangle.jpeg", "is_english": false}
+{"id":5, "text": "Please analyze this image and its meaning in detail.", "image": "examples/sota.jpeg", "is_english": true}
+{"id":6, "text": "Which watermelon is the ripest? Provide reasons.", "image": "examples/watermelon.jpeg", "is_english": true}

examples/guicai.jpeg ADDED Viewed

examples/poem.jpeg ADDED Viewed

examples/sota.jpeg ADDED Viewed

examples/triangle.jpeg ADDED Viewed

examples/watermelon.jpeg ADDED Viewed

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-gradio
 seaborn
 Pillow
 matplotlib

+gradio==4.29.0
 seaborn
 Pillow
 matplotlib