chenglu committed on
Commit f5ccf14 · 1 Parent(s): d99a437

transformers

Files changed (1)
  1. app.py +40 -62
app.py CHANGED
@@ -1,77 +1,55 @@
- import torch
- from transformers import AutoTokenizer, AutoModel, AutoProcessor, Blip2ForConditionalGeneration
- import gradio as gr
-
- # Load the Chinese conversational model (ChatGLM 6B, int4 quantized version)
- model_name = "THUDM/chatglm2-6b-int4"
- print(f"Loading conversation model: {model_name}")
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
- # If GPU is available, use half precision on GPU for faster inference
  if torch.cuda.is_available():
-     model = model.half().cuda()
- model.eval()
-
- # Load the image captioning model (BLIP-2 with OPT 2.7B LLM)
- vision_model = "Salesforce/blip2-opt-2.7b"
- print(f"Loading image captioning model: {vision_model}")
- processor = AutoProcessor.from_pretrained(vision_model)
- blip_model = Blip2ForConditionalGeneration.from_pretrained(
-     vision_model,
-     torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
-     device_map=("auto" if torch.cuda.is_available() else None)
  )
- # Ensure BLIP model on CPU if no GPU
  if not torch.cuda.is_available():
-     blip_model = blip_model.to("cpu")
-
- # Function: generate a descriptive caption for the image using BLIP-2
  def describe_image(image):
-     inputs = processor(image, return_tensors="pt").to(blip_model.device)
-     generated_ids = blip_model.generate(**inputs, max_new_tokens=50)
-     caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-     return caption
-
- # Event handler: when a new image is uploaded
- def on_image_upload(image):
-     # Reset histories for a new conversation
-     history = []       # model's conversation history
-     chat_history = []  # chat display history for Gradio
-     # Describe the uploaded artwork image
-     caption = describe_image(image)
-     # Build the prompt for the conversational model (include the image description)
-     prompt = f"这是一幅艺术作品图像,其内容是: {caption}。请对此艺术作品进行简要的介绍和分析。"
-     # Generate the initial analysis using the conversation model
-     response, history = model.chat(tokenizer, prompt, history=history)
-     # Add the image (user side) and the model's response (assistant side) to chat history
-     chat_history.append([image, response])
-     return chat_history, history
-
- # Event handler: when the user sends a new text message (question)
- def on_user_message(user_message, chat_history, history):
-     chat_history = chat_history or []
-     # Append the user's question and an empty response placeholder
-     chat_history.append([user_message, ""])
-     # Use streaming response from the model
-     for output, new_history in model.stream_chat(tokenizer, user_message, history):
-         # Update the assistant's response in the chat history
-         chat_history[-1][1] = output
-         # Yield the updated chat history and model history for streaming in UI
-         yield chat_history, new_history
-
- # Build Gradio interface
  with gr.Blocks() as demo:
      gr.Markdown("# AI 艺术品讲解智能体")
-     gr.Markdown("上传一张艺术品图像,让 AI 为您描述这件艺术作品,并回答有关它的问题。")
-     image_input = gr.Image(label="上传艺术品图像", type="pil")
      chatbot = gr.Chatbot()
-     user_input = gr.Textbox(label="询问问题", placeholder="请输入关于这幅作品的提问...")
-     state = gr.State()  # state to store model history
-     # Connect events
-     image_input.upload(fn=on_image_upload, inputs=image_input, outputs=[chatbot, state])
-     user_input.submit(fn=on_user_message, inputs=[user_input, chatbot, state], outputs=[chatbot, state])
-     user_input.submit(lambda: "", inputs=[], outputs=[user_input])  # clear input field
-
- # Launch the app (if running locally; not required in HF Spaces)
  if __name__ == "__main__":
-     demo.queue().launch(share=True)
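Both the removed and the added handlers drive ChatGLM2 through its remote-code `chat` / `stream_chat` methods. For reference, an editor's sketch (not part of this commit) of how that API behaves, assuming the `THUDM/chatglm2-6b-int4` checkpoint loads with `trust_remote_code=True`:

# Editor's sketch, not part of the commit: the ChatGLM2 chat API
# used by on_image_upload / on_user_message above and on_image / on_chat below.
from transformers import AutoModel, AutoTokenizer

tok = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True)
glm = AutoModel.from_pretrained("THUDM/chatglm2-6b-int4", trust_remote_code=True).eval()

# chat() returns the complete reply plus the updated history in one call.
reply, history = glm.chat(tok, "你好", history=[])

# stream_chat() is a generator yielding progressively longer replies together
# with the updated history; this is what lets the handler stream into gr.Chatbot.
for partial, history in glm.stream_chat(tok, "请简要介绍印象派绘画", history=history):
    print(partial)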
 
+ import torch, gradio as gr
+ from transformers import (
+     AutoTokenizer, AutoModel,
+     AutoProcessor, Blip2ForConditionalGeneration
+ )
+
+ # -------- Model loading --------
+ chat_model_name = "THUDM/chatglm2-6b-int4"
+ vision_model_name = "Salesforce/blip2-opt-2.7b"
+
+ tokenizer = AutoTokenizer.from_pretrained(chat_model_name, trust_remote_code=True)
+ chat_model = AutoModel.from_pretrained(chat_model_name, trust_remote_code=True).eval()
  if torch.cuda.is_available():
+     chat_model = chat_model.half().cuda()
+
+ processor = AutoProcessor.from_pretrained(vision_model_name)
+ vision_model = Blip2ForConditionalGeneration.from_pretrained(
+     vision_model_name,
+     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+     device_map="auto" if torch.cuda.is_available() else None,
  )
  if not torch.cuda.is_available():
+     vision_model = vision_model.to("cpu")
+
+ # -------- Helper functions --------
  def describe_image(image):
+     inputs = processor(image, return_tensors="pt").to(vision_model.device)
+     ids = vision_model.generate(**inputs, max_new_tokens=50)
+     return processor.batch_decode(ids, skip_special_tokens=True)[0].strip()
+
+ def on_image(img):
+     caption = describe_image(img)
+     sys_prompt = f"这是一幅艺术作品图像: {caption}。请为普通观众做简介。"
+     answer, hist = chat_model.chat(tokenizer, sys_prompt, history=[])
+     return [[img, answer]], hist
+
+ def on_chat(msg, chat_hist, hist):
+     chat_hist = chat_hist or []
+     chat_hist.append([msg, ""])
+     for out, h in chat_model.stream_chat(tokenizer, msg, history=hist):
+         chat_hist[-1][1] = out
+         yield chat_hist, h
+
+ # -------- Gradio UI --------
  with gr.Blocks() as demo:
      gr.Markdown("# AI 艺术品讲解智能体")
+     image = gr.Image(type="pil", label="上传艺术品")
      chatbot = gr.Chatbot()
+     txt = gr.Textbox(label="提问")
+     state = gr.State()
+     image.upload(on_image, image, [chatbot, state])
+     txt.submit(on_chat, [txt, chatbot, state], [chatbot, state]).then(lambda: "", None, txt)
+
  if __name__ == "__main__":
+     demo.queue(concurrency_count=2).launch(share=True)
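One compatibility caveat with the new launch line: `queue(concurrency_count=2)` is the Gradio 3.x signature, and Gradio 4 removed that parameter in favor of `default_concurrency_limit`. If the Space may run on either major version, a version-guarded launch along these lines should work (an editor's sketch, not part of the commit):

# Editor's sketch, not part of the commit: guard the queue() call so the app
# launches on both Gradio 3.x (concurrency_count) and 4.x (default_concurrency_limit).
import gradio as gr

if gr.__version__.startswith("3."):
    demo.queue(concurrency_count=2).launch(share=True)
else:
    demo.queue(default_concurrency_limit=2).launch(share=True)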