Spaces:

chenglu
/

gradio-test

Sleeping

App Files Files Community

chenglu committed on May 29

Commit

e54d7aa

1 Parent(s): f5ccf14

transformers

Browse files

Files changed (1) hide show

app.py +242 -51

app.py CHANGED Viewed

@@ -1,55 +1,246 @@
-import torch, gradio as gr
-from transformers import (
-    AutoTokenizer, AutoModel,
-    AutoProcessor, Blip2ForConditionalGeneration
-)
-# --------模型加载--------
-chat_model_name = "THUDM/chatglm2-6b-int4"
-vision_model_name = "Salesforce/blip2-opt-2.7b"
-tokenizer = AutoTokenizer.from_pretrained(chat_model_name, trust_remote_code=True)
-chat_model = AutoModel.from_pretrained(chat_model_name, trust_remote_code=True).eval()
 if torch.cuda.is_available():
-    chat_model = chat_model.half().cuda()
-processor = AutoProcessor.from_pretrained(vision_model_name)
-vision_model = Blip2ForConditionalGeneration.from_pretrained(
-    vision_model_name,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto" if torch.cuda.is_available() else None,
-)
-if not torch.cuda.is_available():
-    vision_model = vision_model.to("cpu")
-# --------工具函数--------
 def describe_image(image):
-    inputs = processor(image, return_tensors="pt").to(vision_model.device)
-    ids = vision_model.generate(**inputs, max_new_tokens=50)
-    return processor.batch_decode(ids, skip_special_tokens=True)[0].strip()
-def on_image(img):
-    caption = describe_image(img)
-    sys_prompt = f"这是一幅艺术作品图像: {caption}。请为普通观众做简介。"
-    answer, hist = chat_model.chat(tokenizer, sys_prompt, history=[])
-    return [[img, answer]], hist
-def on_chat(msg, chat_hist, hist):
-    chat_hist = chat_hist or []
-    chat_hist.append([msg, ""])
-    for out, h in chat_model.stream_chat(tokenizer, msg, history=hist):
-        chat_hist[-1][1] = out
-        yield chat_hist, h
-# --------Gradio 界面--------
-with gr.Blocks() as demo:
-    gr.Markdown("# AI 艺术品讲解智能体")
-    image = gr.Image(type="pil", label="上传艺术品")
-    chatbot = gr.Chatbot()
-    txt = gr.Textbox(label="提问")
-    state = gr.State()
-    image.upload(on_image, image, [chatbot, state])
-    txt.submit(on_chat, [txt, chatbot, state], [chatbot, state]).then(lambda: "", None, txt)
 if __name__ == "__main__":
-    demo.queue(concurrency_count=2).launch(share=True)

+import torch
+from transformers import AutoTokenizer, AutoModel, AutoProcessor, Blip2ForConditionalGeneration
+import gradio as gr
+import gc
+from PIL import Image
+# Pick the compute device and, when CUDA is available, report the total memory of GPU 0 in GB
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
 if torch.cuda.is_available():
+    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
+# Module-level handles for the models; they stay None until load_models() assigns them
+tokenizer = None
+model = None
+processor = None
+blip_model = None
+def load_models():
+    """Load the chat model and the vision model into the module globals.
+
+    The ChatGLM2-6B int4 tokenizer/model are loaded first, followed by the
+    BLIP-2 processor/model. On CUDA both are requested in float16 and BLIP-2
+    is additionally loaded with 8-bit quantization; on CPU float32 is used.
+
+    Returns:
+        True when every step succeeded, False when any step raised. The
+        error is printed, and globals assigned before the failure keep
+        whatever value they had at that point.
+    """
+    global tokenizer, model, processor, blip_model
+    try:
+        on_gpu = device == "cuda"
+        weights_dtype = torch.float16 if on_gpu else torch.float32
+        # Chat model (ChatGLM2-6B, int4-quantized checkpoint)
+        chat_checkpoint = "THUDM/chatglm2-6b-int4"
+        print(f"正在加载对话模型: {chat_checkpoint}")
+        tokenizer = AutoTokenizer.from_pretrained(chat_checkpoint, trust_remote_code=True)
+        model = AutoModel.from_pretrained(
+            chat_checkpoint,
+            trust_remote_code=True,
+            torch_dtype=weights_dtype,
+        )
+        if on_gpu:
+            model = model.half().cuda()
+        model.eval()
+        print("✅ 对话模型加载完成")
+        # Image-understanding model (BLIP-2)
+        vision_checkpoint = "Salesforce/blip2-opt-2.7b"
+        print(f"正在加载图像理解模型: {vision_checkpoint}")
+        processor = AutoProcessor.from_pretrained(vision_checkpoint)
+        blip_model = Blip2ForConditionalGeneration.from_pretrained(
+            vision_checkpoint,
+            torch_dtype=weights_dtype,
+            device_map="auto" if on_gpu else None,
+            load_in_8bit=on_gpu,  # 8-bit quantization to save memory, CUDA only
+        )
+        if device == "cpu":
+            blip_model = blip_model.to("cpu")
+        print("✅ 图像理解模型加载完成")
+    except Exception as exc:
+        print(f"❌ 模型加载失败: {str(exc)}")
+        return False
+    return True
 def describe_image(image):
+    """Generate a caption for an image with BLIP-2.
+
+    Args:
+        image: A PIL image, or an array that ``Image.fromarray`` accepts.
+
+    Returns:
+        The stripped caption string. When the vision model or processor is
+        not loaded, or when captioning raises, a human-readable error
+        string is returned instead of a caption.
+    """
+    if blip_model is None or processor is None:
+        return "模型未正确加载"
+    try:
+        # Normalise array input to a PIL image
+        if not isinstance(image, Image.Image):
+            image = Image.fromarray(image)
+        # Preprocess the image into model inputs
+        inputs = processor(image, return_tensors="pt")
+        if device == "cuda":
+            # load_models() requests float16 weights on CUDA, while the
+            # processor emits float32 pixel values. Cast floating tensors to
+            # float16 while moving them so the dtypes match; non-floating
+            # tensors are only moved.
+            inputs = {
+                k: (v.to(device, torch.float16) if torch.is_floating_point(v) else v.to(device))
+                for k, v in inputs.items()
+            }
+        # Generate the caption without tracking gradients
+        with torch.no_grad():
+            generated_ids = blip_model.generate(
+                **inputs,
+                max_new_tokens=50,
+                num_beams=3,
+                temperature=0.7,
+                do_sample=True
+            )
+        caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+        return caption
+    except Exception as e:
+        print(f"图像描述生成错误: {str(e)}")
+        return f"图像描述生成失败: {str(e)}"
+def on_image_upload(image):
+    """Handle an image upload: caption it and produce the opening analysis.
+
+    Args:
+        image: The uploaded image (PIL image or array), or None.
+
+    Returns:
+        A ``(chat_history, history)`` pair: the rows shown in the Chatbot
+        and the chat model's own conversation history. Both are empty lists
+        when ``image`` is None. If processing fails, a single row carrying
+        the error text is returned together with an empty history.
+    """
+    if image is None:
+        return [], []
+    try:
+        # Function-scope import so this block brings its own dependency
+        import tempfile
+        # A new image starts a new conversation
+        history = []
+        chat_history = []
+        # Caption the image
+        caption = describe_image(image)
+        print(f"图像描述: {caption}")
+        # A Chatbot row takes text or a (filepath,) tuple, not an in-memory
+        # image object, so write the upload to a PNG file and pass its path.
+        # delete=False because the file is read after this function returns.
+        picture = image if isinstance(image, Image.Image) else Image.fromarray(image)
+        if picture.mode not in ("RGB", "RGBA", "L"):
+            # Some modes (e.g. CMYK) cannot be written as PNG
+            picture = picture.convert("RGB")
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
+            picture.save(tmp, format="PNG")
+        image_message = (tmp.name,)
+        # Build the prompt for the chat model
+        prompt = f"这是一幅艺术作品图像，其内容是: {caption}。请对此艺术作品进行详细的介绍和分析，包括艺术风格、创作技法、可能的历史背景等方面。"
+        # Generate the opening analysis
+        if model is not None and tokenizer is not None:
+            try:
+                with torch.no_grad():
+                    response, history = model.chat(tokenizer, prompt, history=history)
+                chat_history.append([image_message, response])
+            except Exception as e:
+                print(f"对话生成错误: {str(e)}")
+                chat_history.append([image_message, f"分析生成失败: {str(e)}"])
+        else:
+            chat_history.append([image_message, "对话模型未正确加载"])
+        return chat_history, history
+    except Exception as e:
+        print(f"图像上传处理错误: {str(e)}")
+        return [[None, f"处理失败: {str(e)}"]], []
+def on_user_message(user_message, chat_history, history):
+    """Stream the chat model's answer to a user message.
+
+    This is a generator: each yield is a ``(chat_history, history)`` pair.
+
+    Args:
+        user_message: Text typed by the user.
+        chat_history: Rows currently shown in the Chatbot (may be None).
+        history: The chat model's own conversation history.
+
+    Yields:
+        The display rows and model history. A blank message yields the
+        inputs unchanged. If the chat model is not loaded, or streaming
+        raises, the last row carries an error text and the incoming
+        ``history`` is yielded back.
+    """
+    # Blank input: hand everything back untouched
+    if not user_message.strip():
+        yield chat_history, history
+        return
+    chat_history = chat_history or []
+    if model is None or tokenizer is None:
+        chat_history.append([user_message, "对话模型未正确加载"])
+        yield chat_history, history
+        return
+    try:
+        chat_history.append([user_message, ""])
+        # Overwrite the answer cell of the newest row with each partial reply
+        for partial_reply, updated_history in model.stream_chat(tokenizer, user_message, history):
+            chat_history[-1][1] = partial_reply
+            yield chat_history, updated_history
+    except Exception as exc:
+        print(f"用户消息处理错误: {str(exc)}")
+        chat_history[-1][1] = f"回复生成失败: {str(exc)}"
+        yield chat_history, history
+def clear_chat():
+    """Reset the conversation.
+
+    Returns:
+        A pair of new empty lists: the Chatbot rows and the model history.
+    """
+    cleared_rows, cleared_history = [], []
+    return cleared_rows, cleared_history
+# Build the Gradio interface
+def create_interface():
+    """Assemble the Gradio Blocks UI and wire up its events.
+
+    Layout: a row with the image uploader and a clear button on the left
+    and the Chatbot on the right, a question textbox below, and a usage
+    note at the bottom.
+
+    Returns:
+        The constructed ``gr.Blocks`` object; it is not launched here.
+    """
+    usage_notes = """
+        ### 使用说明：
+        1. 点击上传区域选择一张艺术品图像
+        2. AI 会自动分析图像并生成初始介绍
+        3. 在下方输入框中提问关于艺术品的问题
+        4. 支持多轮对话，可以深入讨论艺术品的各个方面
+        ### 注意事项：
+        - 支持常见图片格式（JPG, PNG, WebP等）
+        - 建议上传清晰的艺术品图像以获得更好的分析效果
+        - 首次加载模型可能需要一些时间，请耐心等待
+        """
+    with gr.Blocks(title="AI艺术品讲解智能体", theme=gr.themes.Soft()) as app:
+        gr.Markdown("# 🎨 AI 艺术品讲解智能体")
+        gr.Markdown("上传一张艺术品图像，让 AI 为您描述这件艺术作品，并回答有关它的问题。")
+        with gr.Row():
+            with gr.Column(scale=1):
+                picture = gr.Image(label="上传艺术品图像", type="pil", height=300)
+                reset_button = gr.Button("🗑️ 清空对话", variant="secondary")
+            with gr.Column(scale=2):
+                conversation = gr.Chatbot(label="对话区域", height=500, show_label=True)
+        question_box = gr.Textbox(
+            label="询问问题",
+            placeholder="请输入关于这幅作品的提问...",
+            lines=2,
+        )
+        # Holds the chat model's own conversation history between events
+        model_history = gr.State([])
+        # Event wiring
+        picture.upload(fn=on_image_upload, inputs=picture, outputs=[conversation, model_history])
+        question_box.submit(
+            fn=on_user_message,
+            inputs=[question_box, conversation, model_history],
+            outputs=[conversation, model_history],
+        )
+        # A second submit handler empties the textbox
+        question_box.submit(lambda: "", inputs=[], outputs=[question_box])
+        reset_button.click(fn=clear_chat, inputs=[], outputs=[conversation, model_history])
+        # Usage instructions shown under the controls
+        gr.Markdown(usage_notes)
+    return app
+# Script entry point
 if __name__ == "__main__":
+    print("🚀 启动 AI 艺术品讲解智能体...")
+    # Load both models first; the UI is only built and launched when that succeeds
+    if load_models():
+        print("✅ 所有模型加载完成，启动界面...")
+        demo = create_interface()
+        demo.queue(max_size=10).launch(
+            share=True,
+            server_name="0.0.0.0",
+            server_port=7860,
+            show_error=True
+        )
+    else:
+        print("❌ 模型加载失败，请检查环境配置")