import os
import uuid

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image, ImageChops, ImageFilter
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    CLIPModel,
    CLIPProcessor,
)

# Initialize the models
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


# Image-processing helpers
def compute_difference_images(img_a, img_b):
    def extract_sketch(image):
        # Screen a grayscale image against its inverse for a simple sketch effect.
        grayscale = image.convert("L")
        inverted = ImageChops.invert(grayscale)
        return ImageChops.screen(grayscale, inverted)

    def compute_normal_map(image):
        # FIND_EDGES yields an edge map, used here as a cheap stand-in for a normal map.
        return image.filter(ImageFilter.FIND_EDGES)

    # ImageChops.difference requires matching sizes, so align B with A first.
    if img_a.size != img_b.size:
        img_b = img_b.resize(img_a.size)
    diff_overlay = ImageChops.difference(img_a, img_b)

    return {
        "original_a": img_a,
        "original_b": img_b,
        "sketch_a": extract_sketch(img_a),
        "sketch_b": extract_sketch(img_b),
        "normal_a": compute_normal_map(img_a),
        "normal_b": compute_normal_map(img_b),
        "diff_overlay": diff_overlay,
    }


# Save the generated images to files
def save_images(images):
    # A unique prefix per call keeps later pairs in a batch run from
    # overwriting the files of earlier ones.
    prefix = uuid.uuid4().hex[:8]
    paths = []
    for key, img in images.items():
        path = f"{prefix}_{key}.png"
        img.save(path)
        paths.append((path, key.replace("_", " ").capitalize()))
    return paths


# Generate a more detailed caption with BLIP
def generate_detailed_caption(image):
    inputs = blip_processor(image, return_tensors="pt")
    caption = blip_model.generate(**inputs, max_length=128, num_beams=5, no_repeat_ngram_size=2)
    return blip_processor.decode(caption[0], skip_special_tokens=True)


# Visualize the feature differences
def plot_feature_differences(latent_diff):
    diff_magnitude = [abs(x) for x in latent_diff[0]]
    indices = range(len(diff_magnitude))
    prefix = uuid.uuid4().hex[:8]  # unique file names per pair, as in save_images

    plt.figure(figsize=(8, 4))
    plt.bar(indices, diff_magnitude, alpha=0.7)
    plt.xlabel("Feature Index")
    plt.ylabel("Magnitude of Difference")
    plt.title("Feature Differences (Bar Chart)")
    bar_chart_path = f"{prefix}_bar_chart.png"
    plt.savefig(bar_chart_path)
    plt.close()

    plt.figure(figsize=(6, 6))
    plt.pie(diff_magnitude[:10], labels=list(range(10)), autopct="%1.1f%%", startangle=140)
    plt.title("Top 10 Feature Differences (Pie Chart)")
    pie_chart_path = f"{prefix}_pie_chart.png"
    plt.savefig(pie_chart_path)
    plt.close()

    return bar_chart_path, pie_chart_path


# Generate a detailed text analysis via an LLM API
def generate_text_analysis(api_key, api_type, caption_a, caption_b):
    # Both backends go through the openai>=1.0 client; DeepSeek exposes an
    # OpenAI-compatible endpoint, so only the base URL and model name differ.
    from openai import OpenAI

    if api_type == "DeepSeek":
        client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
    else:
        client = OpenAI(api_key=api_key)

    response = client.chat.completions.create(
        model="gpt-4" if api_type == "GPT" else "deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": (
                    f"Image A is described as: {caption_a}. "
                    f"Image B is described as: {caption_b}.\n"
                    "Please analyze the differences between the two images in "
                    "content and in latent features, and produce a concise but "
                    "well-structured summary."
                ),
            },
        ],
    )
    return response.choices[0].message.content.strip()


# Analyze a single image pair
def analyze_images(img_a, img_b, api_key, api_type):
    images_diff = compute_difference_images(img_a, img_b)
    saved_images = save_images(images_diff)

    caption_a = generate_detailed_caption(img_a)
    caption_b = generate_detailed_caption(img_b)

    # Extract CLIP image embeddings; no_grad avoids building autograd graphs.
    with torch.no_grad():
        inputs = clip_processor(images=img_a, return_tensors="pt")
        features_a = clip_model.get_image_features(**inputs).numpy()
        inputs = clip_processor(images=img_b, return_tensors="pt")
        features_b = clip_model.get_image_features(**inputs).numpy()
    latent_diff = np.abs(features_a - features_b).tolist()

    bar_chart, pie_chart = plot_feature_differences(latent_diff)
    text_analysis = generate_text_analysis(api_key, api_type, caption_a, caption_b)

    return {
        "saved_images": saved_images,
        "caption_a": caption_a,
        "caption_b": caption_b,
        "text_analysis": text_analysis,
        "bar_chart": bar_chart,
        "pie_chart": pie_chart,
    }
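
# --- Optional: analyze one pair directly, without the UI (a sketch; the
# file names and key below are hypothetical placeholders) ---
#
#   img_a = Image.open("example_a.png").convert("RGB")
#   img_b = Image.open("example_b.png").convert("RGB")
#   result = analyze_images(img_a, img_b, api_key="sk-...", api_type="DeepSeek")
#   print(result["caption_a"], result["caption_b"], result["text_analysis"], sep="\n")
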
"saved_images": saved_images, "caption_a": caption_a, "caption_b": caption_b, "text_analysis": text_analysis, "bar_chart": bar_chart, "pie_chart": pie_chart } # 批量分析 def batch_analyze(folder_a, folder_b, api_key, api_type): def load_images(folder_path): files = sorted([os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]) return [Image.open(f).convert("RGB") for f in files] images_a = load_images(folder_a) images_b = load_images(folder_b) num_pairs = min(len(images_a), len(images_b)) results = [] for i in range(num_pairs): result = analyze_images(images_a[i], images_b[i], api_key, api_type) results.append({ "pair": (f"Image A-{i+1}", f"Image B-{i+1}"), **result }) return results # Gradio界面 with gr.Blocks() as demo: gr.Markdown("# 批量图像对比分析工具") api_key_input = gr.Textbox(label="API Key", placeholder="输入您的 API Key", type="password") api_type_input = gr.Dropdown(label="API 类型", choices=["GPT", "DeepSeek"], value="GPT") folder_a_input = gr.Textbox(label="文件夹A路径", placeholder="输入包含图片A的文件夹路径") folder_b_input = gr.Textbox(label="文件夹B路径", placeholder="输入包含图片B的文件夹路径") analyze_button = gr.Button("开始批量分析") with gr.Row(): result_gallery = gr.Gallery(label="差异图像").style(grid=3) result_text_analysis = gr.Textbox(label="详细分析", interactive=False, lines=5) def process_batch_analysis(folder_a, folder_b, api_key, api_type): results = batch_analyze(folder_a, folder_b, api_key, api_type) all_images = [] all_texts = [] for result in results: all_images.extend(result["saved_images"]) all_images.append((result["bar_chart"], "Bar Chart")) all_images.append((result["pie_chart"], "Pie Chart")) all_texts.append(f"{result['pair'][0]} vs {result['pair'][1]}:\n{result['text_analysis']}") return all_images, "\n\n".join(all_texts) analyze_button.click( fn=process_batch_analysis, inputs=[folder_a_input, folder_b_input, api_key_input, api_type_input], outputs=[result_gallery, result_text_analysis] ) demo.launch()