# Hugging Face Space (status when captured: Sleeping)
import gradio as gr | |
import os | |
from PIL import Image, ImageChops, ImageFilter | |
from transformers import CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration | |
import torch | |
import matplotlib.pyplot as plt | |
# Initialize the pretrained models once, at import time, so every request
# reuses the same instances.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
blip_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
# Image processing functions
def compute_difference_images(img_a, img_b):
    """Build the set of comparison images for one image pair.

    Args:
        img_a: First PIL image.
        img_b: Second PIL image.

    Returns:
        A dict with, in this order, the keys ``original_a``, ``original_b``,
        ``sketch_a``, ``sketch_b``, ``normal_a``, ``normal_b`` and
        ``diff_overlay``. The originals are the inputs themselves; the
        "sketch" entries are the grayscale image screen-blended with its own
        inverse; the "normal" entries are the ``FIND_EDGES`` filter output;
        ``diff_overlay`` is ``ImageChops.difference`` of the two inputs.
    """

    def _to_sketch(picture):
        # Screen-blend the grayscale image with its inverse.
        gray = picture.convert("L")
        return ImageChops.screen(gray, ImageChops.invert(gray))

    def _to_edges(picture):
        return picture.filter(ImageFilter.FIND_EDGES)

    # Evaluated before the derived images, although it is stored last.
    overlay = ImageChops.difference(img_a, img_b)

    rendered = {"original_a": img_a, "original_b": img_b}
    for label, transform in (("sketch", _to_sketch), ("normal", _to_edges)):
        for suffix, picture in (("a", img_a), ("b", img_b)):
            rendered[f"{label}_{suffix}"] = transform(picture)
    rendered["diff_overlay"] = overlay
    return rendered
# Save images to files
def save_images(images):
    """Write each image to its own PNG file and return gallery entries.

    Every call writes into a freshly created temporary directory. Using
    fixed names in the working directory (``original_a.png`` ...) made each
    call overwrite the files of the previous one, so a batch run ended up
    showing only the last pair's images for every pair.

    Args:
        images: Mapping of key -> object exposing ``save(path)`` (PIL images
            in this app).

    Returns:
        A list of ``(path, caption)`` tuples in the mapping's iteration
        order. The file is named ``<key>.png``; the caption is the key with
        underscores replaced by spaces and capitalized. An empty mapping
        yields an empty list and creates no directory.

    Note:
        The temporary directories are not removed by this function.
    """
    import tempfile  # local import: only this function needs it

    if not images:
        return []

    output_dir = tempfile.mkdtemp(prefix="image_diff_")
    paths = []
    for key, img in images.items():
        path = os.path.join(output_dir, f"{key}.png")
        img.save(path)
        paths.append((path, key.replace("_", " ").capitalize()))
    return paths
# Generate a more detailed caption with BLIP
def generate_detailed_caption(image):
    """Return a BLIP-generated caption for ``image``.

    Generation uses beam search (5 beams), a maximum length of 128 and
    forbids repeated 2-grams; the first returned sequence is decoded with
    special tokens stripped.
    """
    generation_options = {
        "max_length": 128,
        "num_beams": 5,
        "no_repeat_ngram_size": 2,
    }
    encoded = blip_processor(image, return_tensors="pt")
    sequences = blip_model.generate(**encoded, **generation_options)
    best_sequence = sequences[0]
    return blip_processor.decode(best_sequence, skip_special_tokens=True)
# Feature-difference visualization
def plot_feature_differences(latent_diff):
    """Render a bar chart and a pie chart of per-feature differences.

    Args:
        latent_diff: Nested sequence whose first element holds the
            per-feature difference values (only ``latent_diff[0]`` is read).

    Returns:
        ``(bar_chart_path, pie_chart_path)``: paths of the two PNG files,
        written into a temporary directory created for this call so that
        successive calls (e.g. in a batch run) do not overwrite each other.

    Notes:
        * The pie chart shows the (up to) 10 features with the largest
          magnitude, labelled with their feature index, matching its
          "Top 10" title. Previously it plotted the first 10 features with
          hard-coded labels 0-9.
        * If the selected magnitudes do not sum to a positive number (e.g.
          two identical images), a pie cannot be normalised, so a placeholder
          message is drawn instead.
        * The temporary directory is not removed by this function.
    """
    import heapq  # local imports: only this function needs them
    import tempfile

    diff_magnitude = [abs(x) for x in latent_diff[0]]
    output_dir = tempfile.mkdtemp(prefix="feature_diff_")

    bar_chart_path = os.path.join(output_dir, "bar_chart.png")
    plt.figure(figsize=(8, 4))
    try:
        plt.bar(range(len(diff_magnitude)), diff_magnitude, alpha=0.7)
        plt.xlabel("Feature Index")
        plt.ylabel("Magnitude of Difference")
        plt.title("Feature Differences (Bar Chart)")
        plt.savefig(bar_chart_path)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close()

    # Indices of the largest magnitudes, biggest first.
    top_indices = heapq.nlargest(
        10, range(len(diff_magnitude)), key=diff_magnitude.__getitem__
    )
    top_values = [diff_magnitude[i] for i in top_indices]

    pie_chart_path = os.path.join(output_dir, "pie_chart.png")
    plt.figure(figsize=(6, 6))
    try:
        if sum(top_values) > 0:
            plt.pie(
                top_values,
                labels=[str(i) for i in top_indices],
                autopct="%1.1f%%",
                startangle=140,
            )
        else:
            # Nothing to apportion: avoid dividing by a zero total.
            plt.text(0.5, 0.5, "No feature differences", ha="center", va="center")
            plt.axis("off")
        plt.title("Top 10 Feature Differences (Pie Chart)")
        plt.savefig(pie_chart_path)
    finally:
        plt.close()

    return bar_chart_path, pie_chart_path
# Generate detailed analysis
def generate_text_analysis(api_key, api_type, caption_a, caption_b):
    """Ask a chat model to compare two image captions and summarise them.

    Uses the ``openai>=1.0`` client interface for both providers. The
    previous code called the pre-1.0 ``ChatCompletion.create`` on a 1.x
    ``OpenAI`` client (which has no such attribute) and never passed
    ``api_key`` on the GPT path.

    Args:
        api_key: API key for the selected provider.
        api_type: ``"DeepSeek"`` selects the DeepSeek endpoint; any other
            value uses the default OpenAI endpoint. The model is ``"gpt-4"``
            when ``api_type == "GPT"`` and ``"deepseek-chat"`` otherwise.
        caption_a: Caption of image A.
        caption_b: Caption of image B.

    Returns:
        The assistant's reply with surrounding whitespace stripped (an empty
        string if the reply has no text content).
    """
    from openai import OpenAI

    if api_type == "DeepSeek":
        client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
    else:
        # A blank key becomes None so the client can fall back to its own
        # default key lookup, as the module-level API did before.
        client = OpenAI(api_key=api_key or None)

    response = client.chat.completions.create(
        model="gpt-4" if api_type == "GPT" else "deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"},
        ],
    )
    content = response.choices[0].message.content
    return (content or "").strip()
# Analysis function
def analyze_images(img_a, img_b, api_key, api_type):
    """Run the full comparison pipeline on one pair of images.

    Steps: derive and save the comparison images, caption both inputs with
    BLIP, compute the absolute difference of their CLIP image features,
    chart that difference, and request a text analysis of the two captions.

    Args:
        img_a: First PIL image.
        img_b: Second PIL image.
        api_key: API key forwarded to ``generate_text_analysis``.
        api_type: Provider name forwarded to ``generate_text_analysis``.

    Returns:
        A dict with the keys ``saved_images``, ``caption_a``, ``caption_b``,
        ``text_analysis``, ``bar_chart`` and ``pie_chart``.
    """
    images_diff = compute_difference_images(img_a, img_b)
    saved_images = save_images(images_diff)

    caption_a = generate_detailed_caption(img_a)
    caption_b = generate_detailed_caption(img_b)

    # Inference only: no gradients are needed for the feature vectors.
    with torch.no_grad():
        features_a = clip_model.get_image_features(
            **clip_processor(images=img_a, return_tensors="pt")
        )
        features_b = clip_model.get_image_features(
            **clip_processor(images=img_b, return_tensors="pt")
        )

    # The original used ``np.abs`` although numpy is never imported in this
    # file; torch (already imported) yields the same nested list of floats.
    latent_diff = torch.abs(features_a - features_b).tolist()

    bar_chart, pie_chart = plot_feature_differences(latent_diff)
    text_analysis = generate_text_analysis(api_key, api_type, caption_a, caption_b)

    return {
        "saved_images": saved_images,
        "caption_a": caption_a,
        "caption_b": caption_b,
        "text_analysis": text_analysis,
        "bar_chart": bar_chart,
        "pie_chart": pie_chart,
    }
# Batch analysis
def batch_analyze(folder_a, folder_b, api_key, api_type):
    """Analyze image pairs taken from two folders.

    The ``.png`` / ``.jpg`` / ``.jpeg`` files (case-insensitive) of each
    folder are sorted by path and paired positionally; surplus files in the
    longer folder are ignored.

    Images are opened one pair at a time and each file handle is closed as
    soon as its pixels have been converted to RGB, instead of opening every
    file up front and leaving the handles to the garbage collector.

    Args:
        folder_a: Directory containing the "A" images.
        folder_b: Directory containing the "B" images.
        api_key: API key forwarded to ``analyze_images``.
        api_type: Provider name forwarded to ``analyze_images``.

    Returns:
        A list with one dict per pair: the ``analyze_images`` result plus a
        ``"pair"`` key holding ``("Image A-<n>", "Image B-<n>")`` (1-based).

    Raises:
        OSError: Propagated from ``os.listdir`` / ``Image.open`` when a
            folder or file cannot be read.
    """

    def list_image_files(folder_path):
        return sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

    def load_rgb(path):
        # convert() returns an independent, fully loaded image, so the
        # source file can be closed right away.
        with Image.open(path) as source:
            return source.convert("RGB")

    results = []
    path_pairs = zip(list_image_files(folder_a), list_image_files(folder_b))
    for number, (path_a, path_b) in enumerate(path_pairs, start=1):
        result = analyze_images(load_rgb(path_a), load_rgb(path_b), api_key, api_type)
        results.append({
            "pair": (f"Image A-{number}", f"Image B-{number}"),
            **result,
        })
    return results
# Gradio interface: collects the API settings and two folder paths, runs the
# batch analysis and shows the resulting images and text.
with gr.Blocks() as demo:
    gr.Markdown("# 批量图像对比分析工具")
    api_key_input = gr.Textbox(label="API Key", placeholder="输入您的 API Key", type="password")
    api_type_input = gr.Dropdown(label="API 类型", choices=["GPT", "DeepSeek"], value="GPT")
    folder_a_input = gr.Textbox(label="文件夹A路径", placeholder="输入包含图片A的文件夹路径")
    folder_b_input = gr.Textbox(label="文件夹B路径", placeholder="输入包含图片B的文件夹路径")
    analyze_button = gr.Button("开始批量分析")

    with gr.Row():
        # `.style(grid=3)` no longer exists in Gradio 4; the number of
        # gallery columns is a constructor argument.
        result_gallery = gr.Gallery(label="差异图像", columns=3)
        result_text_analysis = gr.Textbox(label="详细分析", interactive=False, lines=5)

    def process_batch_analysis(folder_a, folder_b, api_key, api_type):
        """Run the batch analysis and flatten it for the two output widgets.

        Returns:
            ``(gallery_items, text)``: every pair's saved images followed by
            its bar and pie chart as ``(path, caption)`` tuples, and the
            per-pair text analyses joined by blank lines.
        """
        results = batch_analyze(folder_a, folder_b, api_key, api_type)
        all_images = []
        all_texts = []
        for result in results:
            all_images.extend(result["saved_images"])
            all_images.append((result["bar_chart"], "Bar Chart"))
            all_images.append((result["pie_chart"], "Pie Chart"))
            all_texts.append(f"{result['pair'][0]} vs {result['pair'][1]}:\n{result['text_analysis']}")
        return all_images, "\n\n".join(all_texts)

    analyze_button.click(
        fn=process_batch_analysis,
        inputs=[folder_a_input, folder_b_input, api_key_input, api_type_input],
        outputs=[result_gallery, result_text_analysis],
    )

demo.launch()