Spaces:
Running
Running
File size: 5,459 Bytes
1acc6e4 a0bb102 1acc6e4 52428d7 b2a4f9a 52428d7 a0bb102 e3a9a0b 1acc6e4 e3a9a0b 1acc6e4 52428d7 1acc6e4 a0bb102 1acc6e4 a0bb102 e3a9a0b 1acc6e4 a0bb102 1acc6e4 a0bb102 1acc6e4 a0bb102 e3a9a0b 1acc6e4 a0bb102 b2a4f9a a0bb102 1acc6e4 a0bb102 1acc6e4 a0bb102 b2a4f9a a0bb102 b2a4f9a 1acc6e4 b2a4f9a a0bb102 b2a4f9a 1acc6e4 b2a4f9a 1acc6e4 a0bb102 1acc6e4 a0bb102 1acc6e4 a0bb102 b2a4f9a a0bb102 b2a4f9a 1acc6e4 b2a4f9a a0bb102 c8752a1 a0bb102 1acc6e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import torch
from transformers import (
SegformerImageProcessor,
SegformerForSemanticSegmentation,
DPTImageProcessor,
DPTForDepthEstimation
)
from PIL import Image, ImageFilter
import numpy as np
import gradio as gr
import cv2
# Silence UserWarning messages attributed to modules matching "transformers".
# The filter is installed here, after the transformers import above, so it
# only affects warnings emitted from this point on.
import warnings

warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="transformers",
)
# Checkpoint identifiers for the two pre-trained models used by process_image.
_SEG_CHECKPOINT = "nvidia/segformer-b0-finetuned-ade-512-512"
_DEPTH_CHECKPOINT = "Intel/dpt-large"

# Semantic segmentation: image processor + SegFormer model.
seg_processor = SegformerImageProcessor.from_pretrained(_SEG_CHECKPOINT)
seg_model = SegformerForSemanticSegmentation.from_pretrained(_SEG_CHECKPOINT)

# Depth estimation: image processor + DPT model.
depth_processor = DPTImageProcessor.from_pretrained(_DEPTH_CHECKPOINT)
depth_model = DPTForDepthEstimation.from_pretrained(_DEPTH_CHECKPOINT)
def _rescale_to_unit(values):
    """Linearly map *values* onto [0, 1].

    An array with no usable range (all elements equal) maps to all zeros
    instead of dividing by zero and filling the result with NaNs.
    """
    lowest = values.min()
    span = values.max() - lowest
    # `not span > 0` also catches a NaN span, which `span <= 0` would miss.
    if not span > 0:
        return np.zeros_like(values, dtype=np.float32)
    return (values - lowest) / span


def _blend_by_weight(sharp, blurred, blur_weight):
    """Mix two float images per pixel: weight 0 keeps *sharp*, 1 keeps *blurred*."""
    mixed = (1 - blur_weight) * sharp + blur_weight * blurred
    return Image.fromarray(np.clip(mixed, 0, 255).astype(np.uint8))


def process_image(image):
    """Build a person mask, a depth map and two depth-weighted blur composites.

    Args:
        image: PIL image from the Gradio input component. It is converted to
            RGB if needed and resized to 512x512 before any model runs.

    Returns:
        A 5-tuple of PIL images, in this order:
        the resized RGB input; a black/white mask (mode "L") of pixels
        classified as class index 12 ("person"), at the same size as the
        resized input; the normalised depth map (mode "L") at the depth
        model's output resolution; the Gaussian-blur composite; and the
        "lens"-blur composite (approximated with a second, OpenCV Gaussian
        blur).

    Raises:
        gr.Error: if no image was supplied (``image`` is None).
    """
    # Submitting without an upload passes None; fail with a readable message
    # instead of an AttributeError on `image.mode`.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    if image.mode != "RGB":
        image = image.convert("RGB")
    image = image.resize((512, 512))

    # ------------------ Semantic Segmentation ------------------
    seg_inputs = seg_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        seg_logits = seg_model(**seg_inputs).logits
        # The logits come out at a reduced resolution; upsample them to the
        # image size (PIL size is (width, height), interpolate wants (H, W))
        # so the mask lines up pixel-for-pixel with the returned image.
        seg_logits = torch.nn.functional.interpolate(
            seg_logits,
            size=image.size[::-1],
            mode="bilinear",
            align_corners=False,
        )
    segmentation = torch.argmax(seg_logits, dim=1)[0].cpu().numpy()

    # Binary mask for the 'person' class (class index 12).
    person_class_index = 12
    binary_mask = (segmentation == person_class_index).astype(np.uint8) * 255
    binary_mask_image = Image.fromarray(binary_mask).convert("L")

    # ------------------ Depth Estimation ------------------
    depth_inputs = depth_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        depth_outputs = depth_model(**depth_inputs)
    predicted_depth = depth_outputs.predicted_depth[0].cpu().numpy()

    # Normalise for visualisation; safe for a constant depth map.
    normalized_depth = _rescale_to_unit(predicted_depth)
    depth_map_image = Image.fromarray(
        (normalized_depth * 255).astype(np.uint8)
    ).convert("L")

    # ------------------ Depth-derived blur weight ------------------
    # Invert so low predicted values get the most blur.
    # NOTE(review): this assumes the model outputs larger values for nearer
    # surfaces, so the background is what gets blurred - confirm against the
    # model card.
    # For a normal map the second rescale is a no-op; for a constant map it
    # yields weight 0 everywhere, i.e. the composites equal the input.
    blur_weight = _rescale_to_unit(1 - normalized_depth)
    weight_image = Image.fromarray(
        (blur_weight * 255).astype(np.uint8)
    ).resize(image.size)
    # Trailing axis lets the single-channel weight broadcast over RGB.
    blur_weight = np.expand_dims(np.array(weight_image) / 255.0, axis=-1)

    original_np = np.array(image).astype(np.float32)

    # ------------------ Gaussian Blurred Background Effect ------------------
    gaussian_np = np.array(
        image.filter(ImageFilter.GaussianBlur(radius=15))
    ).astype(np.float32)
    composite_gaussian_image = _blend_by_weight(original_np, gaussian_np, blur_weight)

    # ------------------ Lens Blurred Background Effect ------------------
    # OpenCV has no direct lens-blur function; this approximates it with a
    # Gaussian blur using a large kernel. The blur is applied per channel, so
    # the RGB array is used directly without an RGB<->BGR round trip.
    lens_blur_kernel_size = 21  # Adjust kernel size for stronger blur
    lens_np = cv2.GaussianBlur(
        np.array(image),
        (lens_blur_kernel_size, lens_blur_kernel_size),
        0,
    ).astype(np.float32)
    composite_lens_image = _blend_by_weight(original_np, lens_np, blur_weight)

    return (
        image,
        binary_mask_image,
        depth_map_image,
        composite_gaussian_image,
        composite_lens_image,
    )
# Gradio UI: one uploaded PIL image in, the five PIL images returned by
# process_image out. Labels are listed in the same order as that return tuple.
_RESULT_LABELS = (
    "Original Image",
    "Segmentation Mask (B/W)",
    "Depth Map",
    "Gaussian Blurred Background",
    "Lens Blurred Background",
)
# One row per example; each row holds the single input image path.
_EXAMPLE_ROWS = [
    ["examples/Selfie_1.jpg"],
    ["examples/Selfie_2.jpg"],
]

interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Image(type="pil", label=caption) for caption in _RESULT_LABELS],
    title="Semantic Segmentation and Dual Blur Effects",
    description="Upload an image to generate a segmentation mask, depth map, Gaussian blurred background, and lens blurred background effect.",
    examples=_EXAMPLE_ROWS,
)
def _serve():
    """Start the Gradio server for the module-level ``interface``."""
    interface.launch()


# Only start the server when this file is run as a script, not on import.
if __name__ == "__main__":
    _serve()
|