File size: 5,459 Bytes
1acc6e4
a0bb102
 
 
 
 
 
1acc6e4
 
52428d7
b2a4f9a
52428d7
a0bb102
 
 
 
e3a9a0b
 
1acc6e4
e3a9a0b
1acc6e4
52428d7
1acc6e4
a0bb102
 
 
 
 
1acc6e4
 
a0bb102
e3a9a0b
1acc6e4
 
 
 
 
a0bb102
1acc6e4
 
a0bb102
1acc6e4
a0bb102
e3a9a0b
1acc6e4
 
 
a0bb102
 
 
 
 
 
 
b2a4f9a
a0bb102
1acc6e4
a0bb102
1acc6e4
a0bb102
 
 
 
 
 
b2a4f9a
 
a0bb102
b2a4f9a
1acc6e4
b2a4f9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0bb102
 
b2a4f9a
 
 
 
 
 
 
 
1acc6e4
b2a4f9a
1acc6e4
a0bb102
1acc6e4
 
a0bb102
1acc6e4
a0bb102
b2a4f9a
a0bb102
b2a4f9a
 
1acc6e4
b2a4f9a
 
a0bb102
c8752a1
 
a0bb102
1acc6e4
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import torch
from transformers import (
    SegformerImageProcessor,
    SegformerForSemanticSegmentation,
    DPTImageProcessor,
    DPTForDepthEstimation
)
from PIL import Image, ImageFilter
import numpy as np
import gradio as gr
import cv2

# Silence UserWarning messages attributed to modules matching "transformers"
import warnings

warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    module="transformers",
)

# Checkpoint identifiers for the two pre-trained networks used by the app
_SEGMENTATION_CHECKPOINT = "nvidia/segformer-b0-finetuned-ade-512-512"
_DEPTH_CHECKPOINT = "Intel/dpt-large"

# Each model is paired with the image processor that prepares its inputs.
# Everything is loaded once at import time and shared by all requests.
seg_processor = SegformerImageProcessor.from_pretrained(_SEGMENTATION_CHECKPOINT)
seg_model = SegformerForSemanticSegmentation.from_pretrained(_SEGMENTATION_CHECKPOINT)
depth_processor = DPTImageProcessor.from_pretrained(_DEPTH_CHECKPOINT)
depth_model = DPTForDepthEstimation.from_pretrained(_DEPTH_CHECKPOINT)

# Square working resolution (width, height) every input image is resized to.
_WORKING_SIZE = (512, 512)
# Label index treated as 'person' in the segmentation model's output.
_PERSON_CLASS_INDEX = 12
# Radius of the PIL Gaussian blur used for the first background effect.
_GAUSSIAN_BLUR_RADIUS = 15
# Kernel side length (must be odd) of the OpenCV blur used for the second effect.
_LENS_BLUR_KERNEL_SIZE = 21


def _segment_person(image):
    """Return a uint8 array (255 = person, 0 = anything else) sized like *image*."""
    inputs = seg_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        logits = seg_model(**inputs).logits
    # The logits are not guaranteed to be at the input resolution (SegFormer
    # predicts on a reduced grid), so upsample them to (height, width) before
    # taking the per-pixel argmax. PIL's size is (width, height), hence [::-1].
    logits = torch.nn.functional.interpolate(
        logits, size=image.size[::-1], mode="bilinear", align_corners=False
    )
    labels = logits.argmax(dim=1)[0].cpu().numpy()
    return (labels == _PERSON_CLASS_INDEX).astype(np.uint8) * 255


def _estimate_normalized_depth(image):
    """Return the predicted depth of *image* rescaled linearly to [0, 1]."""
    inputs = depth_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        predicted = depth_model(**inputs).predicted_depth
    depth = predicted[0].cpu().numpy()

    lowest = depth.min()
    span = depth.max() - lowest
    if not span > 0:
        # Constant (or non-finite) prediction: there is no near/far ordering
        # and dividing by the span would produce NaN. Use the maximum value
        # everywhere so the derived blur weight is zero and nothing is blurred.
        return np.ones_like(depth)
    return (depth - lowest) / span


def _blend_by_weight(sharp_np, blurred_np, weight):
    """Mix two float images per pixel: weight 0 keeps *sharp_np*, 1 keeps *blurred_np*."""
    mixed = (1.0 - weight) * sharp_np + weight * blurred_np
    return Image.fromarray(np.clip(mixed, 0, 255).astype(np.uint8))


def process_image(image):
    """Produce a person mask, a depth map and two depth-weighted blur composites.

    Args:
        image: PIL image supplied by the Gradio input component.

    Returns:
        A 5-tuple of PIL images, in the order expected by the interface
        outputs: the resized RGB input, the black/white person mask, the
        normalized depth map, the Gaussian-blur composite and the lens-blur
        composite.

    Raises:
        gr.Error: if no image was provided.
    """
    if image is None:
        # Submitting an empty input passes None; report it instead of failing
        # later with an opaque AttributeError.
        raise gr.Error("Please upload an image before submitting.")

    if image.mode != "RGB":
        image = image.convert("RGB")
    image = image.resize(_WORKING_SIZE)

    # ------------------ Semantic Segmentation ------------------
    binary_mask_image = Image.fromarray(_segment_person(image))

    # ------------------ Depth Estimation ------------------
    normalized_depth = _estimate_normalized_depth(image)
    depth_map_image = Image.fromarray((normalized_depth * 255).astype(np.uint8))

    # ------------------ Blur weight ------------------
    # Inverting the normalized depth gives the blur weight. It is already in
    # [0, 1], so no second normalization is needed. The depth map may not be
    # at the working resolution, so it is resized as an 8-bit image and then
    # given a trailing axis so it broadcasts over the colour channels.
    weight_image = Image.fromarray(
        ((1.0 - normalized_depth) * 255).astype(np.uint8)
    ).resize(image.size)
    blur_weight = (np.asarray(weight_image) / 255.0)[..., np.newaxis]

    rgb_uint8 = np.array(image)
    original_np = rgb_uint8.astype(np.float32)

    # ------------------ Gaussian Blurred Background Effect ------------------
    gaussian_blurred_np = np.array(
        image.filter(ImageFilter.GaussianBlur(radius=_GAUSSIAN_BLUR_RADIUS))
    ).astype(np.float32)
    composite_gaussian_image = _blend_by_weight(
        original_np, gaussian_blurred_np, blur_weight
    )

    # ------------------ Lens Blurred Background Effect ------------------
    # OpenCV has no dedicated lens-blur function; a Gaussian blur with a large
    # kernel is used as an approximation. The filter acts on each channel
    # independently, so it is applied to the RGB array directly without a
    # BGR round trip.
    lens_blurred_np = cv2.GaussianBlur(
        rgb_uint8, (_LENS_BLUR_KERNEL_SIZE, _LENS_BLUR_KERNEL_SIZE), 0
    ).astype(np.float32)
    composite_lens_image = _blend_by_weight(
        original_np, lens_blurred_np, blur_weight
    )

    return (
        image,
        binary_mask_image,
        depth_map_image,
        composite_gaussian_image,
        composite_lens_image,
    )

# Captions of the five result panels, listed in the order process_image
# returns the corresponding images.
_RESULT_LABELS = (
    "Original Image",
    "Segmentation Mask (B/W)",
    "Depth Map",
    "Gaussian Blurred Background",
    "Lens Blurred Background",
)

# Sample inputs offered in the UI; each example row holds a single image path.
_EXAMPLE_PATHS = (
    "examples/Selfie_1.jpg",
    "examples/Selfie_2.jpg",
)

# Build the Gradio interface: one PIL image in, five PIL images out
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Image(type="pil", label=caption) for caption in _RESULT_LABELS],
    title="Semantic Segmentation and Dual Blur Effects",
    description="Upload an image to generate a segmentation mask, depth map, Gaussian blurred background, and lens blurred background effect.",
    examples=[[path] for path in _EXAMPLE_PATHS],
)

# Start the web UI only when this file is executed as a script
if __name__ == "__main__":
    interface.launch()