MaxMilan1
		
	committed on
		
		
					Commit 
							
							·
						
						d8aa11d
	
1
								Parent(s):
							
							f2c204b
								
changes
Browse files
- app.py +2 -7
- util/text_img.py +78 -30
    	
        app.py
    CHANGED
    
    | @@ -28,11 +28,7 @@ with gr.Blocks(theme=theme) as GenDemo: | |
| 28 | 
             
                    with gr.Row(variant="panel"):
         | 
| 29 | 
             
                        with gr.Column():
         | 
| 30 | 
             
                            prompt = gr.Textbox(label="Enter a discription of a shoe")
         | 
| 31 | 
            -
                             | 
| 32 | 
            -
                            select = gr.Dropdown(label="Select a model", choices=["Canny","Depth","Normal"])
         | 
| 33 | 
            -
                        
         | 
| 34 | 
            -
                            scale = gr.Slider(label="Control Image Scale", minimum=0.1, maximum=1.0, step=0.1, value=0.5, visible=(select == "Canny"))
         | 
| 35 | 
            -
                            
         | 
| 36 | 
             
                            controlNet_image = gr.Image(label="Enter an image of a shoe, that you want to use as a reference", type='numpy')
         | 
| 37 | 
             
                            gr.Examples(
         | 
| 38 | 
             
                                   examples=[
         | 
| @@ -46,8 +42,7 @@ with gr.Blocks(theme=theme) as GenDemo: | |
| 46 | 
             
                            button_gen = gr.Button("Generate Image", elem_id="generateIm", variant="primary")
         | 
| 47 | 
             
                            gen_image = gr.Image(label="Generated Image", image_mode="RGBA", type='pil', show_download_button=True, show_label=False)
         | 
| 48 |  | 
| 49 | 
            -
                     | 
| 50 | 
            -
                    button_gen.click(check_prompt, inputs=[prompt]).success(generate_image, inputs=[prompt, negative_prompt, controlNet_image, scale], outputs=[gen_image])
         | 
| 51 |  | 
| 52 | 
             
                with gr.Tab("Image to 3D Model Generator"):
         | 
| 53 | 
             
                    with gr.Row(variant="panel"):
         | 
|  | |
| 28 | 
             
                    with gr.Row(variant="panel"):
         | 
| 29 | 
             
                        with gr.Column():
         | 
| 30 | 
             
                            prompt = gr.Textbox(label="Enter a discription of a shoe")
         | 
| 31 | 
            +
                            select = gr.Dropdown(label="Select a model", choices=["Canny","Depth","Normal"])      
         | 
|  | |
|  | |
|  | |
|  | |
| 32 | 
             
                            controlNet_image = gr.Image(label="Enter an image of a shoe, that you want to use as a reference", type='numpy')
         | 
| 33 | 
             
                            gr.Examples(
         | 
| 34 | 
             
                                   examples=[
         | 
|  | |
| 42 | 
             
                            button_gen = gr.Button("Generate Image", elem_id="generateIm", variant="primary")
         | 
| 43 | 
             
                            gen_image = gr.Image(label="Generated Image", image_mode="RGBA", type='pil', show_download_button=True, show_label=False)
         | 
| 44 |  | 
| 45 | 
            +
                    button_gen.click(check_prompt, inputs=[prompt]).success(generate_image, inputs=[prompt, negative_prompt, controlNet_image, select], outputs=[gen_image])
         | 
|  | |
| 46 |  | 
| 47 | 
             
                with gr.Tab("Image to 3D Model Generator"):
         | 
| 48 | 
             
                    with gr.Row(variant="panel"):
         | 
    	
        util/text_img.py
    CHANGED
    
    | @@ -1,8 +1,9 @@ | |
| 1 | 
             
            import spaces
         | 
| 2 | 
             
            import rembg
         | 
| 3 | 
             
            import torch
         | 
| 4 | 
            -
            from diffusers import  | 
| 5 | 
             
            import cv2
         | 
|  | |
| 6 | 
             
            import numpy as np
         | 
| 7 | 
             
            from PIL import Image
         | 
| 8 | 
             
            import gradio as gr
         | 
| @@ -14,47 +15,94 @@ def check_prompt(prompt): | |
| 14 | 
             
                if prompt is None:
         | 
| 15 | 
             
                    raise gr.Error("Please enter a prompt!")
         | 
| 16 |  | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
                 | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
                 | 
| 26 | 
            -
             | 
| 27 | 
            -
                torch_dtype=torch.float16,
         | 
| 28 | 
            -
                use_safetensors=True
         | 
| 29 | 
            -
            )
         | 
| 30 |  | 
| 31 | 
            -
             | 
| 32 |  | 
| 33 | 
             
            # Function to generate an image from text using diffusion
         | 
| 34 | 
             
            @spaces.GPU
         | 
| 35 | 
            -
            def generate_image(prompt,  | 
| 36 | 
             
                prompt += "no background, side view, minimalist shot, single shoe, no legs, product photo"
         | 
| 37 |  | 
| 38 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 39 |  | 
| 40 | 
            -
                 | 
| 41 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
| 42 | 
             
                    negative_prompt=negative_prompt,
         | 
| 43 | 
            -
                    image= | 
| 44 | 
            -
                    controlnet_conditioning_scale= | 
| 45 | 
            -
             | 
| 46 | 
             
                image2 = rembg.remove(image)
         | 
| 47 |  | 
| 48 | 
             
                return image2
         | 
| 49 |  | 
| 50 | 
            -
            def get_canny(image):
         | 
| 51 | 
            -
                image = np.array(image)
         | 
| 52 |  | 
| 53 | 
            -
             | 
| 54 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 55 |  | 
| 56 | 
            -
                 | 
| 57 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 58 | 
             
                image = np.concatenate([image, image, image], axis=2)
         | 
| 59 | 
            -
                 | 
| 60 | 
            -
                return  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
             
            import spaces
         | 
| 2 | 
             
            import rembg
         | 
| 3 | 
             
            import torch
         | 
| 4 | 
            +
            from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoencoderKL
         | 
| 5 | 
             
            import cv2
         | 
| 6 | 
            +
            from transformers import pipeline
         | 
| 7 | 
             
            import numpy as np
         | 
| 8 | 
             
            from PIL import Image
         | 
| 9 | 
             
            import gradio as gr
         | 
|  | |
| 15 | 
             
                if prompt is None:
         | 
| 16 | 
             
                    raise gr.Error("Please enter a prompt!")
         | 
| 17 |  | 
| 18 | 
            +
            controlNet_normal = ControlNetModel.from_pretrained(
         | 
| 19 | 
            +
                    "fusing/stable-diffusion-v1-5-controlnet-normal", 
         | 
| 20 | 
            +
                    torch_dtype=torch.float16
         | 
| 21 | 
            +
                )
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            controlNet_depth = ControlNetModel.from_pretrained(
         | 
| 24 | 
            +
                    "lllyasviel/sd-controlnet-depth", 
         | 
| 25 | 
            +
                    torch_dtype=torch.float16
         | 
| 26 | 
            +
                )
         | 
| 27 | 
            +
            controlNet_MAP = {"Normal": controlNet_normal, "Depth": controlNet_depth}
         | 
|  | |
|  | |
|  | |
| 28 |  | 
| 29 | 
            +
            # vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)
         | 
| 30 |  | 
| 31 | 
             
            # Function to generate an image from text using diffusion
         | 
| 32 | 
             
            @spaces.GPU
         | 
| 33 | 
            +
            def generate_image(prompt, control_image, controlnet):
         | 
| 34 | 
             
                prompt += "no background, side view, minimalist shot, single shoe, no legs, product photo"
         | 
| 35 |  | 
| 36 | 
            +
                pipe = StableDiffusionControlNetPipeline.from_pretrained(
         | 
| 37 | 
            +
                "runwayml/stable-diffusion-v1-5",
         | 
| 38 | 
            +
                controlnet=controlNet_MAP[controlnet],
         | 
| 39 | 
            +
                torch_dtype=torch.float16,
         | 
| 40 | 
            +
                safety_checker = None
         | 
| 41 | 
            +
                )
         | 
| 42 | 
            +
                
         | 
| 43 | 
            +
                pipe.to("cuda")
         | 
| 44 |  | 
| 45 | 
            +
                if controlnet == "Normal":
         | 
| 46 | 
            +
                    control_image = get_normal(control_image)
         | 
| 47 | 
            +
                elif controlnet == "Depth":
         | 
| 48 | 
            +
                    control_image = get_depth(control_image)
         | 
| 49 | 
            +
                
         | 
| 50 | 
            +
                image = pipe(prompt,
         | 
| 51 | 
             
                    negative_prompt=negative_prompt,
         | 
| 52 | 
            +
                    image=control_image,
         | 
| 53 | 
            +
                    controlnet_conditioning_scale=1.0).images[0]
         | 
| 54 | 
            +
             | 
| 55 | 
             
                image2 = rembg.remove(image)
         | 
| 56 |  | 
| 57 | 
             
                return image2
         | 
| 58 |  | 
|  | |
|  | |
| 59 |  | 
| 60 | 
            +
            def get_normal(image):
         | 
| 61 | 
            +
                depth_estimator = pipeline("depth-estimation", model ="Intel/dpt-hybrid-midas" )
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                image = depth_estimator(image)['predicted_depth'][0]
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                image = image.numpy()
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                image_depth = image.copy()
         | 
| 68 | 
            +
                image_depth -= np.min(image_depth)
         | 
| 69 | 
            +
                image_depth /= np.max(image_depth)
         | 
| 70 |  | 
| 71 | 
            +
                bg_threhold = 0.4
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                x = cv2.Sobel(image, cv2.CV_32F, 1, 0, ksize=3)
         | 
| 74 | 
            +
                x[image_depth < bg_threhold] = 0
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                y = cv2.Sobel(image, cv2.CV_32F, 0, 1, ksize=3)
         | 
| 77 | 
            +
                y[image_depth < bg_threhold] = 0
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                z = np.ones_like(x) * np.pi * 2.0
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                image = np.stack([x, y, z], axis=2)
         | 
| 82 | 
            +
                image /= np.sum(image ** 2.0, axis=2, keepdims=True) ** 0.5
         | 
| 83 | 
            +
                image = (image * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
         | 
| 84 | 
            +
                normalimage = Image.fromarray(image)
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                return normalimage
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            def get_depth(image):
         | 
| 89 | 
            +
                depth_estimator = pipeline('depth-estimation')
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                image = depth_estimator(image)['depth']
         | 
| 92 | 
            +
                image = np.array(image)
         | 
| 93 | 
            +
                image = image[:, :, None]
         | 
| 94 | 
             
                image = np.concatenate([image, image, image], axis=2)
         | 
| 95 | 
            +
                depthimage = Image.fromarray(image)
         | 
| 96 | 
            +
                return depthimage
         | 
| 97 | 
            +
             | 
| 98 | 
            +
            # def get_canny(image):
         | 
| 99 | 
            +
            #     image = np.array(image)
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            #     low_threshold = 100
         | 
| 102 | 
            +
            #     high_threshold = 200
         | 
| 103 | 
            +
             | 
| 104 | 
            +
            #     image = cv2.Canny(image,low_threshold,high_threshold)
         | 
| 105 | 
            +
            #     image = image[:,:,None]
         | 
| 106 | 
            +
            #     image = np.concatenate([image, image, image], axis=2)
         | 
| 107 | 
            +
            #     canny_image = Image.fromarray(image)
         | 
| 108 | 
            +
            #     return canny_image
         |