import base64
from io import BytesIO

import gradio as gr
import numpy as np
import PIL.Image
import torch
import torchvision
from diffusers import DiffusionPipeline

# Load the text-to-image latent diffusion pipeline once at import time so the
# model is not re-downloaded / re-initialized on every request.
ldm = DiffusionPipeline.from_pretrained("fusing/latent-diffusion-text2im-large")

# Fixed seed so repeated runs with the same prompt produce the same image.
generator = torch.manual_seed(42)

DEFAULT_PROMPT = "A squirrel eating a burger"


def greet(name):
    """Generate an image for the given text prompt and return it as a PIL image.

    Parameters
    ----------
    name : str
        Text prompt from the Gradio textbox. Falls back to DEFAULT_PROMPT
        when empty. (The original ignored this input entirely and always
        used the hard-coded prompt.)

    Returns
    -------
    PIL.Image.Image
        The first generated image of the batch.
    """
    prompt = name.strip() if name and name.strip() else DEFAULT_PROMPT

    output = ldm(
        [prompt],
        generator=generator,
        eta=0.3,
        guidance_scale=6.0,
        num_inference_steps=50,
    )
    # NOTE(review): early diffusers pipelines returned {"sample": tensor};
    # later versions return the tensor / an output object directly. Handle
    # the dict case so .cpu() is always called on a tensor — confirm against
    # the installed diffusers version.
    images = output["sample"] if isinstance(output, dict) else output

    # (batch, C, H, W) floats in [0, 1] -> (batch, H, W, C) uint8 in [0, 255].
    image_processed = images.cpu().permute(0, 2, 3, 1)
    image_processed = (image_processed * 255.0).numpy().astype(np.uint8)
    image_pil = PIL.Image.fromarray(image_processed[0])

    # Log a base64-encoded JPEG of the result (console debugging aid).
    buffered = BytesIO()
    image_pil.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue())
    print(img_str.decode("utf-8"))

    return image_pil


# greet returns exactly one value, so the interface declares exactly one
# output. (The original declared three — image, Carousel, Textbox — which
# does not match the single return value and fails at runtime.)
image = gr.Image(type="pil", label="Your result")
iface = gr.Interface(fn=greet, inputs="text", outputs=image)
iface.launch()