Spaces:

kadirnar
/

ComfyUI-Demo

Paused

App Files Files

xet

Community

ComfyUI-Demo / stable_cascade.py

kadirnar

Update stable_cascade.py

b31f197 verified over 1 year ago

raw

history blame

5.95 kB

	import os

	import gradio as gr
	import torch
	from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline

	# Module-level setup: configure the runtime, load both Stable Cascade pipelines,
	# compile their core networks and move them to the GPU. Runs once at import time.

	# Ask TorchDynamo to suppress compilation errors instead of raising them.
	torch._dynamo.config.suppress_errors = True
	# Set TOKENIZERS_PARALLELISM to 'false' for the tokenizers library.
	os.environ['TOKENIZERS_PARALLELISM'] = 'false'

	# Load the prior and decoder pipelines, both with bfloat16 weights.
	prior = StableCascadePriorPipeline.from_pretrained("stabilityai/stable-cascade-prior", torch_dtype=torch.bfloat16)
	decoder = StableCascadeDecoderPipeline.from_pretrained("stabilityai/stable-cascade", torch_dtype=torch.bfloat16)

	# Replace each pipeline's core network with its torch.compile'd version.
	# fullgraph=True asks for the whole network to be captured as a single graph.
	prior.prior = torch.compile(prior.prior, mode="reduce-overhead", fullgraph=True)
	decoder.decoder = torch.compile(decoder.decoder, mode="max-autotune", fullgraph=True)

	# Move both pipelines to the CUDA device (no CPU fallback is attempted here).
	prior.to("cuda")
	decoder.to("cuda")

	def generate_images(
	    prompt="a photo of a girl",
	    negative_prompt="bad,ugly,deformed",
	    height=1024,
	    width=1024,
	    guidance_scale=4.0,
	    num_images_per_prompt=1,
	    prior_inference_steps=20,
	    decoder_inference_steps=10
	):
	    """
	    Generate images for a text prompt with the Stable Cascade prior and decoder pipelines.

	    The module-level `prior` pipeline turns the prompt into image embeddings, which the
	    module-level `decoder` pipeline then decodes into images.

	    Parameters:
	    - prompt (str): The prompt to generate images for.
	    - negative_prompt (str): The negative prompt to guide image generation away from.
	    - height (int): The height of the generated images.
	    - width (int): The width of the generated images.
	    - guidance_scale (float): The guidance scale passed to the prior pipeline.
	    - num_images_per_prompt (int): How many images the prior is asked to produce per prompt.
	    - prior_inference_steps (int): The number of inference steps for the prior model.
	    - decoder_inference_steps (int): The number of inference steps for the decoder model.

	    Returns:
	    - The `images` attribute of the decoder pipeline output, requested with
	      output_type="pil".
	    """
	    # Coerce integer-valued arguments so that float-typed numbers (e.g. 1024.0)
	    # are accepted by the pipelines.
	    height = int(height)
	    width = int(width)
	    num_images_per_prompt = int(num_images_per_prompt)
	    prior_inference_steps = int(prior_inference_steps)
	    decoder_inference_steps = int(decoder_inference_steps)

	    # Generate image embeddings using the prior model
	    prior_output = prior(
	        prompt=prompt,
	        height=height,
	        width=width,
	        negative_prompt=negative_prompt,
	        guidance_scale=guidance_scale,
	        num_images_per_prompt=num_images_per_prompt,
	        num_inference_steps=prior_inference_steps,
	    )

	    # Cast the embeddings to the decoder pipeline's own dtype. The previous
	    # `.half()` forced float16 regardless of the dtype the decoder was loaded with.
	    image_embeddings = prior_output.image_embeddings.to(decoder.dtype)

	    # Generate images using the decoder model and the embeddings from the prior model
	    decoder_output = decoder(
	        image_embeddings=image_embeddings,
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        guidance_scale=0.0,  # Guidance scale typically set to 0 for decoder as guidance is applied in the prior
	        output_type="pil",
	        num_inference_steps=decoder_inference_steps,
	    )

	    return decoder_output.images


	def web_demo():
	    """
	    Build the Gradio text-to-image interface and wire it to `generate_images`.

	    The left column holds the prompt boxes, the parameter sliders and the
	    "Generate Image" button; the right column holds the output gallery.
	    Clicking the button calls `generate_images` with the current widget values
	    and shows the result in the gallery.

	    Returns:
	    - None. The Blocks container is neither bound to a name nor launched here.
	    """
	    # NOTE(review): the nesting below was reconstructed from a flattened copy of
	    # the source; confirm the layout against the original file.
	    with gr.Blocks():
	        with gr.Row():
	            # Left column: all inputs plus the trigger button.
	            with gr.Column():
	                prompt_box = gr.Textbox(lines=1, placeholder="Prompt", show_label=False)
	                negative_prompt_box = gr.Textbox(lines=1, placeholder="Negative Prompt", show_label=False)

	                # Batch size and output resolution.
	                with gr.Row():
	                    with gr.Column():
	                        image_count_slider = gr.Slider(minimum=1, maximum=4, step=1, value=1, label="Number Image")
	                        height_slider = gr.Slider(minimum=128, maximum=1280, step=32, value=1024, label="Image Height")
	                        width_slider = gr.Slider(minimum=128, maximum=1280, step=32, value=1024, label="Image Width")

	                # Guidance strength and step counts for both stages.
	                with gr.Row():
	                    with gr.Column():
	                        guidance_slider = gr.Slider(minimum=0.1, maximum=15, step=0.1, value=4.0, label="Guidance Scale")
	                        prior_steps_slider = gr.Slider(minimum=1, maximum=50, step=1, value=20, label="Prior Inference Step")
	                        decoder_steps_slider = gr.Slider(minimum=1, maximum=50, step=1, value=10, label="Decoder Inference Step")

	                generate_button = gr.Button(value="Generate Image")

	            # Right column: the generated images.
	            with gr.Column():
	                gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery")

	        # Order must match the positional parameters of generate_images.
	        click_inputs = [
	            prompt_box,
	            negative_prompt_box,
	            height_slider,
	            width_slider,
	            guidance_slider,
	            image_count_slider,
	            prior_steps_slider,
	            decoder_steps_slider,
	        ]
	        generate_button.click(fn=generate_images, inputs=click_inputs, outputs=gallery)