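# Omost-style prompt planner.
#
# An LLM is prompted (see `OmostPromter.template`) to "program" an image layout by
# writing Python code against the `Canvas` class below. The generated code block is
# parsed by `Canvas.from_bot_response`, and `Canvas.process` turns it into a coarse
# 90*90 color layout plus per-region masks and prompts for downstream image generation.
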
from transformers import AutoTokenizer, TextIteratorStreamer
import difflib
import torch
import numpy as np
import re
from models.model_manager import ModelManager
from PIL import Image

valid_colors = {  # r, g, b
    "aliceblue": (240, 248, 255),
    "antiquewhite": (250, 235, 215),
    "aqua": (0, 255, 255),
    "aquamarine": (127, 255, 212),
    "azure": (240, 255, 255),
    "beige": (245, 245, 220),
    "bisque": (255, 228, 196),
    "black": (0, 0, 0),
    "blanchedalmond": (255, 235, 205),
    "blue": (0, 0, 255),
    "blueviolet": (138, 43, 226),
    "brown": (165, 42, 42),
    "burlywood": (222, 184, 135),
    "cadetblue": (95, 158, 160),
    "chartreuse": (127, 255, 0),
    "chocolate": (210, 105, 30),
    "coral": (255, 127, 80),
    "cornflowerblue": (100, 149, 237),
    "cornsilk": (255, 248, 220),
    "crimson": (220, 20, 60),
    "cyan": (0, 255, 255),
    "darkblue": (0, 0, 139),
    "darkcyan": (0, 139, 139),
    "darkgoldenrod": (184, 134, 11),
    "darkgray": (169, 169, 169),
    "darkgrey": (169, 169, 169),
    "darkgreen": (0, 100, 0),
    "darkkhaki": (189, 183, 107),
    "darkmagenta": (139, 0, 139),
    "darkolivegreen": (85, 107, 47),
    "darkorange": (255, 140, 0),
    "darkorchid": (153, 50, 204),
    "darkred": (139, 0, 0),
    "darksalmon": (233, 150, 122),
    "darkseagreen": (143, 188, 143),
    "darkslateblue": (72, 61, 139),
    "darkslategray": (47, 79, 79),
    "darkslategrey": (47, 79, 79),
    "darkturquoise": (0, 206, 209),
    "darkviolet": (148, 0, 211),
    "deeppink": (255, 20, 147),
    "deepskyblue": (0, 191, 255),
    "dimgray": (105, 105, 105),
    "dimgrey": (105, 105, 105),
    "dodgerblue": (30, 144, 255),
    "firebrick": (178, 34, 34),
    "floralwhite": (255, 250, 240),
    "forestgreen": (34, 139, 34),
    "fuchsia": (255, 0, 255),
    "gainsboro": (220, 220, 220),
    "ghostwhite": (248, 248, 255),
    "gold": (255, 215, 0),
    "goldenrod": (218, 165, 32),
    "gray": (128, 128, 128),
    "grey": (128, 128, 128),
    "green": (0, 128, 0),
    "greenyellow": (173, 255, 47),
    "honeydew": (240, 255, 240),
    "hotpink": (255, 105, 180),
    "indianred": (205, 92, 92),
    "indigo": (75, 0, 130),
    "ivory": (255, 255, 240),
    "khaki": (240, 230, 140),
    "lavender": (230, 230, 250),
    "lavenderblush": (255, 240, 245),
    "lawngreen": (124, 252, 0),
    "lemonchiffon": (255, 250, 205),
    "lightblue": (173, 216, 230),
    "lightcoral": (240, 128, 128),
    "lightcyan": (224, 255, 255),
    "lightgoldenrodyellow": (250, 250, 210),
    "lightgray": (211, 211, 211),
    "lightgrey": (211, 211, 211),
    "lightgreen": (144, 238, 144),
    "lightpink": (255, 182, 193),
    "lightsalmon": (255, 160, 122),
    "lightseagreen": (32, 178, 170),
    "lightskyblue": (135, 206, 250),
    "lightslategray": (119, 136, 153),
    "lightslategrey": (119, 136, 153),
    "lightsteelblue": (176, 196, 222),
    "lightyellow": (255, 255, 224),
    "lime": (0, 255, 0),
    "limegreen": (50, 205, 50),
    "linen": (250, 240, 230),
    "magenta": (255, 0, 255),
    "maroon": (128, 0, 0),
    "mediumaquamarine": (102, 205, 170),
    "mediumblue": (0, 0, 205),
    "mediumorchid": (186, 85, 211),
    "mediumpurple": (147, 112, 219),
    "mediumseagreen": (60, 179, 113),
    "mediumslateblue": (123, 104, 238),
    "mediumspringgreen": (0, 250, 154),
    "mediumturquoise": (72, 209, 204),
    "mediumvioletred": (199, 21, 133),
    "midnightblue": (25, 25, 112),
    "mintcream": (245, 255, 250),
    "mistyrose": (255, 228, 225),
    "moccasin": (255, 228, 181),
    "navajowhite": (255, 222, 173),
    "navy": (0, 0, 128),
    "navyblue": (0, 0, 128),
    "oldlace": (253, 245, 230),
    "olive": (128, 128, 0),
    "olivedrab": (107, 142, 35),
    "orange": (255, 165, 0),
    "orangered": (255, 69, 0),
    "orchid": (218, 112, 214),
    "palegoldenrod": (238, 232, 170),
    "palegreen": (152, 251, 152),
    "paleturquoise": (175, 238, 238),
    "palevioletred": (219, 112, 147),
    "papayawhip": (255, 239, 213),
    "peachpuff": (255, 218, 185),
    "peru": (205, 133, 63),
    "pink": (255, 192, 203),
    "plum": (221, 160, 221),
    "powderblue": (176, 224, 230),
    "purple": (128, 0, 128),
    "rebeccapurple": (102, 51, 153),
    "red": (255, 0, 0),
    "rosybrown": (188, 143, 143),
    "royalblue": (65, 105, 225),
    "saddlebrown": (139, 69, 19),
    "salmon": (250, 128, 114),
    "sandybrown": (244, 164, 96),
    "seagreen": (46, 139, 87),
    "seashell": (255, 245, 238),
    "sienna": (160, 82, 45),
    "silver": (192, 192, 192),
    "skyblue": (135, 206, 235),
    "slateblue": (106, 90, 205),
    "slategray": (112, 128, 144),
    "slategrey": (112, 128, 144),
    "snow": (255, 250, 250),
    "springgreen": (0, 255, 127),
    "steelblue": (70, 130, 180),
    "tan": (210, 180, 140),
    "teal": (0, 128, 128),
    "thistle": (216, 191, 216),
    "tomato": (255, 99, 71),
    "turquoise": (64, 224, 208),
    "violet": (238, 130, 238),
    "wheat": (245, 222, 179),
    "white": (255, 255, 255),
    "whitesmoke": (245, 245, 245),
    "yellow": (255, 255, 0),
    "yellowgreen": (154, 205, 50),
}
valid_locations = {  # x, y in 90*90
    "in the center": (45, 45),
    "on the left": (15, 45),
    "on the right": (75, 45),
    "on the top": (45, 15),
    "on the bottom": (45, 75),
    "on the top-left": (15, 15),
    "on the top-right": (75, 15),
    "on the bottom-left": (15, 75),
    "on the bottom-right": (75, 75),
}

valid_offsets = {  # x, y in 90*90
    "no offset": (0, 0),
    "slightly to the left": (-10, 0),
    "slightly to the right": (10, 0),
    "slightly to the upper": (0, -10),
    "slightly to the lower": (0, 10),
    "slightly to the upper-left": (-10, -10),
    "slightly to the upper-right": (10, -10),
    "slightly to the lower-left": (-10, 10),
    "slightly to the lower-right": (10, 10),
}

valid_areas = {  # w, h in 90*90
    "a small square area": (50, 50),
    "a small vertical area": (40, 60),
    "a small horizontal area": (60, 40),
    "a medium-sized square area": (60, 60),
    "a medium-sized vertical area": (50, 80),
    "a medium-sized horizontal area": (80, 50),
    "a large square area": (70, 70),
    "a large vertical area": (60, 90),
    "a large horizontal area": (90, 60),
}
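
# The three vocabularies above, together with `valid_colors`, define the layout
# language the LLM may use: a location anchor plus an offset gives a rectangle
# center, and an area name gives its width/height, all on a 90*90 grid (see
# `Canvas.add_local_description` below, where these are turned into rects).
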
def safe_str(x):
    return x.strip(",. ") + "."
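
# `closest_name` snaps a possibly free-form LLM value (e.g. "light blue-ish",
# a made-up example) onto the nearest key of one of the vocabularies above via
# difflib, so small deviations in the generated code do not break parsing.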
def closest_name(input_str, options):
    input_str = input_str.lower()
    closest_match = difflib.get_close_matches(
        input_str, list(options.keys()), n=1, cutoff=0.5
    )
    assert isinstance(closest_match, list) and len(closest_match) > 0, (
        f"The value [{input_str}] is not valid!"
    )
    result = closest_match[0]
    if result != input_str:
        print(f"Automatically corrected [{input_str}] -> [{result}].")
    return result
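
# `Canvas` is the interface the LLM is prompted to program against: the model's
# response must contain a ```python ... ``` block that creates `canvas = Canvas()`,
# calls `set_global_description` once and `add_local_description` once per region;
# `process()` then rasterizes those calls into an initial latent and region masks.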
class Canvas:
    @staticmethod
    def from_bot_response(response: str):
        # Extract the ```python ... ``` block from the LLM response and execute it
        # to obtain the `canvas` object it is expected to construct.
        matched = re.search(r"```python\n(.*?)\n```", response, re.DOTALL)
        assert matched, "Response does not contain a code block!"
        code_content = matched.group(1)
        assert "canvas = Canvas()" in code_content, (
            "Code block must include a valid canvas variable!"
        )
        local_vars = {"Canvas": Canvas}
        exec(code_content, {}, local_vars)
        canvas = local_vars.get("canvas", None)
        assert isinstance(canvas, Canvas), "Code block must produce a valid canvas variable!"
        return canvas
    def __init__(self):
        self.components = []
        self.color = None
        self.record_tags = True
        self.prefixes = []
        self.suffixes = []
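
    # `set_global_description` stores the background color and the global prompt
    # pieces: the description becomes a prefix shared by every region, while the
    # detailed descriptions (plus tags, when recorded) become global suffixes.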
    def set_global_description(
        self,
        description: str,
        detailed_descriptions: list,
        tags: str,
        HTML_web_color_name: str,
    ):
        assert isinstance(description, str), "Global description is not valid!"
        assert isinstance(detailed_descriptions, list) and all(
            isinstance(item, str) for item in detailed_descriptions
        ), "Global detailed_descriptions is not valid!"
        assert isinstance(tags, str), "Global tags is not valid!"
        HTML_web_color_name = closest_name(HTML_web_color_name, valid_colors)
        self.color = np.array([[valid_colors[HTML_web_color_name]]], dtype=np.uint8)
        self.prefixes = [description]
        self.suffixes = detailed_descriptions
        if self.record_tags:
            self.suffixes = self.suffixes + [tags]
        self.prefixes = [safe_str(x) for x in self.prefixes]
        self.suffixes = [safe_str(x) for x in self.suffixes]
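
    # `add_local_description` converts the textual location/offset/area into a
    # clamped rectangle on the 90*90 grid and stores it, together with its color,
    # depth and prompt pieces, as one component of the canvas.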
    def add_local_description(
        self,
        location: str,
        offset: str,
        area: str,
        distance_to_viewer: float,
        description: str,
        detailed_descriptions: list,
        tags: str,
        atmosphere: str,
        style: str,
        quality_meta: str,
        HTML_web_color_name: str,
    ):
        assert isinstance(description, str), "Local description is not valid!"
        assert (
            isinstance(distance_to_viewer, (int, float)) and distance_to_viewer > 0
        ), f"The distance_to_viewer for [{description}] is not a positive number!"
        assert isinstance(detailed_descriptions, list) and all(
            isinstance(item, str) for item in detailed_descriptions
        ), f"The detailed_descriptions for [{description}] is not valid!"
        assert isinstance(tags, str), f"The tags for [{description}] is not valid!"
        assert isinstance(atmosphere, str), (
            f"The atmosphere for [{description}] is not valid!"
        )
        assert isinstance(style, str), f"The style for [{description}] is not valid!"
        assert isinstance(quality_meta, str), (
            f"The quality_meta for [{description}] is not valid!"
        )
        location = closest_name(location, valid_locations)
        offset = closest_name(offset, valid_offsets)
        area = closest_name(area, valid_areas)
        HTML_web_color_name = closest_name(HTML_web_color_name, valid_colors)
        xb, yb = valid_locations[location]
        xo, yo = valid_offsets[offset]
        w, h = valid_areas[area]
        # rect = (y0, y1, x0, x1), clamped to the 90*90 canvas
        rect = (yb + yo - h // 2, yb + yo + h // 2, xb + xo - w // 2, xb + xo + w // 2)
        rect = [max(0, min(90, i)) for i in rect]
        color = np.array([[valid_colors[HTML_web_color_name]]], dtype=np.uint8)
        prefixes = self.prefixes + [description]
        suffixes = detailed_descriptions
        if self.record_tags:
            suffixes = suffixes + [tags, atmosphere, style, quality_meta]
        prefixes = [safe_str(x) for x in prefixes]
        suffixes = [safe_str(x) for x in suffixes]
        self.components.append(
            dict(
                rect=rect,
                distance_to_viewer=distance_to_viewer,
                color=color,
                prefixes=prefixes,
                suffixes=suffixes,
                location=location,
            )
        )
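
    # `process` paints the components back-to-front (largest distance_to_viewer
    # first) over the global background color to form a coarse 90*90 "initial
    # latent" image, and emits one (mask, prefixes, suffixes) condition for the
    # full canvas plus one per component.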
    def process(self):
        # sort components
        self.components = sorted(
            self.components, key=lambda x: x["distance_to_viewer"], reverse=True
        )
        # compute initial latent
        initial_latent = np.zeros(shape=(90, 90, 3), dtype=np.float32) + self.color
        for component in self.components:
            a, b, c, d = component["rect"]
            initial_latent[a:b, c:d] = (
                0.7 * component["color"] + 0.3 * initial_latent[a:b, c:d]
            )
        initial_latent = initial_latent.clip(0, 255).astype(np.uint8)
        # compute conditions
        bag_of_conditions = [
            dict(
                mask=np.ones(shape=(90, 90), dtype=np.float32),
                prefixes=self.prefixes,
                suffixes=self.suffixes,
                location="full",
            )
        ]
        for component in self.components:
            a, b, c, d = component["rect"]
            m = np.zeros(shape=(90, 90), dtype=np.float32)
            m[a:b, c:d] = 1.0
            bag_of_conditions.append(
                dict(
                    mask=m,
                    prefixes=component["prefixes"],
                    suffixes=component["suffixes"],
                    location=component["location"],
                )
            )
        return dict(
            initial_latent=initial_latent,
            bag_of_conditions=bag_of_conditions,
        )
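
# `OmostPromter` wraps the LLM side: it sends the user prompt together with the
# `Canvas` system template to the language model, parses the generated code via
# `Canvas.from_bot_response`, and attaches the resulting prompts and region masks
# to the prompt dictionary used downstream.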
class OmostPromter(torch.nn.Module):
    def __init__(self, model=None, tokenizer=None, template="", device="cpu"):
        super().__init__()
        self.model = model
        self.tokenizer = tokenizer
        self.device = device
        if template == "":
            template = r"""You are a helpful AI assistant to compose images using the below python class `Canvas`:
```python
class Canvas:
    def set_global_description(self, description: str, detailed_descriptions: list[str], tags: str, HTML_web_color_name: str):
        pass

    def add_local_description(self, location: str, offset: str, area: str, distance_to_viewer: float, description: str, detailed_descriptions: list[str], tags: str, atmosphere: str, style: str, quality_meta: str, HTML_web_color_name: str):
        assert location in ["in the center", "on the left", "on the right", "on the top", "on the bottom", "on the top-left", "on the top-right", "on the bottom-left", "on the bottom-right"]
        assert offset in ["no offset", "slightly to the left", "slightly to the right", "slightly to the upper", "slightly to the lower", "slightly to the upper-left", "slightly to the upper-right", "slightly to the lower-left", "slightly to the lower-right"]
        assert area in ["a small square area", "a small vertical area", "a small horizontal area", "a medium-sized square area", "a medium-sized vertical area", "a medium-sized horizontal area", "a large square area", "a large vertical area", "a large horizontal area"]
        assert distance_to_viewer > 0
        pass
```"""
        self.template = template
    @staticmethod
    def from_model_manager(model_manager: ModelManager):
        model, model_path = model_manager.fetch_model(
            "omost_prompt", require_model_path=True
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        omost = OmostPromter(
            model=model, tokenizer=tokenizer, device=model_manager.device
        )
        return omost
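
    # `__call__` runs the full pipeline: format the conversation with the chat
    # template, generate and collect the model's streamed output, parse the emitted
    # Canvas code, and add the global prompt, per-region prompts and PIL masks to
    # `prompt_dict`.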
    def __call__(self, prompt_dict: dict):
        raw_prompt = prompt_dict["prompt"]
        conversation = [{"role": "system", "content": self.template}]
        conversation.append({"role": "user", "content": raw_prompt})
        input_ids = self.tokenizer.apply_chat_template(
            conversation, return_tensors="pt", add_generation_prompt=True
        ).to(self.device)
        streamer = TextIteratorStreamer(
            self.tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
        )
        attention_mask = torch.ones(
            input_ids.shape, dtype=torch.bfloat16, device=self.device
        )
        generate_kwargs = dict(
            input_ids=input_ids,
            streamer=streamer,
            # stopping_criteria=stopping_criteria,
            # max_new_tokens=max_new_tokens,
            do_sample=True,
            attention_mask=attention_mask,
            pad_token_id=self.tokenizer.eos_token_id,
            # temperature=temperature,
            # top_p=top_p,
        )
        self.model.generate(**generate_kwargs)
        outputs = []
        for text in streamer:
            outputs.append(text)
        llm_outputs = "".join(outputs)
        canvas = Canvas.from_bot_response(llm_outputs)
        canvas_output = canvas.process()
        prompts = [
            " ".join(_["prefixes"] + _["suffixes"][:2])
            for _ in canvas_output["bag_of_conditions"]
        ]
        canvas_output["prompt"] = prompts[0]
        canvas_output["prompts"] = prompts[1:]
        raw_masks = [_["mask"] for _ in canvas_output["bag_of_conditions"]]
        masks = []
        for mask in raw_masks:
            mask[mask > 0.5] = 255
            mask = np.stack([mask] * 3, axis=-1).astype("uint8")
            masks.append(Image.fromarray(mask))
        canvas_output["masks"] = masks
        prompt_dict.update(canvas_output)
        print("Your prompt is extended by Omost:\n")
        for cnt, (component, pmt) in enumerate(
            zip(canvas_output["bag_of_conditions"], prompts), start=1
        ):
            loc = component["location"]
            print(f"Component {cnt} - Location: {loc}\nPrompt: {pmt}\n")
        return prompt_dict
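

# Minimal usage sketch (illustrative only, not part of the module): it builds a
# Canvas by hand, the same way an LLM-generated code block would, and rasterizes it
# with `process()`. All descriptions below are made-up example values.
if __name__ == "__main__":
    canvas = Canvas()
    canvas.set_global_description(
        description="A quiet lakeside scene at dawn",
        detailed_descriptions=["Mist drifts over calm water", "Soft golden light on the horizon"],
        tags="lake, dawn, mist, calm",
        HTML_web_color_name="lightsteelblue",
    )
    canvas.add_local_description(
        location="on the left",
        offset="no offset",
        area="a medium-sized vertical area",
        distance_to_viewer=5.0,
        description="A wooden rowing boat tied to a post",
        detailed_descriptions=["Weathered planks", "A coiled rope on the bow"],
        tags="boat, wood, rope",
        atmosphere="peaceful and still",
        style="soft impressionist painting",
        quality_meta="high detail",
        HTML_web_color_name="saddlebrown",
    )
    result = canvas.process()
    print(result["initial_latent"].shape)  # (90, 90, 3)
    for condition in result["bag_of_conditions"]:
        print(condition["location"], "->", " ".join(condition["prefixes"]))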