File size: 5,131 Bytes
0c472b3 e042f04 145deb3 e042f04 330e8a2 e042f04 d4f762a e042f04 d4f762a e042f04 330e8a2 e042f04 a4e01fa e042f04 2fa3fb0 e042f04 89d4af3 e042f04 2fa3fb0 e042f04 0c472b3 e042f04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import spaces
import gradio as gr
from huggingface_hub import hf_hub_download
import os
import pickle
import torch
from argparse import Namespace
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline
from io import BytesIO
from src.model import get_model
from src.utils.output_utils import prepare_output
from torchvision import transforms
# Hugging Face Hub repository that hosts the vocabularies and the checkpoint.
REPO_ID = "Launchpad/inversecooking"
# Hub access token taken from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Use CUDA only when it is both requested (use_gpu) and actually available.
use_gpu = True
device = torch.device('cuda' if torch.cuda.is_available() and use_gpu else 'cpu')


def _load_pickle_from_hub(filename):
    """Download ``filename`` from ``REPO_ID`` and return the unpickled object.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes instead of being left for the garbage collector.
    """
    local_path = hf_hub_download(REPO_ID, filename, token=HF_TOKEN)
    # NOTE(security): unpickling can execute arbitrary code. This is only
    # acceptable because REPO_ID is a repository the project controls; never
    # point it at an untrusted source.
    with open(local_path, 'rb') as handle:
        return pickle.load(handle)


# Inverse Cooking
ingrs_vocab = _load_pickle_from_hub('data/ingr_vocab.pkl')
vocab = _load_pickle_from_hub('data/instr_vocab.pkl')
ingr_vocab_size = len(ingrs_vocab)
instrs_vocab_size = len(vocab)
# Hardcoded args: a stand-in for the argument object that is handed to
# `get_model` below. Keywords are kept in their original (alphabetical) order.
args = Namespace(
    aux_data_dir='../data',
    batch_size=128,
    beam=-1,
    crop_size=224,
    decay_lr=True,
    dropout_decoder_i=0.3,
    dropout_decoder_r=0.3,
    dropout_encoder=0.3,
    embed_size=512,
    es_metric='loss',
    eval_split='val',
    finetune_after=-1,
    get_perplexity=False,
    greedy=False,
    image_model='resnet50',
    image_size=256,
    ingrs_only=True,
    label_smoothing_ingr=0.1,
    learning_rate=0.001,
    log_step=10,
    log_term=False,
    loss_weight=[1.0, 0.0, 0.0, 0.0],
    lr_decay_every=1,
    lr_decay_rate=0.99,
    max_eval=4096,
    maxnumims=5,
    maxnuminstrs=10,
    maxnumlabels=20,
    maxseqlen=15,
    model_name='model',
    n_att=8,
    n_att_ingrs=4,
    num_epochs=400,
    num_workers=8,
    numgens=3,
    patience=50,
    project_name='inversecooking',
    recipe1m_dir='path/to/recipe1m',
    recipe_only=False,
    resume=False,
    save_dir='path/to/save/models',
    scale_learning_rate_cnn=0.01,
    suff='',
    temperature=1.0,
    tensorboard=True,
    transf_layers=16,
    transf_layers_ingrs=4,
    transfer_from='',
    use_lmdb=True,
    use_true_ingrs=False,
    weight_decay=0.0,
)
# Overrides applied on top of the values above: maxseqlen is re-asserted as 15
# and ingrs_only is switched off.
vars(args).update(maxseqlen=15, ingrs_only=False)
# Load the trained model parameters
model = get_model(args, ingr_vocab_size, instrs_vocab_size)

# The checkpoint is always deserialized onto the CPU; the model as a whole is
# moved to `device` in the last step of this section.
checkpoint_path = hf_hub_download(REPO_ID, 'data/modelbest.ckpt', token=HF_TOKEN)
model.load_state_dict(
    torch.load(checkpoint_path, map_location=torch.device('cpu'))
)

# Inference only: switch to evaluation mode.
model.eval()
# NOTE(review): presumably these flags make `sample` return both ingredients
# and the recipe -- confirm against src.model.
model.ingrs_only = False
model.recipe_only = False
model = model.to(device)
# Preprocessing pipeline applied to each input image, in this order:
# tensor conversion, resize to 256, centre crop to 224, then per-channel
# normalization with the given (mean, std) triples.
transform_list = [
    transforms.ToTensor(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
transform = transforms.Compose(transform_list)
# Sampling settings used by generate_image: generation i reads greedy[i] and
# beam[i]; numgens is how many generations are run per request.
greedy = [True] + [False] * 3
beam = [-1] * 4
temperature = 1.0
numgens = 1
# StableDiffusion: load the pretrained text-to-image pipeline, then place it on
# the same device as the Inverse Cooking model.
pipe = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4')
pipe = pipe.to(device)
@spaces.GPU
def generate_image(input_img):
    """Turn a photo of food into a generated "fancy plating" image.

    The photo is run through the Inverse Cooking model to obtain a dish title
    and an ingredient list; those are formatted into a text prompt that is
    handed to the Stable Diffusion pipeline.

    Args:
        input_img: Image supplied by the Gradio input component. ``None``
            when the user submits without providing an image.

    Returns:
        The first image produced by the Stable Diffusion pipeline for the
        generated prompt.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Without this guard the transform below fails with an opaque TypeError;
    # gr.Error shows a readable message in the UI instead.
    if input_img is None:
        raise gr.Error("Please provide an image of food first.")

    # Inverse Cooking
    image_tensor = transform(input_img).unsqueeze(0).to(device)

    # Each pass overwrites the previous result, so the prompt is built from
    # the last generation (numgens is 1 at module level, i.e. a single pass).
    for i in range(numgens):
        with torch.no_grad():
            outputs = model.sample(
                image_tensor,
                greedy=greedy[i],
                temperature=temperature,
                beam=beam[i],
                true_ingrs=None,
            )

        ingr_ids = outputs['ingr_ids'].cpu().numpy()
        recipe_ids = outputs['recipe_ids'].cpu().numpy()

        # The second return value (validity info) is intentionally unused.
        outs, _valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)

        recipe_name = outs['title']
        ingredients = outs['ingrs']  # ingredient list

    # Create hardcoded StableDiffusion prompt
    ingredients = ', '.join(ingredients)
    prompt = f"Fancy food plating of {recipe_name} with ingredients {ingredients}"
    print(prompt)

    # StableDiffusion
    new_image = pipe(prompt).images[0]
    return new_image
# Static content for the page header.
logo_url = "https://www.ocf.berkeley.edu/~launchpad/media/uploads/project_logos/414478903_2298162417059609_260250523028403756_n_yt9pGFm.png"
about_markdown = """Lunchpad is a [Launchpad](https://launchpad.studentorg.berkeley.edu/) project (Spring 2023) that transforms pictures of food to fancy plated versions through a novel transformer architecture and latent diffusion models.
<br/><br/>
**Model**: [Inverse Cooking](https://arxiv.org/abs/1812.06164), [Stable-Diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4)
<br/>
**Developed by**: Sebastian Zhao, Annabelle Park, Nikhil Pitta, Tanush Talati, Rahul Vijay, Jade Wang, Tony Xin
"""

with gr.Blocks() as demo:
    # Header row: logo in a narrow column, project description in a wide one.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Image(
                logo_url,
                elem_id="logo-img",
                show_label=False,
                show_share_button=False,
                show_download_button=False,
            )
        with gr.Column(scale=3):
            gr.Markdown(about_markdown)
    # Main row: image in, generated image out, wired to generate_image.
    with gr.Row():
        gr.Interface(generate_image, gr.Image(), "image")

# Start the app only when the file is executed as a script.
if __name__ == '__main__':
    demo.launch()
|