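"""Lunchpad demo: turn a photo of food into a "fancy plated" rendering.

Pipeline (as implemented below):
  1. The Inverse Cooking model predicts a recipe title and ingredient list
     from the input photo.
  2. The title and ingredients are combined into a text prompt.
  3. Stable Diffusion v1-4 generates an image of the plated dish from that prompt.
"""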
import spaces
import gradio as gr
from huggingface_hub import hf_hub_download

import os
import pickle
import torch

from argparse import Namespace
from diffusers import StableDiffusionPipeline
from src.model import get_model
from src.utils.output_utils import prepare_output
from torchvision import transforms


REPO_ID = "Launchpad/inversecooking"
HF_TOKEN = os.environ.get("HF_TOKEN")

use_gpu = True
device = torch.device('cuda' if torch.cuda.is_available() and use_gpu else 'cpu')

# Inverse Cooking vocabularies: ingredient tokens and recipe/instruction tokens
with open(hf_hub_download(REPO_ID, 'data/ingr_vocab.pkl', token=HF_TOKEN), 'rb') as f:
    ingrs_vocab = pickle.load(f)
with open(hf_hub_download(REPO_ID, 'data/instr_vocab.pkl', token=HF_TOKEN), 'rb') as f:
    vocab = pickle.load(f)

ingr_vocab_size = len(ingrs_vocab)
instrs_vocab_size = len(vocab)

# Hardcoded model arguments (training-time defaults; only the architecture
# and generation settings matter at inference)
args = Namespace(
    aux_data_dir='../data', batch_size=128, beam=-1, crop_size=224,
    decay_lr=True, dropout_decoder_i=0.3, dropout_decoder_r=0.3,
    dropout_encoder=0.3, embed_size=512, es_metric='loss',
    eval_split='val', finetune_after=-1, get_perplexity=False,
    greedy=False, image_model='resnet50', image_size=256,
    ingrs_only=True, label_smoothing_ingr=0.1, learning_rate=0.001,
    log_step=10, log_term=False, loss_weight=[1.0, 0.0, 0.0, 0.0],
    lr_decay_every=1, lr_decay_rate=0.99, max_eval=4096, maxnumims=5,
    maxnuminstrs=10, maxnumlabels=20, maxseqlen=15, model_name='model',
    n_att=8, n_att_ingrs=4, num_epochs=400, num_workers=8, numgens=3,
    patience=50, project_name='inversecooking',
    recipe1m_dir='path/to/recipe1m', recipe_only=False, resume=False,
    save_dir='path/to/save/models', scale_learning_rate_cnn=0.01,
    suff='', temperature=1.0, tensorboard=True, transf_layers=16,
    transf_layers_ingrs=4, transfer_from='', use_lmdb=True,
    use_true_ingrs=False, weight_decay=0.0
)
# Inference-time overrides: generate the full recipe, not only the ingredient list
args.maxseqlen = 15
args.ingrs_only = False
        
# Build the Inverse Cooking model and load the trained checkpoint.
# Weights are loaded on the CPU first, then the model is moved to `device`.
model = get_model(args, ingr_vocab_size, instrs_vocab_size)
model.load_state_dict(torch.load(
    hf_hub_download(REPO_ID, 'data/modelbest.ckpt', token=HF_TOKEN),
    map_location=torch.device('cpu'),
))
model.eval()
model.ingrs_only = False
model.recipe_only = False
model = model.to(device)
        
# Image preprocessing: resize, center-crop, and ImageNet normalization
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
        

# Sampling configuration: a single greedy generation, beam search disabled
greedy = [True, False, False, False]
beam = [-1, -1, -1, -1]
temperature = 1.0
numgens = 1

# StableDiffusion
pipe = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4').to(device)
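# Note: on a CUDA device the pipeline could also be loaded in half precision to
# reduce GPU memory use, e.g.
#   StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4',
#                                           torch_dtype=torch.float16)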

@spaces.GPU
def generate_image(input_img):
    """Predict a recipe from the input photo, then render a fancy plated version of it."""

    # Inverse Cooking
    image_tensor = transform(input_img).unsqueeze(0).to(device)

    # Sample numgens recipe(s) from the input image (numgens = 1 here)
    for i in range(numgens):
        with torch.no_grad():
            outputs = model.sample(image_tensor, greedy=greedy[i], 
                                   temperature=temperature, beam=beam[i], true_ingrs=None)

        ingr_ids = outputs['ingr_ids'].cpu().numpy()
        recipe_ids = outputs['recipe_ids'].cpu().numpy()

        outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)

        recipe_name = outs['title']
        ingredients = outs['ingrs'] # ingredient list

    # Build the Stable Diffusion prompt from the predicted title and ingredients
    ingredients = ', '.join(ingredients)
    prompt = f"Fancy food plating of {recipe_name} with ingredients {ingredients}"
    print(prompt)

    # StableDiffusion
    new_image = pipe(prompt).images[0]
    return new_image
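
# Example (hypothetical) of calling generate_image directly, outside the Gradio UI:
#   from PIL import Image
#   plated = generate_image(Image.open('food.jpg'))
#   plated.save('plated.png')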

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Image(
                "https://www.ocf.berkeley.edu/~launchpad/media/uploads/project_logos/414478903_2298162417059609_260250523028403756_n_yt9pGFm.png",
                elem_id="logo-img",
                show_label=False,
                show_share_button=False,
                show_download_button=False,
            )
            
        with gr.Column(scale=3):
            gr.Markdown("""Lunchpad is a [Launchpad](https://launchpad.studentorg.berkeley.edu/) project (Spring 2023) that transforms pictures of food into fancy plated versions through a novel transformer architecture and latent diffusion models.
                      <br/><br/>
                      **Model**: [Inverse Cooking](https://arxiv.org/abs/1812.06164), [Stable-Diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4)
                      <br/>
                      **Developed by**: Sebastian Zhao, Annabelle Park, Nikhil Pitta, Tanush Talati, Rahul Vijay, Jade Wang, Tony Xin
                      """
                      )
    with gr.Row():  
        gr.Interface(generate_image, gr.Image(), "image")

if __name__ == '__main__':
    demo.launch()