import sys
sys.path.append('./')
from adaface.adaface_wrapper import AdaFaceWrapper
import torch
import numpy as np
import random
import gradio as gr
import spaces
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--adaface_encoder_types", type=str, nargs="+", default=["consistentID", "arc2face"],
                    choices=["arc2face", "consistentID"],
                    help="Type(s) of the ID2Ada prompt encoders")
parser.add_argument('--adaface_ckpt_path', type=str,
                    default='models/adaface/VGGface2_HQ_masks2024-10-14T16-09-24_zero3-ada-3500.pt',
                    help="Path to the checkpoint of the ID2Ada prompt encoders")
# If adaface_encoder_cfg_scales is not specified, the weights default to 6.0 (consistentID) and 1.0 (arc2face).
parser.add_argument('--adaface_encoder_cfg_scales', type=float, nargs="+", default=None,
                    help="Scales for the ID2Ada prompt encoders")
parser.add_argument("--enabled_encoders", type=str, nargs="+", default=None,
                    choices=["arc2face", "consistentID"],
                    help="List of enabled encoders (among adaface_encoder_types). Default: None (all enabled)")
parser.add_argument('--model_style_type', type=str, default='realistic',
                    choices=["realistic", "anime", "photorealistic"],
                    help="Type of the base model")
parser.add_argument('--extra_unet_dirpaths', type=str, nargs="*", default=[],
                    help="Extra paths to the checkpoints of the UNet models")
parser.add_argument('--unet_weights', type=float, nargs="+", default=[1],
                    help="Weights for the UNet models")
parser.add_argument("--guidance_scale", type=float, default=8.0,
                    help="The guidance scale for the diffusion model. Default: 8.0")
parser.add_argument("--do_neg_id_prompt_weight", type=float, default=0.0,
                    help="The weight of the ID prompt embeddings added to the negative prompt. Default: 0 (disabled)")
parser.add_argument('--gpu', type=int, default=None)
parser.add_argument('--ip', type=str, default="0.0.0.0")
args = parser.parse_args()

model_style_type2base_model_path = {
    "realistic":      "models/rv51/realisticVisionV51_v51VAE_dste8.safetensors",
    "anime":          "models/aingdiffusion/aingdiffusion_v170_ar.safetensors",
    "photorealistic": "models/sar/sar.safetensors",  # LDM format. Needs to be converted.
}
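# Example launch command (illustrative only; the script name "app.py" and the flag values
# are placeholders, not requirements):
#   python app.py --model_style_type anime --adaface_encoder_cfg_scales 6.0 1.0 --gpu 0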
base_model_path = model_style_type2base_model_path[args.model_style_type]

# Global variables.
MAX_SEED = np.iinfo(np.int32).max
device = "cuda" if args.gpu is None else f"cuda:{args.gpu}"
print(f"Device: {device}")

adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
                         adaface_encoder_types=args.adaface_encoder_types,
                         adaface_ckpt_paths=args.adaface_ckpt_path,
                         adaface_encoder_cfg_scales=args.adaface_encoder_cfg_scales,
                         enabled_encoders=args.enabled_encoders,
                         unet_types=None, extra_unet_dirpaths=args.extra_unet_dirpaths,
                         unet_weights=args.unet_weights, device='cpu')

def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    return seed

def swap_to_gallery(images):
    # Update uploaded_files_gallery, show files, hide clear_button_column.
    # Or:
    # Update uploaded_init_img_gallery, show init_img_files, hide init_clear_button_column.
    return gr.update(value=images, visible=True), gr.update(visible=True), gr.update(value=images, visible=False)

def remove_back_to_files():
    # Hide uploaded_files_gallery, show clear_button_column, hide files, reset init_img_selected_idx.
    # Or:
    # Hide uploaded_init_img_gallery, hide init_clear_button_column, show init_img_files, reset init_img_selected_idx.
    return gr.update(visible=False), gr.update(visible=False), gr.update(value=None, visible=True)

@spaces.GPU
def generate_image(image_paths, guidance_scale, do_neg_id_prompt_weight, perturb_std,
                   num_images, prompt, negative_prompt, enhance_face,
                   seed, progress=gr.Progress(track_tqdm=True)):
    global adaface
    adaface.to(device)

    if image_paths is None or len(image_paths) == 0:
        raise gr.Error("Cannot find any input face image! Please upload a face image.")
    if prompt is None:
        prompt = ""

    adaface_subj_embs = \
        adaface.prepare_adaface_embeddings(image_paths=image_paths, face_id_embs=None,
                                           avg_at_stage='id_emb',
                                           perturb_at_stage='img_prompt_emb',
                                           perturb_std=perturb_std, update_text_encoder=True)
    if adaface_subj_embs is None:
        raise gr.Error("Failed to detect any face! Please try other images.")

    # Sometimes the pipeline ends up on the CPU even after we've moved it to CUDA
    # (due to an offloading mechanism), so create the generator on the intended device explicitly.
    generator = torch.Generator(device=device).manual_seed(seed)
    print(f"Manual seed: {seed}. do_neg_id_prompt_weight: {do_neg_id_prompt_weight}.")
    # Generate num_images images each time for the user to select from.
    noise = torch.randn(num_images, 3, 512, 512, device=device, generator=generator)
    #print(noise.abs().sum())

    if enhance_face and "face portrait" not in prompt:
        if "portrait" in prompt:
            # Enhance the facial features by replacing "portrait" with "face portrait".
            prompt = prompt.replace("portrait", "face portrait")
        else:
            prompt = "face portrait, " + prompt

    generator = torch.Generator(device=adaface.pipeline._execution_device).manual_seed(seed)
    # samples: a list of PIL Image instances.
    samples = adaface(noise, prompt, negative_prompt,
                      do_neg_id_prompt_weight=do_neg_id_prompt_weight,
                      guidance_scale=guidance_scale,
                      out_image_count=num_images,
                      generator=generator, verbose=True)
    return samples
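# Illustrative direct call (the file name and values are hypothetical; in the app below,
# this function is wired to a Gradio button rather than called by hand):
#   samples = generate_image(["face1.jpg"], guidance_scale=8.0, do_neg_id_prompt_weight=0.0,
#                            perturb_std=0.0, num_images=2, prompt="portrait of a man hiking",
#                            negative_prompt="", enhance_face=True, seed=42)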
def check_prompt_and_model_type(prompt, model_style_type):
    global adaface

    model_style_type = model_style_type.lower()
    base_model_path = model_style_type2base_model_path[model_style_type]
    # If the base model type has changed, reload the model.
    if model_style_type != args.model_style_type:
        adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
                                 adaface_encoder_types=args.adaface_encoder_types,
                                 adaface_ckpt_paths=args.adaface_ckpt_path, device='cpu')
        # Update the base model type.
        args.model_style_type = model_style_type

    if not prompt:
        raise gr.Error("Prompt cannot be blank")

### Description
title = r"""