import argparse
import math
import random
from vqgan_clip.grad import *
from vqgan_clip.helpers import *
from vqgan_clip.inits import *
from vqgan_clip.masking import *
from vqgan_clip.optimizers import *
from urllib.request import urlopen
from tqdm import tqdm
import sys
import os

from omegaconf import OmegaConf
from taming.models import cond_transformer, vqgan

import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import transforms
from torchvision.transforms import functional as TF
from torch.cuda import get_device_properties
torch.backends.cudnn.benchmark = False

from torch_optimizer import DiffGrad, AdamP, RAdam

import clip
import kornia.augmentation as K
import numpy as np
import imageio

from PIL import ImageFile, Image, PngImagePlugin, ImageChops
ImageFile.LOAD_TRUNCATED_IMAGES = True

from subprocess import Popen, PIPE
import re
from packaging import version

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Check for GPU and reduce the default image size if low VRAM
default_image_size = 512  # >8GB VRAM
if not torch.cuda.is_available():
    default_image_size = 256  # no GPU found
elif get_device_properties(0).total_memory <= 2 ** 33:  # 2 ** 33 = 8,589,934,592 bytes = 8 GB
    default_image_size = 318  # <8GB VRAM


def parse():
    vq_parser = argparse.ArgumentParser(description='Image generation using VQGAN+CLIP')

    vq_parser.add_argument("-aug",  "--augments", nargs='+', action='append', type=str, choices=['Hf','Ji','Sh','Pe','Ro','Af','Et','Ts','Er'], help="Enabled augments (latest cut method only)", default=[['Hf','Af','Pe','Ji','Er']], dest='augments')
    vq_parser.add_argument("-cd",   "--cuda_device", type=str, help="CUDA device to use", default="cuda:0", dest='cuda_device')
    vq_parser.add_argument("-ckpt", "--vqgan_checkpoint", type=str, help="VQGAN checkpoint", default='checkpoints/vqgan_imagenet_f16_16384.ckpt', dest='vqgan_checkpoint')
    vq_parser.add_argument("-conf", "--vqgan_config", type=str, help="VQGAN config", default='checkpoints/vqgan_imagenet_f16_16384.yaml', dest='vqgan_config')
    vq_parser.add_argument("-cpe",  "--change_prompt_every", type=int, help="Prompt change frequency", default=0, dest='prompt_frequency')
    vq_parser.add_argument("-cutm", "--cut_method", type=str, help="Cut method", choices=['original','latest'], default='latest', dest='cut_method')
    vq_parser.add_argument("-cutp", "--cut_power", type=float, help="Cut power", default=1., dest='cut_pow')
    vq_parser.add_argument("-cuts", "--num_cuts", type=int, help="Number of cuts", default=32, dest='cutn')
    vq_parser.add_argument("-d",    "--deterministic", action='store_true', help="Enable cudnn.deterministic?", dest='cudnn_determinism')
    vq_parser.add_argument("-i",    "--iterations", type=int, help="Number of iterations", default=500, dest='max_iterations')
    vq_parser.add_argument("-ifps", "--input_video_fps", type=float, help="When creating an interpolated video, use this as the input fps to interpolate from (>0 & <ofps)", default=15, dest='input_video_fps')

    # ... (remaining argument definitions truncated) ...


# ... (model/CLIP loading, cutout and helper code, and the start of the main
# generation loop are truncated; the indented block below is the loop body,
# where `args` holds the parsed arguments, `i` is the iteration counter,
# `p` the phrase counter, `all_phrases` the list of prompt sets, and `pbar`
# a tqdm progress bar) ...

    # Change the text prompt every `prompt_frequency` iterations
    if args.prompt_frequency > 0:
        if i % args.prompt_frequency == 0 and i > 0:
            # In case there aren't enough phrases, just loop
            if p >= len(all_phrases):
                p = 0

            pMs = []
            args.prompts = all_phrases[p]

            # Show user we're changing prompt
            print(args.prompts)

            for prompt in args.prompts:
                txt, weight, stop = split_prompt(prompt)
                embed = perceptor.encode_text(clip.tokenize(txt).to(device)).float()
                pMs.append(Prompt(embed, weight, stop).to(device))
            p += 1

    train(i)

    i += 1
    pbar.update()

print("done")
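
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original script): assuming this file is
# saved as generate.py and the vqgan_imagenet_f16_16384 checkpoint and config
# live under checkpoints/, a run exercising only the flags visible above might
# look like:
#
#   python generate.py -i 500 -cuts 32 -cutm latest -cpe 100 \
#       -conf checkpoints/vqgan_imagenet_f16_16384.yaml \
#       -ckpt checkpoints/vqgan_imagenet_f16_16384.ckpt
#
# The text-prompt flag itself is defined in the truncated portion of parse(),
# so it is not shown here.
# ---------------------------------------------------------------------------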