import torch


class CogVideoX:
    def __init__(self, weight: str = "THUDM/CogVideoX-2b", device: str = "cuda"):
        """
        Initializes the CogVideoX model with a specific weight and device.

        Args:
            weight (str, optional): Hugging Face model id or local path of the
                pretrained weights. Defaults to "THUDM/CogVideoX-2b".
            device (str, optional): The device to run the model on. Defaults to "cuda".
        """
        from diffusers import CogVideoXPipeline

        self.pipe = CogVideoXPipeline.from_pretrained(weight).to(device)
    def infer_one_video(
        self,
        prompt: str = None,
        size: list = [320, 512],
        seconds: int = 2,
        fps: int = 8,
        seed: int = 42,
    ):
        """
        Generates a single video based on the provided prompt and parameters.

        Args:
            prompt (str, optional): The text prompt to generate the video from. Defaults to None.
            size (list, optional): The requested size of the video as [height, width]; currently
                not forwarded to the pipeline, which renders at its default resolution.
                Defaults to [320, 512].
            seconds (int, optional): The duration of the video in seconds. Defaults to 2.
            fps (int, optional): The frames per second of the video. Defaults to 8.
            seed (int, optional): The seed for random number generation. Defaults to 42.

        Returns:
            torch.Tensor: The generated video as a tensor of shape (T, C, H, W).
        """
        video = self.pipe(
            prompt=prompt,
            guidance_scale=6,
            num_frames=seconds * fps,
            # height=size[0],
            # width=size[1],
            num_inference_steps=50,
            generator=torch.manual_seed(seed),
        ).frames[0]

        from videogen_hub.utils import images_to_tensor

        video = video[:-1]  # drop the last frame
        video = images_to_tensor(video)  # convert the PIL frames back to a tensor (T, C, H, W)
        return video
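

# A minimal usage sketch, assuming a CUDA-capable machine with diffusers and the
# videogen_hub package (for images_to_tensor) installed; the prompt below is
# illustrative only and not part of the original module.
if __name__ == "__main__":
    model = CogVideoX(weight="THUDM/CogVideoX-2b", device="cuda")
    clip = model.infer_one_video(
        prompt="A panda playing guitar in a bamboo forest",
        seconds=2,
        fps=8,
        seed=42,
    )
    print(clip.shape)  # expected layout: (T, C, H, W)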