Create tokenizer.py
de_en/tokenizer.py  ADDED  +60 -0
@@ -0,0 +1,60 @@
import torch
from torchvision import transforms
from PIL import Image
import numpy as np
import cv2


class VideoTokenizer:
    def __init__(self, resolution=128):
        self.resolution = resolution
        # Resize, convert to tensor, and normalize RGB frames to [-1, 1]
        self.transform = transforms.Compose([
            transforms.Resize((resolution, resolution)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def encode_image(self, image):
        if isinstance(image, str):
            image = Image.open(image).convert('RGB')
        return self.transform(image)

    def encode_video(self, video_path, max_frames=24):
        # For simplicity, we assume video_path may already be a tensor in our dataset.
        # In a real implementation, this would always read frames from a video file.
        if isinstance(video_path, torch.Tensor):
            return video_path

        cap = cv2.VideoCapture(video_path)
        frames = []
        frame_count = 0

        while cap.isOpened() and frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV returns BGR; convert to RGB before applying the transform
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frame = self.transform(frame)
            frames.append(frame)
            frame_count += 1

        cap.release()

        if not frames:
            raise ValueError(f"Could not read any frames from {video_path}")

        # Pad with blank frames if the video is shorter than max_frames
        while len(frames) < max_frames:
            frames.append(torch.zeros_like(frames[0]))

        return torch.stack(frames)

    def save_video(self, frames, output_path, fps=24):
        frames = (frames.clamp(-1, 1) + 1) / 2  # [-1, 1] -> [0, 1]
        frames = (frames.detach().cpu().permute(0, 2, 3, 1).numpy() * 255).astype('uint8')

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (self.resolution, self.resolution))

        for frame in frames:
            # VideoWriter expects BGR frames
            bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            out.write(bgr_frame)

        out.release()
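A minimal usage sketch (not part of this commit) showing how the class could be exercised end to end; it assumes de_en is importable as a package, and "clip.mp4" / "reconstructed.mp4" are placeholder paths:

# Usage sketch (assumption, not part of the committed file).
from de_en.tokenizer import VideoTokenizer

tokenizer = VideoTokenizer(resolution=128)

# (max_frames, 3, 128, 128) tensor of frames normalized to [-1, 1]
frames = tokenizer.encode_video("clip.mp4", max_frames=24)

# Write the frames back out as an mp4 at 24 fps
tokenizer.save_video(frames, "reconstructed.mp4", fps=24)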