jbilcke-hf (HF staff) committed
Commit d2662cc · 1 Parent(s): 4a3f789
vms/config.py CHANGED
@@ -61,7 +61,7 @@ JPEG_QUALITY = int(os.environ.get('JPEG_QUALITY', '97'))
 MODEL_TYPES = {
     "HunyuanVideo": "hunyuan_video",
     "LTX-Video": "ltx_video",
-    "Wan-2.1-T2V": "wan"
+    "Wan": "wan"
 }
 
 # Training types
@@ -70,8 +70,8 @@ TRAINING_TYPES = {
     "Full Finetune": "full-finetune"
 }
 
-# Model variants for each model type
-MODEL_VARIANTS = {
+# Model versions for each model type
+MODEL_VERSIONS = {
     "wan": {
         "Wan-AI/Wan2.1-T2V-1.3B-Diffusers": {
             "name": "Wan 2.1 T2V 1.3B (text-only, smaller)",
@@ -342,7 +342,7 @@ class TrainingConfig:
 
     # Optional arguments follow
     revision: Optional[str] = None
-    variant: Optional[str] = None
+    version: Optional[str] = None
     cache_dir: Optional[str] = None
 
     # Dataset arguments
@@ -415,7 +415,7 @@ class TrainingConfig:
            train_steps=DEFAULT_NB_TRAINING_STEPS,
            lr=2e-5,
            gradient_checkpointing=True,
-           id_token="afkx",
+           id_token=None,
            gradient_accumulation_steps=1,
            lora_rank=DEFAULT_LORA_RANK,
            lora_alpha=DEFAULT_LORA_ALPHA,
@@ -437,7 +437,7 @@ class TrainingConfig:
            train_steps=DEFAULT_NB_TRAINING_STEPS,
            lr=DEFAULT_LEARNING_RATE,
            gradient_checkpointing=True,
-           id_token="BW_STYLE",
+           id_token=None,
            gradient_accumulation_steps=4,
            lora_rank=DEFAULT_LORA_RANK,
            lora_alpha=DEFAULT_LORA_ALPHA,
@@ -459,7 +459,7 @@ class TrainingConfig:
            train_steps=DEFAULT_NB_TRAINING_STEPS,
            lr=1e-5,
            gradient_checkpointing=True,
-           id_token="BW_STYLE",
+           id_token=None,
            gradient_accumulation_steps=1,
            video_resolution_buckets=buckets or SMALL_TRAINING_BUCKETS,
            caption_dropout_p=DEFAULT_CAPTION_DROPOUT_P,
@@ -479,7 +479,7 @@ class TrainingConfig:
            train_steps=DEFAULT_NB_TRAINING_STEPS,
            lr=5e-5,
            gradient_checkpointing=True,
-           id_token=None, # Default is no ID token for Wan
+           id_token=None,
            gradient_accumulation_steps=1,
            lora_rank=32,
            lora_alpha=32,
@@ -502,8 +502,8 @@ class TrainingConfig:
         args.extend(["--pretrained_model_name_or_path", self.pretrained_model_name_or_path])
         if self.revision:
             args.extend(["--revision", self.revision])
-        if self.variant:
-            args.extend(["--variant", self.variant])
+        if self.version:
+            args.extend(["--variant", self.version])
         if self.cache_dir:
             args.extend(["--cache_dir", self.cache_dir])
vms/ui/app_ui.py CHANGED
@@ -8,7 +8,7 @@ from typing import Any, Optional, Dict, List, Union, Tuple
 from vms.config import (
     STORAGE_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH, OUTPUT_PATH,
     TRAINING_PATH, LOG_FILE_PATH, TRAINING_PRESETS, TRAINING_VIDEOS_PATH, MODEL_PATH, OUTPUT_PATH,
-    MODEL_TYPES, SMALL_TRAINING_BUCKETS, TRAINING_TYPES,
+    MODEL_TYPES, SMALL_TRAINING_BUCKETS, TRAINING_TYPES, MODEL_VERSIONS,
     DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
     DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
     DEFAULT_LEARNING_RATE,
@@ -220,6 +220,7 @@ class AppUI:
             self.project_tabs["train_tab"].components["pause_resume_btn"],
             self.project_tabs["train_tab"].components["training_preset"],
             self.project_tabs["train_tab"].components["model_type"],
+            self.project_tabs["train_tab"].components["model_version"],
             self.project_tabs["train_tab"].components["training_type"],
             self.project_tabs["train_tab"].components["lora_rank"],
             self.project_tabs["train_tab"].components["lora_alpha"],
@@ -378,6 +379,20 @@ class AppUI:
             model_type_val = list(MODEL_TYPES.keys())[0]
             logger.warning(f"Invalid model type '{model_type_val}', using default: {model_type_val}")
 
+        # Get model_version value
+        model_version_val = ""
+        # First get the internal model type for the currently selected model
+        model_internal_type = MODEL_TYPES.get(model_type_val)
+        if model_internal_type and model_internal_type in MODEL_VERSIONS:
+            # If there's a saved model_version and it's valid for this model type
+            if "model_version" in ui_state and ui_state["model_version"] in MODEL_VERSIONS.get(model_internal_type, {}):
+                model_version_val = ui_state["model_version"]
+            else:
+                # Otherwise use the first available version
+                versions = list(MODEL_VERSIONS.get(model_internal_type, {}).keys())
+                if versions:
+                    model_version_val = versions[0]
+
         # Ensure training_type is a valid display name
         training_type_val = ui_state.get("training_type", list(TRAINING_TYPES.keys())[0])
         if training_type_val not in TRAINING_TYPES:
@@ -436,6 +451,7 @@ class AppUI:
             delete_checkpoints_btn,
             training_preset,
             model_type_val,
+            model_version_val,
             training_type_val,
             lora_rank_val,
             lora_alpha_val,
@@ -453,10 +469,22 @@ class AppUI:
         """Initialize UI components from saved state"""
         ui_state = self.load_ui_values()
 
+        # Get model type and determine the default model version if not specified
+        model_type = ui_state.get("model_type", list(MODEL_TYPES.keys())[0])
+        model_internal_type = MODEL_TYPES.get(model_type)
+
+        # Get model_version, defaulting to first available version if not set
+        model_version = ui_state.get("model_version", "")
+        if not model_version and model_internal_type and model_internal_type in MODEL_VERSIONS:
+            versions = list(MODEL_VERSIONS.get(model_internal_type, {}).keys())
+            if versions:
+                model_version = versions[0]
+
         # Return values in order matching the outputs in app.load
         return (
             ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
-            ui_state.get("model_type", list(MODEL_TYPES.keys())[0]),
+            model_type,
+            model_version,
             ui_state.get("training_type", list(TRAINING_TYPES.keys())[0]),
             ui_state.get("lora_rank", DEFAULT_LORA_RANK_STR),
             ui_state.get("lora_alpha", DEFAULT_LORA_ALPHA_STR),
vms/ui/monitoring/services/monitoring.py CHANGED
@@ -22,6 +22,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 class MonitoringService:
     """Service for monitoring system resources and performance"""
vms/ui/monitoring/tabs/general_tab.py CHANGED
@@ -17,7 +17,7 @@ from vms.config import STORAGE_PATH
 from vms.ui.monitoring.utils import get_folder_size, human_readable_size
 
 logger = logging.getLogger(__name__)
-
+logger.setLevel(logging.INFO)
 
 class GeneralTab(BaseTab):
     """Monitor tab for general system resource monitoring"""
vms/ui/project/services/captioning.py CHANGED
@@ -21,6 +21,7 @@ from vms.config import TRAINING_VIDEOS_PATH, STAGING_PATH, PRELOAD_CAPTIONING_MO
 from vms.utils import extract_scene_info, is_image_file, is_video_file, copy_files_to_training_dir, prepare_finetrainers_dataset
 
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 @dataclass
 class CaptioningProgress:
vms/ui/project/services/previewing.py CHANGED
@@ -6,17 +6,20 @@ Handles the video generation logic and model integration
 
 import logging
 import tempfile
+import traceback
+import random
 from pathlib import Path
 from typing import Dict, Any, List, Optional, Tuple, Callable
 import time
 
 from vms.config import (
     OUTPUT_PATH, STORAGE_PATH, MODEL_TYPES, TRAINING_PATH,
-    DEFAULT_PROMPT_PREFIX, MODEL_VARIANTS
+    DEFAULT_PROMPT_PREFIX, MODEL_VERSIONS
 )
 from vms.utils import format_time
 
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 class PreviewingService:
     """Handles the video generation logic and model integration"""
@@ -48,14 +51,14 @@ class PreviewingService:
             logger.error(f"Error finding LoRA weights: {e}")
             return None
 
-    def get_model_variants(self, model_type: str) -> Dict[str, Dict[str, str]]:
-        """Get available model variants for the given model type"""
-        return MODEL_VARIANTS.get(model_type, {})
+    def get_model_versions(self, model_type: str) -> Dict[str, Dict[str, str]]:
+        """Get available model versions for the given model type"""
+        return MODEL_VERSIONS.get(model_type, {})
 
     def generate_video(
         self,
         model_type: str,
-        model_variant: str,
+        model_version: str,
         prompt: str,
         negative_prompt: str,
         prompt_prefix: str,
@@ -66,13 +69,15 @@ class PreviewingService:
         flow_shift: float,
         lora_weight: float,
         inference_steps: int,
-        enable_cpu_offload: bool,
-        fps: int,
+        seed: int = -1,
+        enable_cpu_offload: bool = True,
+        fps: int = 16,
         conditioning_image: Optional[str] = None
     ) -> Tuple[Optional[str], str, str]:
         """Generate a video using the trained model"""
        try:
             log_messages = []
+            print("generate_video")
 
             def log(msg: str):
                 log_messages.append(msg)
@@ -102,33 +107,46 @@ class PreviewingService:
             if not internal_model_type:
                 return None, f"Error: Invalid model type {model_type}", log(f"Error: Invalid model type {model_type}")
 
-            # Check if model variant is valid for this model type
-            variants = self.get_model_variants(internal_model_type)
-            if model_variant not in variants:
-                # Use default variant if specified one is invalid
-                if len(variants) > 0:
-                    model_variant = next(iter(variants.keys()))
-                    log(f"Warning: Invalid model variant, using default: {model_variant}")
+            # Check if model version is valid
+            # This section uses model_version directly from parameter
+            if model_version:
+                # Verify that the specified model_version exists in our versions
+                versions = self.get_model_versions(internal_model_type)
+                if model_version not in versions:
+                    log(f"Warning: Specified model version '{model_version}' is not recognized")
+                    # Fall back to default version for this model
+                    if len(versions) > 0:
+                        model_version = next(iter(versions.keys()))
+                        log(f"Using default model version instead: {model_version}")
                 else:
-                    # Fall back to default IDs if no variants defined
+                    log(f"Using specified model version: {model_version}")
+            else:
+                # No model version specified, use default
+                versions = self.get_model_versions(internal_model_type)
+                if len(versions) > 0:
+                    model_version = next(iter(versions.keys()))
+                    log(f"No model version specified, using default: {model_version}")
+                else:
+                    # Fall back to hardcoded defaults if no versions defined
                     if internal_model_type == "wan":
-                        model_variant = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+                        model_version = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
                     elif internal_model_type == "ltx_video":
-                        model_variant = "Lightricks/LTX-Video"
+                        model_version = "Lightricks/LTX-Video"
                     elif internal_model_type == "hunyuan_video":
-                        model_variant = "hunyuanvideo-community/HunyuanVideo"
-                    log(f"Warning: No variants defined for model type, using default: {model_variant}")
+                        model_version = "hunyuanvideo-community/HunyuanVideo"
+                    log(f"No versions defined for model type, using default: {model_version}")
 
             # Check if this is an image-to-video model but no image was provided
-            variant_info = variants.get(model_variant, {})
-            if variant_info.get("type") == "image-to-video" and not conditioning_image:
-                return None, "Error: This model requires a conditioning image", log("Error: This model variant requires a conditioning image but none was provided")
+            model_version_info = versions.get(model_version, {})
+            if model_version_info.get("type") == "image-to-video" and not conditioning_image:
+                return None, "Error: This model requires a conditioning image", log("Error: This model version requires a conditioning image but none was provided")
 
             log(f"Generating video with model type: {internal_model_type}")
-            log(f"Using model variant: {model_variant}")
+            log(f"Using model version: {model_version}")
             log(f"Using LoRA weights from: {lora_path}")
             log(f"Resolution: {width}x{height}, Frames: {num_frames}, FPS: {fps}")
             log(f"Guidance Scale: {guidance_scale}, Flow Shift: {flow_shift}, LoRA Weight: {lora_weight}")
+            log(f"Generation Seed: {seed}")
             log(f"Prompt: {full_prompt}")
             log(f"Negative Prompt: {negative_prompt}")
 
@@ -137,22 +155,22 @@ class PreviewingService:
                 return self.generate_wan_video(
                     full_prompt, negative_prompt, width, height, num_frames,
                     guidance_scale, flow_shift, lora_path, lora_weight,
-                    inference_steps, enable_cpu_offload, fps, log,
-                    model_variant, conditioning_image
+                    inference_steps, seed, enable_cpu_offload, fps, log,
+                    model_version, conditioning_image
                 )
             elif internal_model_type == "ltx_video":
                 return self.generate_ltx_video(
                     full_prompt, negative_prompt, width, height, num_frames,
                     guidance_scale, flow_shift, lora_path, lora_weight,
-                    inference_steps, enable_cpu_offload, fps, log,
-                    model_variant, conditioning_image
+                    inference_steps, seed, enable_cpu_offload, fps, log,
+                    model_version, conditioning_image
                 )
             elif internal_model_type == "hunyuan_video":
                 return self.generate_hunyuan_video(
                     full_prompt, negative_prompt, width, height, num_frames,
                     guidance_scale, flow_shift, lora_path, lora_weight,
-                    inference_steps, enable_cpu_offload, fps, log,
-                    model_variant, conditioning_image
+                    inference_steps, seed, enable_cpu_offload, fps, log,
+                    model_version, conditioning_image
                 )
             else:
                 return None, f"Error: Unsupported model type {internal_model_type}", log(f"Error: Unsupported model type {internal_model_type}")
@@ -173,16 +191,18 @@ class PreviewingService:
         lora_path: str,
         lora_weight: float,
         inference_steps: int,
-        enable_cpu_offload: bool,
-        fps: int,
-        log_fn: Callable,
-        model_variant: str = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
+        seed: int = -1,
+        enable_cpu_offload: bool = True,
+        fps: int = 16,
+        log_fn: Callable = print,
+        model_version: str = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
         conditioning_image: Optional[str] = None
     ) -> Tuple[Optional[str], str, str]:
         """Generate video using Wan model"""
 
         try:
             import torch
+            import numpy as np
             from diffusers import AutoencoderKLWan, WanPipeline
             from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
             from diffusers.utils import export_to_video
@@ -192,14 +212,26 @@ class PreviewingService:
             start_time = torch.cuda.Event(enable_timing=True)
             end_time = torch.cuda.Event(enable_timing=True)
 
-
+            print("Initializing wan generation..")
             log_fn("Importing Wan model components...")
 
-            log_fn(f"Loading VAE from {model_variant}...")
-            vae = AutoencoderKLWan.from_pretrained(model_variant, subfolder="vae", torch_dtype=torch.float32)
+            # Set up random seed
+            if seed == -1:
+                seed = random.randint(0, 2**32 - 1)
+                log_fn(f"Using randomly generated seed: {seed}")
+
+            # Set random seeds for reproducibility
+            random.seed(seed)
+            np.random.seed(seed)
+            torch.manual_seed(seed)
+            generator = torch.Generator(device="cuda")
+            generator = generator.manual_seed(seed)
 
-            log_fn(f"Loading transformer from {model_variant}...")
-            pipe = WanPipeline.from_pretrained(model_variant, vae=vae, torch_dtype=torch.bfloat16)
+            log_fn(f"Loading VAE from {model_version}...")
+            vae = AutoencoderKLWan.from_pretrained(model_version, subfolder="vae", torch_dtype=torch.float32)
+
+            log_fn(f"Loading transformer from {model_version}...")
+            pipe = WanPipeline.from_pretrained(model_version, vae=vae, torch_dtype=torch.bfloat16)
 
             log_fn(f"Configuring scheduler with flow_shift={flow_shift}...")
             pipe.scheduler = UniPCMultistepScheduler.from_config(
@@ -213,11 +245,13 @@ class PreviewingService:
             if enable_cpu_offload:
                 log_fn("Enabling model CPU offload...")
                 pipe.enable_model_cpu_offload()
-
+
             log_fn(f"Loading LoRA weights from {lora_path} with weight {lora_weight}...")
             pipe.load_lora_weights(lora_path)
-            pipe.fuse_lora(lora_weight)
 
+            # TODO: Set the lora scale directly instead of using fuse_lora
+            #pipe._lora_scale = lora_weight
+
             # Create temporary file for the output
             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
                 output_path = temp_file.name
@@ -226,7 +260,7 @@ class PreviewingService:
             start_time.record()
 
             # Check if this is an image-to-video model
-            is_i2v = "I2V" in model_variant
+            is_i2v = "I2V" in model_version
 
             if is_i2v and conditioning_image:
                 log_fn(f"Loading conditioning image from {conditioning_image}...")
@@ -243,6 +277,7 @@ class PreviewingService:
                     num_frames=num_frames,
                     guidance_scale=guidance_scale,
                     num_inference_steps=inference_steps,
+                    generator=generator,
                 ).frames[0]
             else:
                 log_fn("Generating video with text-only conditioning...")
@@ -254,6 +289,7 @@ class PreviewingService:
                     num_frames=num_frames,
                    guidance_scale=guidance_scale,
                     num_inference_steps=inference_steps,
+                    generator=generator,
                 ).frames[0]
 
             end_time.record()
@@ -274,11 +310,12 @@ class PreviewingService:
             return output_path, "Video generated successfully!", log_fn(f"Generation completed in {format_time(generation_time)}")
 
         except Exception as e:
+            traceback.print_exc()
             log_fn(f"Error generating video with Wan: {str(e)}")
             # Clean up CUDA memory
             torch.cuda.empty_cache()
             return None, f"Error: {str(e)}", log_fn(f"Exception occurred: {str(e)}")
-
+
     def generate_ltx_video(
         self,
         prompt: str,
@@ -291,27 +328,41 @@ class PreviewingService:
         lora_path: str,
         lora_weight: float,
        inference_steps: int,
-        enable_cpu_offload: bool,
-        fps: int,
-        log_fn: Callable,
-        model_variant: str = "Lightricks/LTX-Video",
+        seed: int = -1,
+        enable_cpu_offload: bool = True,
+        fps: int = 16,
+        log_fn: Callable = print,
+        model_version: str = "Lightricks/LTX-Video",
         conditioning_image: Optional[str] = None
     ) -> Tuple[Optional[str], str, str]:
         """Generate video using LTX model"""
 
         try:
             import torch
+            import numpy as np
             from diffusers import LTXPipeline
             from diffusers.utils import export_to_video
             from PIL import Image
 
             start_time = torch.cuda.Event(enable_timing=True)
             end_time = torch.cuda.Event(enable_timing=True)
+
+            # Set up random seed
+            if seed == -1:
+                seed = random.randint(0, 2**32 - 1)
+                log_fn(f"Using randomly generated seed: {seed}")
+
+            # Set random seeds for reproducibility
+            random.seed(seed)
+            np.random.seed(seed)
+            torch.manual_seed(seed)
+            generator = torch.Generator(device="cuda")
+            generator = generator.manual_seed(seed)
 
             log_fn("Importing LTX model components...")
 
-            log_fn(f"Loading pipeline from {model_variant}...")
-            pipe = LTXPipeline.from_pretrained(model_variant, torch_dtype=torch.bfloat16)
+            log_fn(f"Loading pipeline from {model_version}...")
+            pipe = LTXPipeline.from_pretrained(model_version, torch_dtype=torch.bfloat16)
 
             log_fn("Moving pipeline to CUDA device...")
             pipe.to("cuda")
@@ -342,6 +393,7 @@ class PreviewingService:
                 decode_timestep=0.03,
                 decode_noise_scale=0.025,
                 num_inference_steps=inference_steps,
+                generator=generator,
             ).frames[0]
 
             end_time.record()
@@ -379,10 +431,11 @@ class PreviewingService:
         lora_path: str,
         lora_weight: float,
         inference_steps: int,
-        enable_cpu_offload: bool,
-        fps: int,
-        log_fn: Callable,
-        model_variant: str = "hunyuanvideo-community/HunyuanVideo",
+        seed: int = -1,
+        enable_cpu_offload: bool = True,
+        fps: int = 16,
+        log_fn: Callable = print,
+        model_version: str = "hunyuanvideo-community/HunyuanVideo",
         conditioning_image: Optional[str] = None
     ) -> Tuple[Optional[str], str, str]:
         """Generate video using HunyuanVideo model"""
@@ -390,24 +443,37 @@ class PreviewingService:
 
         try:
             import torch
+            import numpy as np
             from diffusers import HunyuanVideoPipeline, HunyuanVideoTransformer3DModel, AutoencoderKLHunyuanVideo
             from diffusers.utils import export_to_video
 
             start_time = torch.cuda.Event(enable_timing=True)
             end_time = torch.cuda.Event(enable_timing=True)
+
+            # Set up random seed
+            if seed == -1:
+                seed = random.randint(0, 2**32 - 1)
+                log_fn(f"Using randomly generated seed: {seed}")
+
+            # Set random seeds for reproducibility
+            random.seed(seed)
+            np.random.seed(seed)
+            torch.manual_seed(seed)
+            generator = torch.Generator(device="cuda")
+            generator = generator.manual_seed(seed)
 
             log_fn("Importing HunyuanVideo model components...")
 
-            log_fn(f"Loading transformer from {model_variant}...")
+            log_fn(f"Loading transformer from {model_version}...")
             transformer = HunyuanVideoTransformer3DModel.from_pretrained(
-                model_variant,
+                model_version,
                 subfolder="transformer",
                 torch_dtype=torch.bfloat16
             )
 
-            log_fn(f"Loading pipeline from {model_variant}...")
+            log_fn(f"Loading pipeline from {model_version}...")
             pipe = HunyuanVideoPipeline.from_pretrained(
-                model_variant,
+                model_version,
                 transformer=transformer,
                 torch_dtype=torch.float16
             )
@@ -446,6 +512,7 @@ class PreviewingService:
                 guidance_scale=guidance_scale,
                 true_cfg_scale=1.0,
                 num_inference_steps=inference_steps,
+                generator=generator,
             ).frames[0]
 
             end_time.record()
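The three generate_*_video methods above share the same seeding convention introduced in this commit: seed == -1 means "pick a fresh random seed", any other value is applied to Python and PyTorch and to the diffusers generator so a preview can be reproduced. A condensed, hedged sketch of that convention (assumes a CUDA device is available, as the code above does; the helper name is illustrative):

```python
import random
import torch

def make_generator(seed: int = -1, device: str = "cuda") -> torch.Generator:
    # -1 means "pick a fresh random seed"; any other value is reused verbatim.
    if seed == -1:
        seed = random.randint(0, 2**32 - 1)
    random.seed(seed)
    torch.manual_seed(seed)
    # Assumes the requested device exists (the preview code always targets CUDA).
    return torch.Generator(device=device).manual_seed(seed)

# The returned generator is then passed as `generator=` to the diffusers pipeline call.
```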
vms/ui/project/services/splitting.py CHANGED
@@ -16,6 +16,7 @@ from vms.config import TRAINING_PATH, STORAGE_PATH, TRAINING_VIDEOS_PATH, VIDEOS
 from vms.utils import remove_black_bars, extract_scene_info, is_video_file, is_image_file, add_prefix_to_caption
 
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 class SplittingService:
     def __init__(self):
vms/ui/project/services/training.py CHANGED
@@ -23,7 +23,7 @@ from huggingface_hub import upload_folder, create_repo
 from vms.config import (
     TrainingConfig, TRAINING_PRESETS, LOG_FILE_PATH, TRAINING_VIDEOS_PATH,
     STORAGE_PATH, TRAINING_PATH, MODEL_PATH, OUTPUT_PATH, HF_API_TOKEN,
-    MODEL_TYPES, TRAINING_TYPES,
+    MODEL_TYPES, TRAINING_TYPES, MODEL_VERSIONS,
     DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
     DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
     DEFAULT_LEARNING_RATE,
@@ -50,6 +50,7 @@ from vms.utils import (
 )
 
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 class TrainingService:
     def __init__(self, app=None):
@@ -134,6 +135,7 @@ class TrainingService:
         validated_values = {}
         default_state = {
             "model_type": list(MODEL_TYPES.keys())[0],
+            "model_version": "",
             "training_type": list(TRAINING_TYPES.keys())[0],
             "lora_rank": DEFAULT_LORA_RANK_STR,
             "lora_alpha": DEFAULT_LORA_ALPHA_STR,
@@ -213,6 +215,7 @@ class TrainingService:
         ui_state_file = OUTPUT_PATH / "ui_state.json"
         default_state = {
             "model_type": list(MODEL_TYPES.keys())[0],
+            "model_version": "",
             "training_type": list(TRAINING_TYPES.keys())[0],
             "lora_rank": DEFAULT_LORA_RANK_STR,
             "lora_alpha": DEFAULT_LORA_ALPHA_STR,
@@ -255,7 +258,7 @@ class TrainingService:
         if "model_type" in saved_state and " (LoRA)" in saved_state["model_type"]:
             saved_state["model_type"] = saved_state["model_type"].replace(" (LoRA)", "")
             logger.info(f"Removed (LoRA) suffix from saved model type: {saved_state['model_type']}")
-
+
         # Convert numeric values to appropriate types
         if "train_steps" in saved_state:
             try:
@@ -302,6 +305,18 @@ class TrainingService:
         if not model_found:
             merged_state["model_type"] = default_state["model_type"]
             logger.warning(f"Invalid model type in saved state, using default")
+
+        # Validate model_version is appropriate for model_type
+        if "model_type" in merged_state and "model_version" in merged_state:
+            model_internal_type = MODEL_TYPES.get(merged_state["model_type"])
+            if model_internal_type:
+                valid_versions = MODEL_VERSIONS.get(model_internal_type, {}).keys()
+                if merged_state["model_version"] not in valid_versions:
+                    # Set to default for this model type
+                    from vms.ui.project.tabs.train_tab import TrainTab
+                    train_tab = TrainTab(None)  # Temporary instance just for the helper method
+                    merged_state["model_version"] = train_tab.get_default_model_version(saved_state["model_type"])
+                    logger.warning(f"Invalid model version for {merged_state['model_type']}, using default")
 
         # Validate training_type is in available choices
         if merged_state["training_type"] not in TRAINING_TYPES:
@@ -545,6 +560,7 @@ class TrainingService:
         repo_id: str,
         preset_name: str,
         training_type: str = DEFAULT_TRAINING_TYPE,
+        model_version: str = "",
         resume_from_checkpoint: Optional[str] = None,
         num_gpus: int = DEFAULT_NUM_GPUS,
         precomputation_items: int = DEFAULT_PRECOMPUTATION_ITEMS,
@@ -869,6 +885,7 @@ class TrainingService:
         # Save session info including repo_id for later hub upload
         self.save_session({
             "model_type": model_type,
+            "model_version": model_version,
             "training_type": training_type,
             "lora_rank": lora_rank,
             "lora_alpha": lora_alpha,
@@ -1039,6 +1056,7 @@ class TrainingService:
         last_session = {
             "params": {
                 "model_type": MODEL_TYPES.get(ui_state.get("model_type", list(MODEL_TYPES.keys())[0])),
+                "model_version": ui_state.get("model_version", ""),
                 "training_type": TRAINING_TYPES.get(ui_state.get("training_type", list(TRAINING_TYPES.keys())[0])),
                 "lora_rank": ui_state.get("lora_rank", DEFAULT_LORA_RANK_STR),
                 "lora_alpha": ui_state.get("lora_alpha", DEFAULT_LORA_ALPHA_STR),
@@ -1102,8 +1120,9 @@ class TrainingService:
        # Add UI updates to restore the training parameters in the UI
        # This shows the user what values are being used for the resumed training
        ui_updates.update({
-            "model_type": model_type_display, # Use the display name for the UI dropdown
-            "training_type": training_type_display, # Use the display name for training type
+            "model_type": model_type_display,
+            "model_version": params.get('model_version', ''),
+            "training_type": training_type_display,
            "lora_rank": params.get('lora_rank', DEFAULT_LORA_RANK_STR),
            "lora_alpha": params.get('lora_alpha', DEFAULT_LORA_ALPHA_STR),
            "train_steps": params.get('train_steps', DEFAULT_NB_TRAINING_STEPS),
@@ -1122,19 +1141,19 @@ class TrainingService:
        # Use the internal model_type for the actual training
        # But keep model_type_display for the UI
        result = self.start_training(
-            model_type=model_type_internal,
+            model_type=model_internal_type,
            lora_rank=params.get('lora_rank', DEFAULT_LORA_RANK_STR),
            lora_alpha=params.get('lora_alpha', DEFAULT_LORA_ALPHA_STR),
            train_size=params.get('train_steps', DEFAULT_NB_TRAINING_STEPS),
            batch_size=params.get('batch_size', DEFAULT_BATCH_SIZE),
            learning_rate=params.get('learning_rate', DEFAULT_LEARNING_RATE),
            save_iterations=params.get('save_iterations', DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS),
+            model_version=params.get('model_version', ''),
            repo_id=params.get('repo_id', ''),
            preset_name=params.get('preset_name', list(TRAINING_PRESETS.keys())[0]),
            training_type=training_type_internal,
            resume_from_checkpoint=str(latest_checkpoint)
        )
-
        # Set buttons for active training
        ui_updates.update({
            "start_btn": {"interactive": False, "variant": "secondary", "value": "Continue Training"},
@@ -1142,7 +1161,7 @@ class TrainingService:
            "delete_checkpoints_btn": {"interactive": False, "variant": "stop", "value": "Delete All Checkpoints"},
            "pause_resume_btn": {"interactive": False, "variant": "secondary", "visible": False}
        })
-
+
        return {
            "status": "recovered",
            "message": f"Training resumed from checkpoint {checkpoint_step}",
vms/ui/project/tabs/preview_tab.py CHANGED
@@ -4,16 +4,18 @@ Preview tab for Video Model Studio UI
4
 
5
  import gradio as gr
6
  import logging
 
7
  from pathlib import Path
8
  from typing import Dict, Any, List, Optional, Tuple
9
  import time
10
 
11
  from vms.utils import BaseTab
12
  from vms.config import (
13
- MODEL_TYPES, DEFAULT_PROMPT_PREFIX
14
  )
15
 
16
  logger = logging.getLogger(__name__)
 
17
 
18
  class PreviewTab(BaseTab):
19
  """Preview tab for testing trained models"""
@@ -49,25 +51,35 @@ class PreviewTab(BaseTab):
49
  placeholder="Prefix to add to all prompts",
50
  value=DEFAULT_PROMPT_PREFIX
51
  )
 
 
 
 
 
 
 
 
 
52
 
53
  with gr.Row():
54
  # Get the currently selected model type from training tab if possible
55
  default_model = self.get_default_model_type()
56
 
57
- # Make model_type read-only (disabled), as it must match what was trained
58
- self.components["model_type"] = gr.Dropdown(
59
- choices=list(MODEL_TYPES.keys()),
60
- label="Model Type (from training)",
61
- value=default_model,
62
- interactive=False
63
- )
64
-
65
- # Add model variant selection based on model type
66
- self.components["model_variant"] = gr.Dropdown(
67
- label="Model Variant",
68
- choices=self.get_variant_choices(default_model),
69
- value=self.get_default_variant(default_model)
70
- )
 
71
 
72
  # Add image input for image-to-video models
73
  self.components["conditioning_image"] = gr.Image(
@@ -177,36 +189,55 @@ class PreviewTab(BaseTab):
177
 
178
  return tab
179
 
180
- def get_variant_choices(self, model_type: str) -> List[str]:
181
- """Get model variant choices based on model type"""
182
  # Convert UI display name to internal name
183
  internal_type = MODEL_TYPES.get(model_type)
184
  if not internal_type:
185
  return []
186
 
187
- # Get variants from preview service
188
- variants = self.app.previewing.get_model_variants(internal_type)
189
- if not variants:
190
  return []
191
 
192
  # Format choices with display name and description
193
  choices = []
194
- for model_id, info in variants.items():
195
  choices.append(f"{model_id} - {info.get('name', '')}")
196
 
197
  return choices
198
 
199
- def get_default_variant(self, model_type: str) -> str:
200
- """Get default model variant for the model type"""
201
- choices = self.get_variant_choices(model_type)
202
  if choices:
203
  return choices[0]
204
  return ""
205
-
206
  def get_default_model_type(self) -> str:
207
- """Get the currently selected model type from training tab"""
208
  try:
209
- # Try to get the model type from UI state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  ui_state = self.app.training.load_ui_state()
211
  model_type = ui_state.get("model_type")
212
 
@@ -214,7 +245,7 @@ class PreviewTab(BaseTab):
214
  if model_type in MODEL_TYPES:
215
  return model_type
216
 
217
- # If we couldn't get a valid model type, try to get it from the training tab directly
218
  if hasattr(self.app, 'tabs') and 'train_tab' in self.app.tabs:
219
  train_tab = self.app.tabs['train_tab']
220
  if hasattr(train_tab, 'components') and 'model_type' in train_tab.components:
@@ -225,31 +256,31 @@ class PreviewTab(BaseTab):
225
  # Fallback to first model type
226
  return list(MODEL_TYPES.keys())[0]
227
  except Exception as e:
228
- logger.warning(f"Failed to get default model type: {e}")
229
  return list(MODEL_TYPES.keys())[0]
230
 
231
- def extract_model_id(self, variant_choice: str) -> str:
232
- """Extract model ID from variant choice string"""
233
- if " - " in variant_choice:
234
- return variant_choice.split(" - ")[0].strip()
235
- return variant_choice
236
 
237
- def get_variant_type(self, model_type: str, model_variant: str) -> str:
238
- """Get the variant type (text-to-video or image-to-video)"""
239
  # Convert UI display name to internal name
240
  internal_type = MODEL_TYPES.get(model_type)
241
  if not internal_type:
242
  return "text-to-video"
243
 
244
- # Extract model_id from variant choice
245
- model_id = self.extract_model_id(model_variant)
246
 
247
- # Get variants from preview service
248
- variants = self.app.previewing.get_model_variants(internal_type)
249
- variant_info = variants.get(model_id, {})
250
 
251
- # Return the variant type or default to text-to-video
252
- return variant_info.get("type", "text-to-video")
253
 
254
  def connect_events(self) -> None:
255
  """Connect event handlers to UI components"""
@@ -264,23 +295,23 @@ class PreviewTab(BaseTab):
264
  ]
265
  )
266
 
267
- # Update model_variant choices when model_type changes or tab is selected
268
  if hasattr(self.app, 'tabs_component') and self.app.tabs_component is not None:
269
  self.app.tabs_component.select(
270
- fn=self.sync_model_type_and_variants,
271
  inputs=[],
272
  outputs=[
273
  self.components["model_type"],
274
- self.components["model_variant"]
275
  ]
276
  )
277
 
278
- # Update variant-specific UI elements when variant changes
279
- self.components["model_variant"].change(
280
- fn=self.update_variant_ui,
281
  inputs=[
282
  self.components["model_type"],
283
- self.components["model_variant"]
284
  ],
285
  outputs=[
286
  self.components["conditioning_image"]
@@ -305,13 +336,13 @@ class PreviewTab(BaseTab):
305
  self.components["lora_weight"],
306
  self.components["inference_steps"],
307
  self.components["enable_cpu_offload"],
308
- self.components["model_variant"]
309
  ]
310
  )
311
 
312
  # Save preview UI state when values change
313
  for component_name in [
314
- "prompt", "negative_prompt", "prompt_prefix", "model_variant", "resolution_preset",
315
  "width", "height", "num_frames", "fps", "guidance_scale", "flow_shift",
316
  "lora_weight", "inference_steps", "enable_cpu_offload"
317
  ]:
@@ -327,7 +358,7 @@ class PreviewTab(BaseTab):
327
  fn=self.generate_video,
328
  inputs=[
329
  self.components["model_type"],
330
- self.components["model_variant"],
331
  self.components["prompt"],
332
  self.components["negative_prompt"],
333
  self.components["prompt_prefix"],
@@ -349,22 +380,41 @@ class PreviewTab(BaseTab):
349
  ]
350
  )
351
 
352
- def update_variant_ui(self, model_type: str, model_variant: str) -> Dict[str, Any]:
353
- """Update UI based on the selected model variant"""
354
- variant_type = self.get_variant_type(model_type, model_variant)
355
 
356
  # Show conditioning image input only for image-to-video models
357
- show_conditioning_image = variant_type == "image-to-video"
358
 
359
  return {
360
  self.components["conditioning_image"]: gr.Image(visible=show_conditioning_image)
361
  }
362
 
363
- def sync_model_type_and_variants(self) -> Tuple[str, str]:
364
- """Sync model type with training tab when preview tab is selected and update variant choices"""
365
  model_type = self.get_default_model_type()
366
- model_variant = self.get_default_variant(model_type)
367
- return model_type, model_variant
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
  def update_resolution(self, preset: str) -> Tuple[int, int, float]:
370
  """Update resolution and flow shift based on preset"""
@@ -385,11 +435,11 @@ class PreviewTab(BaseTab):
385
  # Get model type (can't be changed in UI)
386
  model_type = self.get_default_model_type()
387
 
388
- # If model_variant not in choices for current model_type, use default
389
- model_variant = preview_state.get("model_variant", "")
390
- variant_choices = self.get_variant_choices(model_type)
391
- if model_variant not in variant_choices and variant_choices:
392
- model_variant = variant_choices[0]
393
 
394
  return (
395
  preview_state.get("prompt", ""),
@@ -404,7 +454,7 @@ class PreviewTab(BaseTab):
404
  preview_state.get("lora_weight", 0.7),
405
  preview_state.get("inference_steps", 30),
406
  preview_state.get("enable_cpu_offload", True),
407
- model_variant
408
  )
409
  except Exception as e:
410
  logger.error(f"Error loading preview state: {e}")
@@ -414,7 +464,7 @@ class PreviewTab(BaseTab):
414
  "worst quality, low quality, blurry, jittery, distorted, ugly, deformed, disfigured, messy background",
415
  DEFAULT_PROMPT_PREFIX,
416
  832, 480, 49, 16, 5.0, 3.0, 0.7, 30, True,
417
- self.get_default_variant(self.get_default_model_type())
418
  )
419
 
420
  def save_preview_state_value(self, value: Any) -> None:
@@ -456,7 +506,7 @@ class PreviewTab(BaseTab):
456
  def generate_video(
457
  self,
458
  model_type: str,
459
- model_variant: str,
460
  prompt: str,
461
  negative_prompt: str,
462
  prompt_prefix: str,
@@ -473,13 +523,14 @@ class PreviewTab(BaseTab):
473
  ) -> Tuple[Optional[str], str, str]:
474
  """Handler for generate button click, delegates to preview service"""
475
  # Save all the parameters to preview state before generating
 
476
  try:
477
  state = self.app.training.load_ui_state()
478
  if "preview" not in state:
479
  state["preview"] = {}
480
 
481
- # Extract model ID from variant choice
482
- model_variant_id = self.extract_model_id(model_variant)
483
 
484
  # Update all values
485
  preview_state = {
@@ -487,7 +538,7 @@ class PreviewTab(BaseTab):
487
  "negative_prompt": negative_prompt,
488
  "prompt_prefix": prompt_prefix,
489
  "model_type": model_type,
490
- "model_variant": model_variant,
491
  "width": width,
492
  "height": height,
493
  "num_frames": num_frames,
@@ -504,40 +555,30 @@ class PreviewTab(BaseTab):
504
  except Exception as e:
505
  logger.error(f"Error saving preview state before generation: {e}")
506
 
507
- # Clear the log display at the start to make room for new logs
508
- # Yield and sleep briefly to allow UI update
509
- yield None, "Starting generation...", ""
510
- time.sleep(0.1)
511
 
512
- # Extract model ID from variant choice string
513
- model_variant_id = self.extract_model_id(model_variant)
514
 
515
- # Use streaming updates to provide real-time feedback during generation
516
- def generate_with_updates():
517
- # Initial UI update
518
- yield None, "Initializing generation...", "Starting video generation process..."
519
-
520
- # Start actual generation
521
- result = self.app.previewing.generate_video(
522
- model_type=model_type,
523
- model_variant=model_variant_id,
524
- prompt=prompt,
525
- negative_prompt=negative_prompt,
526
- prompt_prefix=prompt_prefix,
527
- width=width,
528
- height=height,
529
- num_frames=num_frames,
530
- guidance_scale=guidance_scale,
531
- flow_shift=flow_shift,
532
- lora_weight=lora_weight,
533
- inference_steps=inference_steps,
534
- enable_cpu_offload=enable_cpu_offload,
535
- fps=fps,
536
- conditioning_image=conditioning_image
537
- )
538
-
539
- # Return final result
540
- return result
541
 
542
- # Return the generator for streaming updates
543
- return generate_with_updates()
 
4
 
5
  import gradio as gr
6
  import logging
7
+ import json
8
  from pathlib import Path
9
  from typing import Dict, Any, List, Optional, Tuple
10
  import time
11
 
12
  from vms.utils import BaseTab
13
  from vms.config import (
14
+ OUTPUT_PATH, MODEL_TYPES, DEFAULT_PROMPT_PREFIX, MODEL_VERSIONS
15
  )
16
 
17
  logger = logging.getLogger(__name__)
18
+ logger.setLevel(logging.INFO)
19
 
20
  class PreviewTab(BaseTab):
21
  """Preview tab for testing trained models"""
 
51
  placeholder="Prefix to add to all prompts",
52
  value=DEFAULT_PROMPT_PREFIX
53
  )
54
+
55
+ self.components["seed"] = gr.Slider(
56
+ label="Generation Seed (-1 for random)",
57
+ minimum=-1,
58
+ maximum=2147483647, # 2^31 - 1
59
+ step=1,
60
+ value=-1,
61
+ info="Set to -1 for random seed or specific value for reproducible results"
62
+ )
63
 
64
  with gr.Row():
65
  # Get the currently selected model type from training tab if possible
66
  default_model = self.get_default_model_type()
67
 
68
+ with gr.Column():
69
+ # Make model_type read-only (disabled), as it must match what was trained
70
+ self.components["model_type"] = gr.Dropdown(
71
+ choices=list(MODEL_TYPES.keys()),
72
+ label="Model Type (from training)",
73
+ value=default_model,
74
+ interactive=False
75
+ )
76
+
77
+ # Add model version selection based on model type
78
+ self.components["model_version"] = gr.Dropdown(
79
+ label="Model Version",
80
+ choices=self.get_model_version_choices(default_model),
81
+ value=self.get_default_model_version(default_model)
82
+ )
83
 
84
  # Add image input for image-to-video models
85
  self.components["conditioning_image"] = gr.Image(
 
189
 
190
  return tab
191
 
192
+ def get_model_version_choices(self, model_type: str) -> List[str]:
193
+ """Get model version choices based on model type"""
194
  # Convert UI display name to internal name
195
  internal_type = MODEL_TYPES.get(model_type)
196
  if not internal_type:
197
  return []
198
 
199
+ # Get versions from preview service
200
+ versions = self.app.previewing.get_model_versions(internal_type)
201
+ if not versions:
202
  return []
203
 
204
  # Format choices with display name and description
205
  choices = []
206
+ for model_id, info in versions.items():
207
  choices.append(f"{model_id} - {info.get('name', '')}")
208
 
209
  return choices
210
 
211
+ def get_default_model_version(self, model_type: str) -> str:
212
+ """Get default model version for the model type"""
213
+ choices = self.get_model_version_choices(model_type)
214
  if choices:
215
  return choices[0]
216
  return ""
217
+
218
  def get_default_model_type(self) -> str:
219
+ """Get the model type from the latest training session"""
220
  try:
221
+ # First check session.json, which contains the actual training data
222
+ session_file = OUTPUT_PATH / "session.json"
223
+ if session_file.exists():
224
+ with open(session_file, 'r') as f:
225
+ session_data = json.load(f)
226
+
227
+ # Get the internal model type from the session parameters
228
+ if "params" in session_data and "model_type" in session_data["params"]:
229
+ internal_model_type = session_data["params"]["model_type"]
230
+
231
+ # Convert internal model type to display name
232
+ for display_name, internal_name in MODEL_TYPES.items():
233
+ if internal_name == internal_model_type:
234
+ logger.info(f"Using model type '{display_name}' from session file")
235
+ return display_name
236
+
237
+ # If we couldn't map it, log a warning
238
+ logger.warning(f"Could not map internal model type '{internal_model_type}' to a display name")
239
+
240
+ # If we couldn't get it from session.json, try to get it from UI state
241
  ui_state = self.app.training.load_ui_state()
242
  model_type = ui_state.get("model_type")
243
 
 
245
  if model_type in MODEL_TYPES:
246
  return model_type
247
 
248
+ # If we still couldn't get a valid model type, try to get it from the training tab
249
  if hasattr(self.app, 'tabs') and 'train_tab' in self.app.tabs:
250
  train_tab = self.app.tabs['train_tab']
251
  if hasattr(train_tab, 'components') and 'model_type' in train_tab.components:
 
256
  # Fallback to first model type
257
  return list(MODEL_TYPES.keys())[0]
258
  except Exception as e:
259
+ logger.warning(f"Failed to get default model type from session: {e}")
260
  return list(MODEL_TYPES.keys())[0]
261
 
262
+ def extract_model_id(self, model_version_choice: str) -> str:
263
+ """Extract model ID from model version choice string"""
264
+ if " - " in model_version_choice:
265
+ return model_version_choice.split(" - ")[0].strip()
266
+ return model_version_choice
267
 
268
+ def get_model_version_type(self, model_type: str, model_version: str) -> str:
269
+ """Get the model version type (text-to-video or image-to-video)"""
270
  # Convert UI display name to internal name
271
  internal_type = MODEL_TYPES.get(model_type)
272
  if not internal_type:
273
  return "text-to-video"
274
 
275
+ # Extract model_id from model version choice
276
+ model_id = self.extract_model_id(model_version)
277
 
278
+ # Get versions from preview service
279
+ versions = self.app.previewing.get_model_versions(internal_type)
280
+ model_version_info = versions.get(model_id, {})
281
 
282
+ # Return the model version type or default to text-to-video
283
+ return model_version_info.get("type", "text-to-video")
284
 
285
  def connect_events(self) -> None:
286
  """Connect event handlers to UI components"""
 
295
  ]
296
  )
297
 
298
+ # Update model_version choices when model_type changes or tab is selected
299
  if hasattr(self.app, 'tabs_component') and self.app.tabs_component is not None:
300
  self.app.tabs_component.select(
301
+ fn=self.sync_model_type_and_versions,
302
  inputs=[],
303
  outputs=[
304
  self.components["model_type"],
305
+ self.components["model_version"]
306
  ]
307
  )
308
 
309
+ # Update model version-specific UI elements when version changes
310
+ self.components["model_version"].change(
311
+ fn=self.update_model_version_ui,
312
  inputs=[
313
  self.components["model_type"],
314
+ self.components["model_version"]
315
  ],
316
  outputs=[
317
  self.components["conditioning_image"]
 
336
  self.components["lora_weight"],
337
  self.components["inference_steps"],
338
  self.components["enable_cpu_offload"],
339
+ self.components["model_version"]
340
  ]
341
  )
342
 
343
  # Save preview UI state when values change
344
  for component_name in [
345
+ "prompt", "negative_prompt", "prompt_prefix", "model_version", "resolution_preset",
346
  "width", "height", "num_frames", "fps", "guidance_scale", "flow_shift",
347
  "lora_weight", "inference_steps", "enable_cpu_offload"
348
  ]:
 
358
  fn=self.generate_video,
359
  inputs=[
360
  self.components["model_type"],
361
+ self.components["model_version"],
362
  self.components["prompt"],
363
  self.components["negative_prompt"],
364
  self.components["prompt_prefix"],
 
380
  ]
381
  )
382
 
383
+ def update_model_version_ui(self, model_type: str, model_version: str) -> Dict[str, Any]:
384
+ """Update UI based on the selected model version"""
385
+ model_version_type = self.get_model_version_type(model_type, model_version)
386
 
387
  # Show conditioning image input only for image-to-video models
388
+ show_conditioning_image = model_version_type == "image-to-video"
389
 
390
  return {
391
  self.components["conditioning_image"]: gr.Image(visible=show_conditioning_image)
392
  }
393
 
394
+ def sync_model_type_and_versions(self) -> Tuple[str, str]:
395
+ """Sync model type with training tab when preview tab is selected and update model version choices"""
396
  model_type = self.get_default_model_type()
397
+ model_version = ""
398
+
399
+ # Try to get model_version from session or UI state
400
+ ui_state = self.app.training.load_ui_state()
401
+ preview_state = ui_state.get("preview", {})
402
+ model_version = preview_state.get("model_version", "")
403
+
404
+ if not model_version:
405
+ # Format it as a display choice
406
+ internal_type = MODEL_TYPES.get(model_type)
407
+ if internal_type and internal_type in MODEL_VERSIONS:
408
+ first_version = next(iter(MODEL_VERSIONS[internal_type].keys()), "")
409
+ if first_version:
410
+ model_version_info = MODEL_VERSIONS[internal_type][first_version]
411
+ model_version = f"{first_version} - {model_version_info.get('name', '')}"
412
+
413
+ # If we couldn't get it, use default
414
+ if not model_version:
415
+ model_version = self.get_default_model_version(model_type)
416
+
417
+ return model_type, model_version
418
 
419
  def update_resolution(self, preset: str) -> Tuple[int, int, float]:
420
  """Update resolution and flow shift based on preset"""
 
435
  # Get model type (can't be changed in UI)
436
  model_type = self.get_default_model_type()
437
 
438
+ # If model_version not in choices for current model_type, use default
439
+ model_version = preview_state.get("model_version", "")
440
+ model_version_choices = self.get_model_version_choices(model_type)
441
+ if model_version not in model_version_choices and model_version_choices:
442
+ model_version = model_version_choices[0]
443
 
444
  return (
445
  preview_state.get("prompt", ""),
 
454
  preview_state.get("lora_weight", 0.7),
455
  preview_state.get("inference_steps", 30),
456
  preview_state.get("enable_cpu_offload", True),
457
+ model_version
458
  )
459
  except Exception as e:
460
  logger.error(f"Error loading preview state: {e}")
 
464
  "worst quality, low quality, blurry, jittery, distorted, ugly, deformed, disfigured, messy background",
465
  DEFAULT_PROMPT_PREFIX,
466
  832, 480, 49, 16, 5.0, 3.0, 0.7, 30, True,
467
+ self.get_default_model_version(self.get_default_model_type())
468
  )
469
 
470
  def save_preview_state_value(self, value: Any) -> None:
 
506
  def generate_video(
507
  self,
508
  model_type: str,
509
+ model_version: str,
510
  prompt: str,
511
  negative_prompt: str,
512
  prompt_prefix: str,
 
523
  ) -> Tuple[Optional[str], str, str]:
524
  """Handler for generate button click, delegates to preview service"""
525
  # Save all the parameters to preview state before generating
526
+ print("preview_tab: generate_video() has been called")
527
  try:
528
  state = self.app.training.load_ui_state()
529
  if "preview" not in state:
530
  state["preview"] = {}
531
 
532
+ # Extract model ID from model version choice
533
+ model_version_id = self.extract_model_id(model_version)
534
 
535
  # Update all values
536
  preview_state = {
 
538
  "negative_prompt": negative_prompt,
539
  "prompt_prefix": prompt_prefix,
540
  "model_type": model_type,
541
+ "model_version": model_version,
542
  "width": width,
543
  "height": height,
544
  "num_frames": num_frames,
 
555
  except Exception as e:
556
  logger.error(f"Error saving preview state before generation: {e}")
557
 
558
+ # Extract model ID from model version choice string
559
+ model_version_id = self.extract_model_id(model_version)
560
 
561
+ # Initial UI update
562
+ video_path, status, log = None, "Initializing generation...", "Starting video generation process..."
563
 
564
+ # Start actual generation
565
+ result = self.app.previewing.generate_video(
566
+ model_type=model_type,
567
+ model_version=model_version_id,
568
+ prompt=prompt,
569
+ negative_prompt=negative_prompt,
570
+ prompt_prefix=prompt_prefix,
571
+ width=width,
572
+ height=height,
573
+ num_frames=num_frames,
574
+ guidance_scale=guidance_scale,
575
+ flow_shift=flow_shift,
576
+ lora_weight=lora_weight,
577
+ inference_steps=inference_steps,
578
+ enable_cpu_offload=enable_cpu_offload,
579
+ fps=fps,
580
+ conditioning_image=conditioning_image
581
+ )
 
582
 
583
+ # Return final result
584
+ return result
vms/ui/project/tabs/train_tab.py CHANGED
@@ -5,12 +5,15 @@ Train tab for Video Model Studio UI with improved task progress display
5
  import gradio as gr
6
  import logging
7
  import os
 
8
  from typing import Dict, Any, List, Optional, Tuple
9
  from pathlib import Path
10
 
11
  from vms.utils import BaseTab
12
  from vms.config import (
13
- TRAINING_PRESETS, OUTPUT_PATH, MODEL_TYPES, ASK_USER_TO_DUPLICATE_SPACE, SMALL_TRAINING_BUCKETS, TRAINING_TYPES,
 
 
14
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
15
  DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
16
  DEFAULT_LEARNING_RATE,
@@ -53,12 +56,27 @@ class TrainTab(BaseTab):
53
 
54
  with gr.Row():
55
  with gr.Column():
56
  self.components["model_type"] = gr.Dropdown(
57
  choices=list(MODEL_TYPES.keys()),
58
  label="Model Type",
59
- value=list(MODEL_TYPES.keys())[0]
 
60
  )
61
- with gr.Column():
62
  self.components["training_type"] = gr.Dropdown(
63
  choices=list(TRAINING_TYPES.keys()),
64
  label="Training Type",
@@ -198,45 +216,36 @@ class TrainTab(BaseTab):
198
 
199
  def connect_events(self) -> None:
200
  """Connect event handlers to UI components"""
201
- # Model type change event
202
- def update_model_info(model, training_type):
203
- params = self.get_default_params(MODEL_TYPES[model], TRAINING_TYPES[training_type])
204
- info = self.get_model_info(model, training_type)
205
- show_lora_params = training_type == list(TRAINING_TYPES.keys())[0] # Show if LoRA Finetune
206
-
207
- return {
208
- self.components["model_info"]: info,
209
- self.components["train_steps"]: params["train_steps"],
210
- self.components["batch_size"]: params["batch_size"],
211
- self.components["learning_rate"]: params["learning_rate"],
212
- self.components["save_iterations"]: params["save_iterations"],
213
- self.components["lora_params_row"]: gr.Row(visible=show_lora_params)
214
- }
215
-
216
  self.components["model_type"].change(
217
  fn=lambda v: self.app.update_ui_state(model_type=v),
218
  inputs=[self.components["model_type"]],
219
  outputs=[]
220
  ).then(
221
- fn=update_model_info,
 
222
  inputs=[self.components["model_type"], self.components["training_type"]],
223
- outputs=[
224
- self.components["model_info"],
225
- self.components["train_steps"],
226
- self.components["batch_size"],
227
- self.components["learning_rate"],
228
- self.components["save_iterations"],
229
- self.components["lora_params_row"]
230
- ]
231
  )
232
 
233
  # Training type change event
234
  self.components["training_type"].change(
235
  fn=lambda v: self.app.update_ui_state(training_type=v),
236
  inputs=[self.components["training_type"]],
237
  outputs=[]
238
  ).then(
239
- fn=update_model_info,
240
  inputs=[self.components["model_type"], self.components["training_type"]],
241
  outputs=[
242
  self.components["model_info"],
@@ -248,7 +257,6 @@ class TrainTab(BaseTab):
248
  ]
249
  )
250
 
251
-
252
  # Add in the connect_events() method:
253
  self.components["num_gpus"].change(
254
  fn=lambda v: self.app.update_ui_state(num_gpus=v),
@@ -326,7 +334,9 @@ class TrainTab(BaseTab):
326
  self.components["lora_params_row"],
327
  self.components["num_gpus"],
328
  self.components["precomputation_items"],
329
- self.components["lr_warmup_steps"]
 
 
330
  ]
331
  )
332
 
@@ -336,6 +346,7 @@ class TrainTab(BaseTab):
336
  inputs=[
337
  self.components["training_preset"],
338
  self.components["model_type"],
 
339
  self.components["training_type"],
340
  self.components["lora_rank"],
341
  self.components["lora_alpha"],
@@ -383,9 +394,19 @@ class TrainTab(BaseTab):
383
  fn=lambda: self.app.training.delete_all_checkpoints(),
384
  outputs=[self.components["status_box"]]
385
  )
386
 
387
  def handle_training_start(
388
- self, preset, model_type, training_type, lora_rank, lora_alpha, train_steps, batch_size, learning_rate, save_iterations, repo_id, progress=gr.Progress()
 
 
389
  ):
390
  """Handle training start with proper log parser reset and checkpoint detection"""
391
  # Safely reset log parser if it exists
@@ -396,9 +417,6 @@ class TrainTab(BaseTab):
396
  from ..utils import TrainingLogParser
397
  self.app.log_parser = TrainingLogParser()
398
 
399
- # Initialize progress
400
- #progress(0, desc="Initializing training")
401
-
402
  # Check for latest checkpoint
403
  checkpoints = list(OUTPUT_PATH.glob("checkpoint-*"))
404
  resume_from = None
@@ -408,10 +426,6 @@ class TrainTab(BaseTab):
408
  latest_checkpoint = max(checkpoints, key=os.path.getmtime)
409
  resume_from = str(latest_checkpoint)
410
  logger.info(f"Found checkpoint at {resume_from}, will resume training")
411
- #progress(0.05, desc=f"Resuming from checkpoint {Path(resume_from).name}")
412
- else:
413
- #progress(0.05, desc="Starting new training run")
414
- pass
415
 
416
  # Convert model_type display name to internal name
417
  model_internal_type = MODEL_TYPES.get(model_type)
@@ -432,9 +446,6 @@ class TrainTab(BaseTab):
432
  precomputation_items = int(self.components["precomputation_items"].value)
433
  lr_warmup_steps = int(self.components["lr_warmup_steps"].value)
434
 
435
- # Progress update
436
- #progress(0.1, desc="Preparing dataset")
437
-
438
  # Start training (it will automatically use the checkpoint if provided)
439
  try:
440
  return self.app.training.start_training(
@@ -448,6 +459,7 @@ class TrainTab(BaseTab):
448
  repo_id,
449
  preset_name=preset,
450
  training_type=training_internal_type,
 
451
  resume_from_checkpoint=resume_from,
452
  num_gpus=num_gpus,
453
  precomputation_items=precomputation_items,
@@ -458,6 +470,52 @@ class TrainTab(BaseTab):
458
  logger.exception("Error starting training")
459
  return f"Error starting training: {str(e)}", f"Exception: {str(e)}\n\nCheck the logs for more details."
460
 
461
  def get_model_info(self, model_type: str, training_type: str) -> str:
462
  """Get information about the selected model type and training method"""
463
  if model_type == "HunyuanVideo":
@@ -483,14 +541,14 @@ class TrainTab(BaseTab):
483
  else:
484
  return base_info + "\n- Required VRAM: ~21GB minimum\n- Full model size: ~8GB"
485
 
486
- elif model_type == "Wan-2.1-T2V":
487
- base_info = """### Wan-2.1-T2V
488
- - Recommended batch size: ?
489
- - Typical training time: ? hours
490
  - Default resolution: 49x512x768"""
491
 
492
  if training_type == "LoRA Finetune":
493
- return base_info + "\n- Required VRAM: ?GB minimum\n- Default LoRA rank: 32 (~120 MB)"
494
  else:
495
  return base_info + "\n- **Full finetune not recommended due to VRAM requirements**"
496
 
@@ -601,6 +659,10 @@ class TrainTab(BaseTab):
601
  precomputation_items_val = current_state.get("precomputation_items") if current_state.get("precomputation_items") != preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS) else preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS)
602
  lr_warmup_steps_val = current_state.get("lr_warmup_steps") if current_state.get("lr_warmup_steps") != preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS) else preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS)
603
 
 
  # Return values in the same order as the output components
605
  return (
606
  model_display_name,
@@ -615,9 +677,11 @@ class TrainTab(BaseTab):
615
  gr.Row(visible=show_lora_params),
616
  num_gpus_val,
617
  precomputation_items_val,
618
- lr_warmup_steps_val
 
619
  )
620
-
 
621
  def get_latest_status_message_and_logs(self) -> Tuple[str, str, str]:
622
  """Get latest status message, log content, and status code in a safer way"""
623
  state = self.app.training.get_status()
 
5
  import gradio as gr
6
  import logging
7
  import os
8
+ import json
9
  from typing import Dict, Any, List, Optional, Tuple
10
  from pathlib import Path
11
 
12
  from vms.utils import BaseTab
13
  from vms.config import (
14
+ OUTPUT_PATH, ASK_USER_TO_DUPLICATE_SPACE,
15
+ SMALL_TRAINING_BUCKETS,
16
+ TRAINING_PRESETS, TRAINING_TYPES, MODEL_TYPES, MODEL_VERSIONS,
17
  DEFAULT_NB_TRAINING_STEPS, DEFAULT_SAVE_CHECKPOINT_EVERY_N_STEPS,
18
  DEFAULT_BATCH_SIZE, DEFAULT_CAPTION_DROPOUT_P,
19
  DEFAULT_LEARNING_RATE,
 
56
 
57
  with gr.Row():
58
  with gr.Column():
59
+ # Get the default model type from the first preset
60
+ default_model_type = list(MODEL_TYPES.keys())[0]
61
+
62
  self.components["model_type"] = gr.Dropdown(
63
  choices=list(MODEL_TYPES.keys()),
64
  label="Model Type",
65
+ value=default_model_type,
66
+ interactive=True
67
  )
68
+
69
+ # Get model versions for the default model type
70
+ default_model_versions = self.get_model_version_choices(default_model_type)
71
+ default_model_version = self.get_default_model_version(default_model_type)
72
+
73
+ self.components["model_version"] = gr.Dropdown(
74
+ choices=default_model_versions,
75
+ label="Model Version",
76
+ value=default_model_version,
77
+ interactive=True
78
+ )
79
+
80
  self.components["training_type"] = gr.Dropdown(
81
  choices=list(TRAINING_TYPES.keys()),
82
  label="Training Type",
 
216
 
217
  def connect_events(self) -> None:
218
  """Connect event handlers to UI components"""
219
+ # Model type change event - Update model version dropdown choices
220
  self.components["model_type"].change(
221
+ fn=self.update_model_versions,
222
+ inputs=[self.components["model_type"]],
223
+ outputs=[self.components["model_version"]]
224
+ ).then(
225
  fn=lambda v: self.app.update_ui_state(model_type=v),
226
  inputs=[self.components["model_type"]],
227
  outputs=[]
228
  ).then(
229
+ # Use get_model_info instead of update_model_info
230
+ fn=self.get_model_info,
231
  inputs=[self.components["model_type"], self.components["training_type"]],
232
+ outputs=[self.components["model_info"]]
 
  )
234
 
235
+ # Model version change event
236
+ self.components["model_version"].change(
237
+ fn=lambda v: self.app.update_ui_state(model_version=v),
238
+ inputs=[self.components["model_version"]],
239
+ outputs=[]
240
+ )
241
+
242
  # Training type change event
243
  self.components["training_type"].change(
244
  fn=lambda v: self.app.update_ui_state(training_type=v),
245
  inputs=[self.components["training_type"]],
246
  outputs=[]
247
  ).then(
248
+ fn=self.update_model_info,
249
  inputs=[self.components["model_type"], self.components["training_type"]],
250
  outputs=[
251
  self.components["model_info"],
 
257
  ]
258
  )
259
 
 
260
  # Add in the connect_events() method:
261
  self.components["num_gpus"].change(
262
  fn=lambda v: self.app.update_ui_state(num_gpus=v),
 
334
  self.components["lora_params_row"],
335
  self.components["num_gpus"],
336
  self.components["precomputation_items"],
337
+ self.components["lr_warmup_steps"],
338
+ # Add model_version to the outputs
339
+ self.components["model_version"]
340
  ]
341
  )
342
 
 
346
  inputs=[
347
  self.components["training_preset"],
348
  self.components["model_type"],
349
+ self.components["model_version"], # Add model_version to the inputs
350
  self.components["training_type"],
351
  self.components["lora_rank"],
352
  self.components["lora_alpha"],
 
394
  fn=lambda: self.app.training.delete_all_checkpoints(),
395
  outputs=[self.components["status_box"]]
396
  )
397
+
398
+ def update_model_versions(self, model_type: str) -> Dict:
399
+ """Update model version choices based on selected model type"""
400
+ model_versions = self.get_model_version_choices(model_type)
401
+ default_version = self.get_default_model_version(model_type)
402
+
403
+ # Update the model_version dropdown with new choices and default value
404
+ return gr.Dropdown(choices=model_versions, value=default_version)
405
 
406
  def handle_training_start(
407
+ self, preset, model_type, model_version, training_type,
408
+ lora_rank, lora_alpha, train_steps, batch_size, learning_rate,
409
+ save_iterations, repo_id, progress=gr.Progress()
410
  ):
411
  """Handle training start with proper log parser reset and checkpoint detection"""
412
  # Safely reset log parser if it exists
 
417
  from ..utils import TrainingLogParser
418
  self.app.log_parser = TrainingLogParser()
419
 
 
 
 
420
  # Check for latest checkpoint
421
  checkpoints = list(OUTPUT_PATH.glob("checkpoint-*"))
422
  resume_from = None
 
426
  latest_checkpoint = max(checkpoints, key=os.path.getmtime)
427
  resume_from = str(latest_checkpoint)
428
  logger.info(f"Found checkpoint at {resume_from}, will resume training")
 
 
 
 
429
 
430
  # Convert model_type display name to internal name
431
  model_internal_type = MODEL_TYPES.get(model_type)
 
446
  precomputation_items = int(self.components["precomputation_items"].value)
447
  lr_warmup_steps = int(self.components["lr_warmup_steps"].value)
448
 
 
 
 
449
  # Start training (it will automatically use the checkpoint if provided)
450
  try:
451
  return self.app.training.start_training(
 
459
  repo_id,
460
  preset_name=preset,
461
  training_type=training_internal_type,
462
+ model_version=model_version, # Pass the model version from dropdown
463
  resume_from_checkpoint=resume_from,
464
  num_gpus=num_gpus,
465
  precomputation_items=precomputation_items,
 
470
  logger.exception("Error starting training")
471
  return f"Error starting training: {str(e)}", f"Exception: {str(e)}\n\nCheck the logs for more details."
472
 
473
+ def get_model_version_choices(self, model_type: str) -> List[str]:
474
+ """Get model version choices based on model type"""
475
+ # Convert UI display name to internal name
476
+ internal_type = MODEL_TYPES.get(model_type)
477
+ if not internal_type or internal_type not in MODEL_VERSIONS:
478
+ return []
479
+
480
+ # Get versions and return them as choices
481
+ versions = MODEL_VERSIONS.get(internal_type, {})
482
+ return list(versions.keys())
483
+
484
+ def get_default_model_version(self, model_type: str) -> str:
485
+ """Get default model version for the given model type"""
486
+ # Convert UI display name to internal name
487
+ internal_type = MODEL_TYPES.get(model_type)
488
+ if not internal_type or internal_type not in MODEL_VERSIONS:
489
+ return ""
490
+
491
+ # Get the first version available for this model type
492
+ versions = MODEL_VERSIONS.get(internal_type, {})
493
+ if versions:
494
+ return next(iter(versions.keys()))
495
+
496
+ return ""
497
+
498
+ def update_model_info(self, model_type: str, training_type: str) -> Dict:
499
+ """Update model info and related UI components based on model type and training type"""
500
+ # Get model info text
501
+ model_info = self.get_model_info(model_type, training_type)
502
+
503
+ # Get default parameters for this model type and training type
504
+ params = self.get_default_params(MODEL_TYPES.get(model_type), TRAINING_TYPES.get(training_type))
505
+
506
+ # Check if LoRA params should be visible
507
+ show_lora_params = training_type == "LoRA Finetune"
508
+
509
+ # Return updates for UI components
510
+ return {
511
+ self.components["model_info"]: model_info,
512
+ self.components["train_steps"]: params["train_steps"],
513
+ self.components["batch_size"]: params["batch_size"],
514
+ self.components["learning_rate"]: params["learning_rate"],
515
+ self.components["save_iterations"]: params["save_iterations"],
516
+ self.components["lora_params_row"]: gr.Row(visible=show_lora_params)
517
+ }
518
+
519
  def get_model_info(self, model_type: str, training_type: str) -> str:
520
  """Get information about the selected model type and training method"""
521
  if model_type == "HunyuanVideo":
 
541
  else:
542
  return base_info + "\n- Required VRAM: ~21GB minimum\n- Full model size: ~8GB"
543
 
544
+ elif model_type == "Wan":
545
+ base_info = """### Wan
546
+ - Recommended batch size: 1-4
547
+ - Typical training time: 1-3 hours
548
  - Default resolution: 49x512x768"""
549
 
550
  if training_type == "LoRA Finetune":
551
+ return base_info + "\n- Required VRAM: ~16GB minimum\n- Default LoRA rank: 32 (~120 MB)"
552
  else:
553
  return base_info + "\n- **Full finetune not recommended due to VRAM requirements**"
554
 
 
659
  precomputation_items_val = current_state.get("precomputation_items") if current_state.get("precomputation_items") != preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS) else preset.get("precomputation_items", DEFAULT_PRECOMPUTATION_ITEMS)
660
  lr_warmup_steps_val = current_state.get("lr_warmup_steps") if current_state.get("lr_warmup_steps") != preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS) else preset.get("lr_warmup_steps", DEFAULT_NB_LR_WARMUP_STEPS)
661
 
662
+ # Get the appropriate model version for the selected model type
663
+ model_versions = self.get_model_version_choices(model_display_name)
664
+ default_model_version = self.get_default_model_version(model_display_name)
665
+
666
  # Return values in the same order as the output components
667
  return (
668
  model_display_name,
 
677
  gr.Row(visible=show_lora_params),
678
  num_gpus_val,
679
  precomputation_items_val,
680
+ lr_warmup_steps_val,
681
+ gr.Dropdown(choices=model_versions, value=default_model_version)
682
  )
683
+
684
+
685
  def get_latest_status_message_and_logs(self) -> Tuple[str, str, str]:
686
  """Get latest status message, log content, and status code in a safer way"""
687
  state = self.app.training.get_status()