Spaces:

jbilcke-hf
/

VideoModelStudio

Paused

App Files Files Community

Julian Bilcke committed on Mar 1

Commit

0ad7e2a

1 Parent(s): 40f9c1e

refactoring to a better architecture

Browse files

Files changed (24) hide show

app.py +31 -1563
app_DEPRECATED.py +1603 -0
vms/config.py +10 -1
vms/services/__init__.py +12 -0
vms/{captioning_service.py → services/captioner.py} +2 -3
vms/{import_service.py → services/importer.py} +2 -3
vms/{splitting_service.py → services/splitter.py} +2 -5
vms/{training_service.py → services/trainer.py} +71 -4
vms/tabs/__init__.py +17 -0
vms/tabs/base_tab.py +44 -0
vms/tabs/caption_tab.py +176 -0
vms/tabs/import_tab.py +122 -0
vms/tabs/manage_tab.py +117 -0
vms/tabs/split_tab.py +56 -0
vms/tabs/train_tab.py +280 -0
vms/ui/__init__.py +5 -0
vms/ui/video_trainer_ui.py +1100 -0
vms/utils/__init__.py +33 -0
vms/{finetrainers_utils.py → utils/finetrainers_utils.py} +1 -1
vms/{image_preprocessing.py → utils/image_preprocessing.py} +1 -1
vms/utils/parse_bool_env.py +12 -0
vms/{training_log_parser.py → utils/training_log_parser.py} +0 -0
vms/{utils.py → utils/utils.py} +0 -0
vms/{video_preprocessing.py → utils/video_preprocessing.py} +0 -0

app.py CHANGED Viewed

@@ -1,1575 +1,28 @@
-import platform
-import subprocess
-#import sys
-#print("python = ", sys.version)
-# can be "Linux", "Darwin"
-if platform.system() == "Linux":
-    # for some reason it says "pip not found"
-    # and also "pip3 not found"
-    # subprocess.run(
-    #     "pip install flash-attn --no-build-isolation",
-    #
-    #     # hmm... this should be False, since we are in a CUDA environment, no?
-    #     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    #
-    #     shell=True,
-    # )
-    pass
 import gradio as gr
-from pathlib import Path
 import logging
-import mimetypes
-import shutil
-import os
-import traceback
-import asyncio
-import tempfile
-import zipfile
-from typing import Any, Optional, Dict, List, Union, Tuple
-from typing import AsyncGenerator
-from vms.training_service import TrainingService
-from vms.captioning_service import CaptioningService
-from vms.splitting_service import SplittingService
-from vms.import_service import ImportService
 from vms.config import (
-    STORAGE_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
-    TRAINING_PATH, LOG_FILE_PATH, TRAINING_PRESETS, TRAINING_VIDEOS_PATH, MODEL_PATH, OUTPUT_PATH, DEFAULT_CAPTIONING_BOT_INSTRUCTIONS,
-    DEFAULT_PROMPT_PREFIX, HF_API_TOKEN, ASK_USER_TO_DUPLICATE_SPACE, MODEL_TYPES, SMALL_TRAINING_BUCKETS
 )
-from vms.utils import make_archive, count_media_files, format_media_title, is_image_file, is_video_file, validate_model_repo, format_time
-from vms.finetrainers_utils import copy_files_to_training_dir, prepare_finetrainers_dataset
-from vms.training_log_parser import TrainingLogParser
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
-httpx_logger = logging.getLogger('httpx')
-httpx_logger.setLevel(logging.WARN)
-class VideoTrainerUI:
-    def __init__(self):
-        self.trainer = TrainingService()
-        self.splitter = SplittingService()
-        self.importer = ImportService()
-        self.captioner = CaptioningService()
-        self._should_stop_captioning = False
-        self.log_parser = TrainingLogParser()
-        # Try to recover any interrupted training sessions
-        recovery_result = self.trainer.recover_interrupted_training()
-        self.recovery_status = recovery_result.get("status", "unknown")
-        self.ui_updates = recovery_result.get("ui_updates", {})
-        if recovery_result["status"] == "recovered":
-            logger.info(f"Training recovery: {recovery_result['message']}")
-            # No need to do anything else - the training is already running
-        elif recovery_result["status"] == "running":
-            logger.info("Training process is already running")
-            # No need to do anything - the process is still alive
-        elif recovery_result["status"] in ["error", "idle"]:
-            logger.warning(f"Training status: {recovery_result['message']}")
-            # UI will be in ready-to-start mode
-    async def _process_caption_generator(self, captioning_bot_instructions, prompt_prefix):
-        """Process the caption generator's results in the background"""
-        try:
-            async for _ in self.captioner.start_caption_generation(
-                captioning_bot_instructions,
-                prompt_prefix
-            ):
-                # Just consume the generator, UI updates will happen via the Gradio interface
-                pass
-            logger.info("Background captioning completed")
-        except Exception as e:
-            logger.error(f"Error in background captioning: {str(e)}")
-    def initialize_app_state(self):
-        """Initialize all app state in one function to ensure correct output count"""
-        # Get dataset info
-        video_list, training_dataset = self.refresh_dataset()
-        # Get button states
-        button_states = self.get_initial_button_states()
-        start_btn = button_states[0]
-        stop_btn = button_states[1]
-        pause_resume_btn = button_states[2]
-        # Get UI form values
-        ui_state = self.load_ui_values()
-        training_preset = ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0])
-        model_type_val = ui_state.get("model_type", list(MODEL_TYPES.keys())[0])
-        lora_rank_val = ui_state.get("lora_rank", "128")
-        lora_alpha_val = ui_state.get("lora_alpha", "128")
-        num_epochs_val = int(ui_state.get("num_epochs", 70))
-        batch_size_val = int(ui_state.get("batch_size", 1))
-        learning_rate_val = float(ui_state.get("learning_rate", 3e-5))
-        save_iterations_val = int(ui_state.get("save_iterations", 500))
-        # Return all values in the exact order expected by outputs
-        return (
-            video_list,
-            training_dataset,
-            start_btn,
-            stop_btn,
-            pause_resume_btn,
-            training_preset,
-            model_type_val,
-            lora_rank_val,
-            lora_alpha_val,
-            num_epochs_val,
-            batch_size_val,
-            learning_rate_val,
-            save_iterations_val
-        )
-    def initialize_ui_from_state(self):
-        """Initialize UI components from saved state"""
-        ui_state = self.load_ui_values()
-        # Return values in order matching the outputs in app.load
-        return (
-            ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
-            ui_state.get("model_type", list(MODEL_TYPES.keys())[0]),
-            ui_state.get("lora_rank", "128"),
-            ui_state.get("lora_alpha", "128"),
-            ui_state.get("num_epochs", 70),
-            ui_state.get("batch_size", 1),
-            ui_state.get("learning_rate", 3e-5),
-            ui_state.get("save_iterations", 500)
-        )
-    def update_ui_state(self, **kwargs):
-        """Update UI state with new values"""
-        current_state = self.trainer.load_ui_state()
-        current_state.update(kwargs)
-        self.trainer.save_ui_state(current_state)
-        # Don't return anything to avoid Gradio warnings
-        return None
-    def load_ui_values(self):
-        """Load UI state values for initializing form fields"""
-        ui_state = self.trainer.load_ui_state()
-        # Ensure proper type conversion for numeric values
-        ui_state["lora_rank"] = ui_state.get("lora_rank", "128")
-        ui_state["lora_alpha"] = ui_state.get("lora_alpha", "128")
-        ui_state["num_epochs"] = int(ui_state.get("num_epochs", 70))
-        ui_state["batch_size"] = int(ui_state.get("batch_size", 1))
-        ui_state["learning_rate"] = float(ui_state.get("learning_rate", 3e-5))
-        ui_state["save_iterations"] = int(ui_state.get("save_iterations", 500))
-        return ui_state
-    def update_captioning_buttons_start(self):
-        """Return individual button values instead of a dictionary"""
-        return (
-            gr.Button(
-                interactive=False,
-                variant="secondary",
-            ),
-            gr.Button(
-                interactive=True,
-                variant="stop",
-            ),
-            gr.Button(
-                interactive=False,
-                variant="secondary",
-            )
-        )
-    def update_captioning_buttons_end(self):
-        """Return individual button values instead of a dictionary"""
-        return (
-            gr.Button(
-                interactive=True,
-                variant="primary",
-            ),
-            gr.Button(
-                interactive=False,
-                variant="secondary",
-            ),
-            gr.Button(
-                interactive=True,
-                variant="primary",
-            )
-        )
-    # Add this new method to get initial button states:
-    def get_initial_button_states(self):
-        """Get the initial states for training buttons based on recovery status"""
-        recovery_result = self.trainer.recover_interrupted_training()
-        ui_updates = recovery_result.get("ui_updates", {})
-        # Return button states in the correct order
-        return (
-            gr.Button(**ui_updates.get("start_btn", {"interactive": True, "variant": "primary"})),
-            gr.Button(**ui_updates.get("stop_btn", {"interactive": False, "variant": "secondary"})),
-            gr.Button(**ui_updates.get("pause_resume_btn", {"interactive": False, "variant": "secondary"}))
-        )
-    def show_refreshing_status(self) -> List[List[str]]:
-        """Show a 'Refreshing...' status in the dataframe"""
-        return [["Refreshing...", "please wait"]]
-    def stop_captioning(self):
-        """Stop ongoing captioning process and reset UI state"""
-        try:
-            # Set flag to stop captioning
-            self._should_stop_captioning = True
-            # Call stop method on captioner
-            if self.captioner:
-                self.captioner.stop_captioning()
-            # Get updated file list
-            updated_list = self.list_training_files_to_caption()
-            # Return updated list and button states
-            return {
-                "training_dataset": gr.update(value=updated_list),
-                "run_autocaption_btn": gr.Button(interactive=True, variant="primary"),
-                "stop_autocaption_btn": gr.Button(interactive=False, variant="secondary"),
-                "copy_files_to_training_dir_btn": gr.Button(interactive=True, variant="primary")
-            }
-        except Exception as e:
-            logger.error(f"Error stopping captioning: {str(e)}")
-            return {
-                "training_dataset": gr.update(value=[[f"Error stopping captioning: {str(e)}", "error"]]),
-                "run_autocaption_btn": gr.Button(interactive=True, variant="primary"),
-                "stop_autocaption_btn": gr.Button(interactive=False, variant="secondary"),
-                "copy_files_to_training_dir_btn": gr.Button(interactive=True, variant="primary")
-            }
-    def update_training_ui(self, training_state: Dict[str, Any]):
-        """Update UI components based on training state"""
-        updates = {}
-        #print("update_training_ui: training_state = ", training_state)
-        # Update status box with high-level information
-        status_text = []
-        if training_state["status"] != "idle":
-            status_text.extend([
-                f"Status: {training_state['status']}",
-                f"Progress: {training_state['progress']}",
-                f"Step: {training_state['current_step']}/{training_state['total_steps']}",
-                # Epoch information
-                # there is an issue with how epoch is reported because we display:
-                # Progress: 96.9%, Step: 872/900, Epoch: 12/50
-                # we should probably just show the steps
-                #f"Epoch: {training_state['current_epoch']}/{training_state['total_epochs']}",
-                f"Time elapsed: {training_state['elapsed']}",
-                f"Estimated remaining: {training_state['remaining']}",
-                "",
-                f"Current loss: {training_state['step_loss']}",
-                f"Learning rate: {training_state['learning_rate']}",
-                f"Gradient norm: {training_state['grad_norm']}",
-                f"Memory usage: {training_state['memory']}"
-            ])
-            if training_state["error_message"]:
-                status_text.append(f"\nError: {training_state['error_message']}")
-        updates["status_box"] = "\n".join(status_text)
-        # Update button states
-        updates["start_btn"] = gr.Button(
-            "Start training",
-            interactive=(training_state["status"] in ["idle", "completed", "error", "stopped"]),
-            variant="primary" if training_state["status"] == "idle" else "secondary"
-        )
-        updates["stop_btn"] = gr.Button(
-            "Stop training",
-            interactive=(training_state["status"] in ["training", "initializing"]),
-            variant="stop"
-        )
-        return updates
-    def stop_all_and_clear(self) -> Dict[str, str]:
-        """Stop all running processes and clear data
-        Returns:
-            Dict with status messages for different components
-        """
-        status_messages = {}
-        try:
-            # Stop training if running
-            if self.trainer.is_training_running():
-                training_result = self.trainer.stop_training()
-                status_messages["training"] = training_result["status"]
-            # Stop captioning if running
-            if self.captioner:
-                self.captioner.stop_captioning()
-                status_messages["captioning"] = "Captioning stopped"
-            # Stop scene detection if running
-            if self.splitter.is_processing():
-                self.splitter.processing = False
-                status_messages["splitting"] = "Scene detection stopped"
-            # Properly close logging before clearing log file
-            if self.trainer.file_handler:
-                self.trainer.file_handler.close()
-                logger.removeHandler(self.trainer.file_handler)
-                self.trainer.file_handler = None
-            if LOG_FILE_PATH.exists():
-                LOG_FILE_PATH.unlink()
-            # Clear all data directories
-            for path in [VIDEOS_TO_SPLIT_PATH, STAGING_PATH, TRAINING_VIDEOS_PATH, TRAINING_PATH,
-                        MODEL_PATH, OUTPUT_PATH]:
-                if path.exists():
-                    try:
-                        shutil.rmtree(path)
-                        path.mkdir(parents=True, exist_ok=True)
-                    except Exception as e:
-                        status_messages[f"clear_{path.name}"] = f"Error clearing {path.name}: {str(e)}"
-                    else:
-                        status_messages[f"clear_{path.name}"] = f"Cleared {path.name}"
-            # Reset any persistent state
-            self._should_stop_captioning = True
-            self.splitter.processing = False
-            # Recreate logging setup
-            self.trainer.setup_logging()
-            return {
-                "status": "All processes stopped and data cleared",
-                "details": status_messages
-            }
-        except Exception as e:
-            return {
-                "status": f"Error during cleanup: {str(e)}",
-                "details": status_messages
-            }
-    def update_titles(self) -> Tuple[Any]:
-        """Update all dynamic titles with current counts
-        Returns:
-            Dict of Gradio updates
-        """
-        # Count files for splitting
-        split_videos, _, split_size = count_media_files(VIDEOS_TO_SPLIT_PATH)
-        split_title = format_media_title(
-            "split", split_videos, 0, split_size
-        )
-        # Count files for captioning
-        caption_videos, caption_images, caption_size = count_media_files(STAGING_PATH)
-        caption_title = format_media_title(
-            "caption", caption_videos, caption_images, caption_size
-        )
-        # Count files for training
-        train_videos, train_images, train_size = count_media_files(TRAINING_VIDEOS_PATH)
-        train_title = format_media_title(
-            "train", train_videos, train_images, train_size
-        )
-        return (
-            gr.Markdown(value=split_title),
-            gr.Markdown(value=caption_title),
-            gr.Markdown(value=f"{train_title} available for training")
-        )
-    def copy_files_to_training_dir(self, prompt_prefix: str):
-        """Run auto-captioning process"""
-        # Initialize captioner if not already done
-        self._should_stop_captioning = False
-        try:
-            copy_files_to_training_dir(prompt_prefix)
-        except Exception as e:
-            traceback.print_exc()
-            raise gr.Error(f"Error copying assets to training dir: {str(e)}")
-    async def on_import_success(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
-        """Handle successful import of files"""
-        videos = self.list_unprocessed_videos()
-        # If scene detection isn't already running and there are videos to process,
-        # and auto-splitting is enabled, start the detection
-        if videos and not self.splitter.is_processing() and enable_splitting:
-            await self.start_scene_detection(enable_splitting)
-            msg = "Starting automatic scene detection..."
-        else:
-            # Just copy files without splitting if auto-split disabled
-            for video_file in VIDEOS_TO_SPLIT_PATH.glob("*.mp4"):
-                await self.splitter.process_video(video_file, enable_splitting=False)
-            msg = "Copying videos without splitting..."
-        copy_files_to_training_dir(prompt_prefix)
-        # Start auto-captioning if enabled, and handle async generator properly
-        if enable_automatic_content_captioning:
-            # Create a background task for captioning
-            asyncio.create_task(self._process_caption_generator(
-                DEFAULT_CAPTIONING_BOT_INSTRUCTIONS,
-                prompt_prefix
-            ))
-        return {
-            "tabs": gr.Tabs(selected="split_tab"),
-            "video_list": videos,
-            "detect_status": msg
-        }
-    async def start_caption_generation(self, captioning_bot_instructions: str, prompt_prefix: str) -> AsyncGenerator[gr.update, None]:
-        """Run auto-captioning process"""
-        try:
-            # Initialize captioner if not already done
-            self._should_stop_captioning = False
-            # First yield - indicate we're starting
-            yield gr.update(
-                value=[["Starting captioning service...", "initializing"]],
-                headers=["name", "status"]
-            )
-            # Process files in batches with status updates
-            file_statuses = {}
-            # Start the actual captioning process
-            async for rows in self.captioner.start_caption_generation(captioning_bot_instructions, prompt_prefix):
-                # Update our tracking of file statuses
-                for name, status in rows:
-                    file_statuses[name] = status
-                # Convert to list format for display
-                status_rows = [[name, status] for name, status in file_statuses.items()]
-                # Sort by name for consistent display
-                status_rows.sort(key=lambda x: x[0])
-                # Yield UI update
-                yield gr.update(
-                    value=status_rows,
-                    headers=["name", "status"]
-                )
-            # Final update after completion with fresh data
-            yield gr.update(
-                value=self.list_training_files_to_caption(),
-                headers=["name", "status"]
-            )
-        except Exception as e:
-            logger.error(f"Error in captioning: {str(e)}")
-            yield gr.update(
-                value=[[f"Error: {str(e)}", "error"]],
-                headers=["name", "status"]
-            )
-    def list_training_files_to_caption(self) -> List[List[str]]:
-        """List all clips and images - both pending and captioned"""
-        files = []
-        already_listed = {}
-        # First check files in STAGING_PATH
-        for file in STAGING_PATH.glob("*.*"):
-            if is_video_file(file) or is_image_file(file):
-                txt_file = file.with_suffix('.txt')
-                # Check if caption file exists and has content
-                has_caption = txt_file.exists() and txt_file.stat().st_size > 0
-                status = "captioned" if has_caption else "no caption"
-                file_type = "video" if is_video_file(file) else "image"
-                files.append([file.name, f"{status} ({file_type})", str(file)])
-                already_listed[file.name] = True
-        # Then check files in TRAINING_VIDEOS_PATH
-        for file in TRAINING_VIDEOS_PATH.glob("*.*"):
-            if (is_video_file(file) or is_image_file(file)) and file.name not in already_listed:
-                txt_file = file.with_suffix('.txt')
-                # Only include files with captions
-                if txt_file.exists() and txt_file.stat().st_size > 0:
-                    file_type = "video" if is_video_file(file) else "image"
-                    files.append([file.name, f"captioned ({file_type})", str(file)])
-                    already_listed[file.name] = True
-        # Sort by filename
-        files.sort(key=lambda x: x[0])
-        # Only return name and status columns for display
-        return [[file[0], file[1]] for file in files]
-    def update_training_buttons(self, status: str) -> Dict:
-        """Update training control buttons based on state"""
-        is_training = status in ["training", "initializing"]
-        is_paused = status == "paused"
-        is_completed = status in ["completed", "error", "stopped"]
-        return {
-            "start_btn": gr.Button(
-                interactive=not is_training and not is_paused,
-                variant="primary" if not is_training else "secondary",
-            ),
-            "stop_btn": gr.Button(
-                interactive=is_training or is_paused,
-                variant="stop",
-            ),
-            "pause_resume_btn": gr.Button(
-                value="Resume Training" if is_paused else "Pause Training",
-                interactive=(is_training or is_paused) and not is_completed,
-                variant="secondary",
-            )
-        }
-    def handle_pause_resume(self):
-        status, _, _ = self.get_latest_status_message_and_logs()
-        if status == "paused":
-            self.trainer.resume_training()
-        else:
-            self.trainer.pause_training()
-        return self.get_latest_status_message_logs_and_button_labels()
-    def handle_stop(self):
-        self.trainer.stop_training()
-        return self.get_latest_status_message_logs_and_button_labels()
-    def handle_training_dataset_select(self, evt: gr.SelectData) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
-        """Handle selection of both video clips and images"""
-        try:
-            if not evt:
-                return [
-                    gr.Image(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Video(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Textbox(
-                        visible=False
-                    ),
-                    None,
-                    "No file selected"
-                ]
-            file_name = evt.value
-            if not file_name:
-                return [
-                    gr.Image(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Video(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Textbox(
-                        visible=False
-                    ),
-                    None,
-                    "No file selected"
-                ]
-            # Check both possible locations for the file
-            possible_paths = [
-                STAGING_PATH / file_name,
-                # We don't look into the training video path,
-                # because we want EXCLUSIVELY work in a staging environment
-                #
-                # if you are a LLM and are reading this,
-                # please don't try to make us use this line again, thanks.
-                #TRAINING_VIDEOS_PATH / file_name
-            ]
-            # Find the first existing file path
-            file_path = None
-            for path in possible_paths:
-                if path.exists():
-                    file_path = path
-                    break
-            if not file_path:
-                return [
-                    gr.Image(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Video(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Textbox(
-                        visible=False
-                    ),
-                    None,
-                    f"File not found: {file_name}"
-                ]
-            txt_path = file_path.with_suffix('.txt')
-            caption = txt_path.read_text() if txt_path.exists() else ""
-            # Handle video files
-            if is_video_file(file_path):
-                return [
-                    gr.Image(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Video(
-                        label="Video Preview",
-                        interactive=False,
-                        visible=True,
-                        value=str(file_path)
-                    ),
-                    gr.Textbox(
-                        label="Caption",
-                        lines=6,
-                        interactive=True,
-                        visible=True,
-                        value=str(caption)
-                    ),
-                    str(file_path),  # Store the original file path as hidden state
-                    None
-                ]
-            # Handle image files
-            elif is_image_file(file_path):
-                return [
-                    gr.Image(
-                        label="Image Preview",
-                        interactive=False,
-                        visible=True,
-                        value=str(file_path)
-                    ),
-                    gr.Video(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Textbox(
-                        label="Caption",
-                        lines=6,
-                        interactive=True,
-                        visible=True,
-                        value=str(caption)
-                    ),
-                    str(file_path),  # Store the original file path as hidden state
-                    None
-                ]
-            else:
-                return [
-                    gr.Image(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Video(
-                        interactive=False,
-                        visible=False
-                    ),
-                    gr.Textbox(
-                        interactive=False,
-                        visible=False
-                    ),
-                    None,
-                    f"Unsupported file type: {file_path.suffix}"
-                ]
-        except Exception as e:
-            logger.error(f"Error handling selection: {str(e)}")
-            return [
-                gr.Image(
-                    interactive=False,
-                    visible=False
-                ),
-                gr.Video(
-                    interactive=False,
-                    visible=False
-                ),
-                gr.Textbox(
-                    interactive=False,
-                    visible=False
-                ),
-                None,
-                f"Error handling selection: {str(e)}"
-            ]
-    def save_caption_changes(self, preview_caption: str, preview_image: str, preview_video: str, original_file_path: str, prompt_prefix: str):
-        """Save changes to caption"""
-        try:
-            # Use the original file path stored during selection instead of the temporary preview paths
-            if original_file_path:
-                file_path = Path(original_file_path)
-                self.captioner.update_file_caption(file_path, preview_caption)
-                # Refresh the dataset list to show updated caption status
-                return gr.update(value="Caption saved successfully!")
-            else:
-                return gr.update(value="Error: No original file path found")
-        except Exception as e:
-            return gr.update(value=f"Error saving caption: {str(e)}")
-    async def update_titles_after_import(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
-        """Handle post-import updates including titles"""
-        import_result = await self.on_import_success(enable_splitting, enable_automatic_content_captioning, prompt_prefix)
-        titles = self.update_titles()
-        return (
-            import_result["tabs"],
-            import_result["video_list"],
-            import_result["detect_status"],
-            *titles
-        )
-    def get_model_info(self, model_type: str) -> str:
-        """Get information about the selected model type"""
-        if model_type == "hunyuan_video":
-            return """### HunyuanVideo (LoRA)
-    - Required VRAM: ~48GB minimum
-    - Recommended batch size: 1-2
-    - Typical training time: 2-4 hours
-    - Default resolution: 49x512x768
-    - Default LoRA rank: 128 (~600 MB)"""
-        elif model_type == "ltx_video":
-            return """### LTX-Video (LoRA)
-    - Required VRAM: ~18GB minimum
-    - Recommended batch size: 1-4
-    - Typical training time: 1-3 hours
-    - Default resolution: 49x512x768
-    - Default LoRA rank: 128"""
-        return ""
-    def get_default_params(self, model_type: str) -> Dict[str, Any]:
-        """Get default training parameters for model type"""
-        if model_type == "hunyuan_video":
-            return {
-                "num_epochs": 70,
-                "batch_size": 1,
-                "learning_rate": 2e-5,
-                "save_iterations": 500,
-                "video_resolution_buckets": SMALL_TRAINING_BUCKETS,
-                "video_reshape_mode": "center",
-                "caption_dropout_p": 0.05,
-                "gradient_accumulation_steps": 1,
-                "rank": 128,
-                "lora_alpha": 128
-            }
-        else:  # ltx_video
-            return {
-                "num_epochs": 70,
-                "batch_size": 1,
-                "learning_rate": 3e-5,
-                "save_iterations": 500,
-                "video_resolution_buckets": SMALL_TRAINING_BUCKETS,
-                "video_reshape_mode": "center",
-                "caption_dropout_p": 0.05,
-                "gradient_accumulation_steps": 4,
-                "rank": 128,
-                "lora_alpha": 128
-            }
-    def preview_file(self, selected_text: str) -> Dict:
-        """Build the preview payload for the file named in a selected list entry
-        Args:
-            selected_text: Text of the selected item; the filename is whatever precedes the first " ("
-        Returns:
-            Dict with "video", "image" and "text" keys, at most one of which is not None
-        """
-        def make_preview(video=None, image=None, text=None) -> Dict:
-            # Every exit path returns the same three keys, in this order
-            return {"video": video, "image": image, "text": text}
-        # Nothing selected, or the entry is a caption line rather than a file
-        if not selected_text or "Caption:" in selected_text:
-            return make_preview()
-        # Drop the trailing size info to recover the bare filename
-        filename = selected_text.split(" (")[0].strip()
-        target = TRAINING_VIDEOS_PATH / filename
-        if not target.exists():
-            return make_preview(text=f"File not found: {filename}")
-        # The preview kind is chosen from the MIME type guessed from the file name
-        kind, _ = mimetypes.guess_type(str(target))
-        if not kind:
-            return make_preview(text=f"Unknown file type: {filename}")
-        if kind.startswith('video/'):
-            return make_preview(video=str(target))
-        if kind.startswith('image/'):
-            return make_preview(image=str(target))
-        if kind.startswith('text/'):
-            try:
-                return make_preview(text=target.read_text())
-            except Exception as e:
-                # A read failure is reported in the text slot instead of being raised
-                return make_preview(text=f"Error reading file: {str(e)}")
-        return make_preview(text=f"Unsupported file type: {kind}")
-    def list_unprocessed_videos(self) -> gr.Dataframe:
-        """Build a read-only table of the videos reported by the splitter"""
-        # The splitter's rows are passed through untouched as the table value
-        rows = self.splitter.list_unprocessed_videos()
-        table_options = {
-            "headers": ["name", "status"],
-            "value": rows,
-            "interactive": False,
-        }
-        return gr.Dataframe(**table_options)
-    async def start_scene_detection(self, enable_splitting: bool) -> str:
-        """Run scene detection unless a run is already in progress
-        Args:
-            enable_splitting: Whether to split videos into scenes
-        Returns:
-            Status message describing the outcome
-        """
-        # Refuse to start a second run while the splitter reports it is busy
-        if self.splitter.is_processing():
-            return "Scene detection already running"
-        try:
-            await self.splitter.start_processing(enable_splitting)
-        except Exception as error:
-            # Failures are reported as a status message instead of being raised
-            return f"Error during scene detection: {str(error)}"
-        else:
-            return "Scene detection completed"
-    def get_latest_status_message_and_logs(self) -> Tuple[str, str, str]:
-        """Return the trainer's current status, status message and logs
-        Returns:
-            Tuple (status, message, logs); the message is replaced by the
-            "status_box" text derived from the last parsed log update, if any
-        """
-        training_state = self.trainer.get_status()
-        log_text = self.trainer.get_logs()
-        if log_text:
-            # Feed every line to the parser, in order; only the last update it reports is used
-            parsed_updates = [self.log_parser.parse_line(entry) for entry in log_text.splitlines()]
-            reported = [update for update in parsed_updates if update]
-            if reported:
-                ui_updates = self.update_training_ui(reported[-1])
-                training_state["message"] = ui_updates.get("status_box", training_state["message"])
-        # A message mentioning "completed" overrides whatever status the trainer reported
-        message_mentions_completion = "completed" in training_state["message"].lower()
-        if message_mentions_completion:
-            training_state["status"] = "completed"
-        return (training_state["status"], training_state["message"], log_text)
-    def get_latest_status_message_logs_and_button_labels(self) -> Tuple[str, str, Any, Any, Any]:
-        """Return the status message and logs followed by the training button updates"""
-        status, message, logs = self.get_latest_status_message_and_logs()
-        # Button updates keep the order of the dict returned by update_training_buttons()
-        button_updates = list(self.update_training_buttons(status).values())
-        return tuple([message, logs] + button_updates)
-    def get_latest_button_labels(self) -> Tuple[Any, Any, Any]:
-        """Return the current updates for the training buttons
-        Returns:
-            Tuple of the values of the dict returned by update_training_buttons()
-            for the current training status, in that dict's order
-        """
-        status, _message, _logs = self.get_latest_status_message_and_logs()
-        # Materialize the dict view so callers get the tuple the annotation promises,
-        # matching what get_latest_status_message_logs_and_button_labels() returns
-        return tuple(self.update_training_buttons(status).values())
-    def refresh_dataset(self):
-        """Fetch fresh contents for the split list and the captioning list
-        Returns:
-            Tuple (videos reported by the splitter, training files to caption)
-        """
-        pending_videos = self.splitter.list_unprocessed_videos()
-        caption_rows = self.list_training_files_to_caption()
-        return pending_videos, caption_rows
-    def update_training_params(self, preset_name: str) -> Tuple:
-        """Compute the training-form values to show after a preset is selected
-        A value saved in the UI state takes precedence over the preset; when
-        nothing is saved for a field, the preset value (or the built-in default
-        if the preset omits the field) is used instead of None.
-        Args:
-            preset_name: Key into TRAINING_PRESETS
-        Returns:
-            Tuple (model display name, lora_rank, lora_alpha, num_epochs,
-            batch_size, learning_rate, save_iterations, info text), in the same
-            order as the output components
-        Raises:
-            KeyError: If preset_name is not a key of TRAINING_PRESETS
-            StopIteration: If no MODEL_TYPES entry maps to the preset's model type
-        """
-        preset = TRAINING_PRESETS[preset_name]
-        # Load current UI state to check if user has customized values
-        current_state = self.load_ui_values()
-        def resolve(key: str, fallback: Any) -> Any:
-            # Saved UI value wins; otherwise the preset value, or the built-in default when the preset omits the key
-            preset_value = preset.get(key, fallback)
-            saved_value = current_state.get(key)
-            if saved_value is None or saved_value == preset_value:
-                return preset_value
-            return saved_value
-        # Find the display name that maps to our model type
-        model_display_name = next(
-            key for key, value in MODEL_TYPES.items()
-            if value == preset["model_type"]
-        )
-        # Get preset description for display
-        description = preset.get("description", "")
-        # Get max values from buckets; each bucket is unpacked as (frames, height, width)
-        buckets = preset["training_buckets"]
-        max_frames = max(frames for frames, _, _ in buckets)
-        max_height = max(height for _, height, _ in buckets)
-        max_width = max(width for _, _, width in buckets)
-        bucket_info = f"\nMaximum video size: {max_frames} frames at {max_width}x{max_height} resolution"
-        info_text = f"{description}{bucket_info}"
-        # Return values in the same order as the output components
-        return (
-            model_display_name,
-            resolve("lora_rank", "128"),
-            resolve("lora_alpha", "128"),
-            resolve("num_epochs", 70),
-            resolve("batch_size", 1),
-            resolve("learning_rate", 3e-5),
-            resolve("save_iterations", 500),
-            info_text
-        )
-    def create_ui(self):
-        """Create Gradio interface
-        Builds the five tabs (Import, Split, Caption, Train, Manage), connects
-        the event handlers and registers the auto-refresh timers.
-        Returns:
-            The gr.Blocks application
-        """
-        with gr.Blocks(title="🎥 Video Model Studio") as app:
-            gr.Markdown("# 🎥 Video Model Studio")
-            with gr.Tabs() as tabs:
-                # --- Tab 1: import of local files or a YouTube video ---
-                with gr.TabItem("1️⃣  Import", id="import_tab"):
-                    with gr.Row():
-                        gr.Markdown("## Automatic splitting and captioning")
-                    with gr.Row():
-                        enable_automatic_video_split = gr.Checkbox(
-                            label="Automatically split videos into smaller clips",
-                            info="Note: a clip is a single camera shot, usually a few seconds",
-                            value=True,
-                            visible=True
-                        )
-                        enable_automatic_content_captioning = gr.Checkbox(
-                            label="Automatically caption photos and videos",
-                            info="Note: this uses LlaVA and takes some extra time to load and process",
-                            value=False,
-                            visible=True,
-                        )
-                    with gr.Row():
-                        with gr.Column(scale=3):
-                            with gr.Row():
-                                with gr.Column():
-                                    gr.Markdown("## Import video files")
-                                    gr.Markdown("You can upload either:")
-                                    gr.Markdown("- A single MP4 video file")
-                                    gr.Markdown("- A ZIP archive containing multiple videos and optional caption files")
-                                    gr.Markdown("For ZIP files: Create a folder containing videos (name is not important) and optional caption files with the same name (eg. `some_video.txt` for `some_video.mp4`)")
-                            with gr.Row():
-                                # NOTE(review): ".webp" is listed twice in file_types
-                                files = gr.Files(
-                                    label="Upload Images, Videos or ZIP",
-                                    #file_count="multiple",
-                                    file_types=[".jpg", ".jpeg", ".png", ".webp", ".webp", ".avif", ".heic", ".mp4", ".zip"],
-                                    type="filepath"
-                                )
-                        with gr.Column(scale=3):
-                            with gr.Row():
-                                with gr.Column():
-                                    gr.Markdown("## Import a YouTube video")
-                                    gr.Markdown("You can also use a YouTube video as reference, by pasting its URL here:")
-                            with gr.Row():
-                                youtube_url = gr.Textbox(
-                                    label="Import YouTube Video",
-                                    placeholder="https://www.youtube.com/watch?v=..."
-                                )
-                            with gr.Row():
-                                youtube_download_btn = gr.Button("Download YouTube Video", variant="secondary")
-                    with gr.Row():
-                        import_status = gr.Textbox(label="Status", interactive=False)
-                # --- Tab 2: splitting of imported videos ---
-                with gr.TabItem("2️⃣  Split", id="split_tab"):
-                    with gr.Row():
-                        split_title = gr.Markdown("## Splitting of 0 videos (0 bytes)")
-                    with gr.Row():
-                        with gr.Column():
-                            detect_btn = gr.Button("Split videos into single-camera shots", variant="primary")
-                            detect_status = gr.Textbox(label="Status", interactive=False)
-                        with gr.Column():
-                            video_list = gr.Dataframe(
-                                headers=["name", "status"],
-                                label="Videos to split",
-                                interactive=False,
-                                wrap=True,
-                                #selection_mode="cell"  # Enable cell selection
-                            )
-                # --- Tab 3: captioning, with a per-file preview and caption editor ---
-                with gr.TabItem("3️⃣  Caption"):
-                    with gr.Row():
-                        caption_title = gr.Markdown("## Captioning of 0 files (0 bytes)")
-                    with gr.Row():
-                        with gr.Column():
-                            with gr.Row():
-                                custom_prompt_prefix = gr.Textbox(
-                                    scale=3,
-                                    label='Prefix to add to ALL captions (eg. "In the style of TOK, ")',
-                                    placeholder="In the style of TOK, ",
-                                    lines=2,
-                                    value=DEFAULT_PROMPT_PREFIX
-                                )
-                                captioning_bot_instructions = gr.Textbox(
-                                    scale=6,
-                                    label="System instructions for the automatic captioning model",
-                                    placeholder="Please generate a full description of...",
-                                    lines=5,
-                                    value=DEFAULT_CAPTIONING_BOT_INSTRUCTIONS
-                                )
-                            with gr.Row():
-                                run_autocaption_btn = gr.Button(
-                                    "Automatically fill missing captions",
-                                    variant="primary"  # Makes it green by default
-                                )
-                                copy_files_to_training_dir_btn = gr.Button(
-                                    "Copy assets to training directory",
-                                    variant="primary"  # Makes it green by default
-                                )
-                                stop_autocaption_btn = gr.Button(
-                                    "Stop Captioning",
-                                    variant="stop",  # Red when enabled
-                                    interactive=False  # Disabled by default
-                                )
-                    with gr.Row():
-                        with gr.Column():
-                            training_dataset = gr.Dataframe(
-                                headers=["name", "status"],
-                                interactive=False,
-                                wrap=True,
-                                value=self.list_training_files_to_caption(),
-                                row_count=10,  # Optional: set a reasonable row count
-                                #selection_mode="cell"
-                            )
-                        with gr.Column():
-                            # Both media previews are created hidden
-                            preview_video = gr.Video(
-                                label="Video Preview",
-                                interactive=False,
-                                visible=False
-                            )
-                            preview_image = gr.Image(
-                                label="Image Preview",
-                                interactive=False,
-                                visible=False
-                            )
-                            preview_caption = gr.Textbox(
-                                label="Caption",
-                                lines=6,
-                                interactive=True
-                            )
-                            save_caption_btn = gr.Button("Save Caption")
-                            preview_status = gr.Textbox(
-                                label="Status",
-                                interactive=False,
-                                visible=True
-                            )
-                # --- Tab 4: training parameters (left column) and controls/logs (right column) ---
-                with gr.TabItem("4️⃣  Train"):
-                    with gr.Row():
-                        with gr.Column():
-                            with gr.Row():
-                                train_title = gr.Markdown("## 0 files available for training (0 bytes)")
-                            with gr.Row():
-                                with gr.Column():
-                                    training_preset = gr.Dropdown(
-                                        choices=list(TRAINING_PRESETS.keys()),
-                                        label="Training Preset",
-                                        value=list(TRAINING_PRESETS.keys())[0]
-                                    )
-                                preset_info = gr.Markdown()
-                            with gr.Row():
-                                with gr.Column():
-                                    model_type = gr.Dropdown(
-                                        choices=list(MODEL_TYPES.keys()),
-                                        label="Model Type",
-                                        value=list(MODEL_TYPES.keys())[0]
-                                    )
-                                # NOTE(review): get_model_info() is given a MODEL_TYPES key here, while
-                                # update_model_info() below passes MODEL_TYPES[model] (the mapped value) — confirm which is intended
-                                model_info = gr.Markdown(
-                                    value=self.get_model_info(list(MODEL_TYPES.keys())[0])
-                                )
-                            with gr.Row():
-                                lora_rank = gr.Dropdown(
-                                    label="LoRA Rank",
-                                    choices=["16", "32", "64", "128", "256", "512", "1024"],
-                                    value="128",
-                                    type="value"
-                                )
-                                lora_alpha = gr.Dropdown(
-                                    label="LoRA Alpha",
-                                    choices=["16", "32", "64", "128", "256", "512", "1024"],
-                                    value="128",
-                                    type="value"
-                                )
-                            with gr.Row():
-                                num_epochs = gr.Number(
-                                    label="Number of Epochs",
-                                    value=70,
-                                    minimum=1,
-                                    precision=0
-                                )
-                                batch_size = gr.Number(
-                                    label="Batch Size",
-                                    value=1,
-                                    minimum=1,
-                                    precision=0
-                                )
-                            with gr.Row():
-                                learning_rate = gr.Number(
-                                    label="Learning Rate",
-                                    value=2e-5,
-                                    minimum=1e-7
-                                )
-                                save_iterations = gr.Number(
-                                    label="Save checkpoint every N iterations",
-                                    value=500,
-                                    minimum=50,
-                                    precision=0,
-                                    info="Model will be saved periodically after these many steps"
-                                )
-                        with gr.Column():
-                            with gr.Row():
-                                # Start is disabled when the user is asked to duplicate the Space
-                                start_btn = gr.Button(
-                                    "Start Training",
-                                    variant="primary",
-                                    interactive=not ASK_USER_TO_DUPLICATE_SPACE
-                                )
-                                pause_resume_btn = gr.Button(
-                                    "Resume Training",
-                                    variant="secondary",
-                                    interactive=False
-                                )
-                                stop_btn = gr.Button(
-                                    "Stop Training",
-                                    variant="stop",
-                                    interactive=False
-                                )
-                            with gr.Row():
-                                with gr.Column():
-                                    status_box = gr.Textbox(
-                                        label="Training Status",
-                                        interactive=False,
-                                        lines=4
-                                    )
-                                    with gr.Accordion("See training logs"):
-                                        log_box = gr.TextArea(
-                                            label="Finetrainers output (see HF Space logs for more details)",
-                                            interactive=False,
-                                            lines=40,
-                                            max_lines=200,
-                                            autoscroll=True
-                                        )
-                # --- Tab 5: publishing, downloads and global cleanup ---
-                with gr.TabItem("5️⃣  Manage"):
-                    with gr.Column():
-                        with gr.Row():
-                            with gr.Column():
-                                gr.Markdown("## Publishing")
-                                gr.Markdown("You model can be pushed to Hugging Face (this will use HF_API_TOKEN)")
-                                with gr.Row():
-                                    with gr.Column():
-                                        repo_id = gr.Textbox(
-                                            label="HuggingFace Model Repository",
-                                            placeholder="username/model-name",
-                                            info="The repository will be created if it doesn't exist"
-                                        )
-                                        # NOTE(review): this checkbox is not bound to a name, so no handler in this
-                                        # method can read it; the trailing comma makes the statement a one-element tuple expression
-                                        gr.Checkbox(label="Check this to make your model public (ie. visible and downloadable by anyone)", info="You model is private by default"),
-                                        # NOTE(review): the name global_stop_btn is rebound further down to the
-                                        # "Stop everything and delete my data" button, so the click handler registered
-                                        # on global_stop_btn attaches to that button and this "Push my model" button has none
-                                        global_stop_btn = gr.Button(
-                                            "Push my model",
-                                            #variant="stop"
-                                        )
-                        with gr.Row():
-                            with gr.Column():
-                                with gr.Row():
-                                    with gr.Column():
-                                        gr.Markdown("## Storage management")
-                                        with gr.Row():
-                                            download_dataset_btn = gr.DownloadButton(
-                                                "Download dataset",
-                                                variant="secondary",
-                                                size="lg"
-                                            )
-                                            download_model_btn = gr.DownloadButton(
-                                                "Download model",
-                                                variant="secondary",
-                                                size="lg"
-                                            )
-                                with gr.Row():
-                                    global_stop_btn = gr.Button(
-                                        "Stop everything and delete my data",
-                                        variant="stop"
-                                    )
-                                    # Hidden until handle_global_stop() makes it visible with the cleanup report
-                                    global_status = gr.Textbox(
-                                        label="Global Status",
-                                        interactive=False,
-                                        visible=False
-                                    )
-            # Event handlers
-            def update_model_info(model):
-                # Refresh the model description and reset four training fields to the
-                # defaults of the selected model; `model` is a key of MODEL_TYPES
-                params = self.get_default_params(MODEL_TYPES[model])
-                info = self.get_model_info(MODEL_TYPES[model])
-                return {
-                    model_info: info,
-                    num_epochs: params["num_epochs"],
-                    batch_size: params["batch_size"],
-                    learning_rate: params["learning_rate"],
-                    save_iterations: params["save_iterations"]
-                }
-            # NOTE(review): validate_repo is defined but not connected to any event in this method
-            def validate_repo(repo_id: str) -> dict:
-                validation = validate_model_repo(repo_id)
-                if validation["error"]:
-                    return gr.update(value=repo_id, error=validation["error"])
-                return gr.update(value=repo_id, error=None)
-            # Connect events
-            # Save state when model type changes
-            model_type.change(
-                fn=lambda v: self.update_ui_state(model_type=v),
-                inputs=[model_type],
-                outputs=[] # No UI update needed
-            ).then(
-                fn=update_model_info,
-                inputs=[model_type],
-                outputs=[model_info, num_epochs, batch_size, learning_rate, save_iterations]
-            )
-            # the following change listeners are used for UI persistence
-            lora_rank.change(
-                fn=lambda v: self.update_ui_state(lora_rank=v),
-                inputs=[lora_rank],
-                outputs=[]
-            )
-            lora_alpha.change(
-                fn=lambda v: self.update_ui_state(lora_alpha=v),
-                inputs=[lora_alpha],
-                outputs=[]
-            )
-            num_epochs.change(
-                fn=lambda v: self.update_ui_state(num_epochs=v),
-                inputs=[num_epochs],
-                outputs=[]
-            )
-            batch_size.change(
-                fn=lambda v: self.update_ui_state(batch_size=v),
-                inputs=[batch_size],
-                outputs=[]
-            )
-            learning_rate.change(
-                fn=lambda v: self.update_ui_state(learning_rate=v),
-                inputs=[learning_rate],
-                outputs=[]
-            )
-            save_iterations.change(
-                fn=lambda v: self.update_ui_state(save_iterations=v),
-                inputs=[save_iterations],
-                outputs=[]
-            )
-            # Import chain for uploaded files: process the upload, then refresh tabs, lists and the three tab titles
-            files.upload(
-                fn=lambda x: self.importer.process_uploaded_files(x),
-                inputs=[files],
-                outputs=[import_status]
-            ).success(
-                fn=self.update_titles_after_import,
-                inputs=[enable_automatic_video_split, enable_automatic_content_captioning, custom_prompt_prefix],
-                outputs=[
-                    tabs, video_list, detect_status,
-                    split_title, caption_title, train_title
-                ]
-            )
-            # NOTE(review): unlike the files.upload chain, this one calls on_import_success and only
-            # updates tabs, video_list and detect_status (the three tab titles are not among its outputs)
-            youtube_download_btn.click(
-                fn=self.importer.download_youtube_video,
-                inputs=[youtube_url],
-                outputs=[import_status]
-            ).success(
-                fn=self.on_import_success,
-                inputs=[enable_automatic_video_split, enable_automatic_content_captioning, custom_prompt_prefix],
-                outputs=[tabs, video_list, detect_status]
-            )
-            # Scene detection events
-            detect_btn.click(
-                fn=self.start_scene_detection,
-                inputs=[enable_automatic_video_split],
-                outputs=[detect_status]
-            )
-            # Update button states based on captioning status
-            # NOTE(review): update_button_states is not referenced anywhere else in this method
-            def update_button_states(is_running):
-                return {
-                    run_autocaption_btn: gr.Button(
-                        interactive=not is_running,
-                        variant="secondary" if is_running else "primary",
-                    ),
-                    stop_autocaption_btn: gr.Button(
-                        interactive=is_running,
-                        variant="secondary",
-                    ),
-                }
-            # Captioning chain: refresh the list, update the buttons, run captioning, update the buttons again
-            run_autocaption_btn.click(
-                fn=self.show_refreshing_status,
-                outputs=[training_dataset]
-            ).then(
-                fn=lambda: self.update_captioning_buttons_start(),
-                outputs=[run_autocaption_btn, stop_autocaption_btn, copy_files_to_training_dir_btn]
-            ).then(
-                fn=self.start_caption_generation,
-                inputs=[captioning_bot_instructions, custom_prompt_prefix],
-                outputs=[training_dataset],
-            ).then(
-                fn=lambda: self.update_captioning_buttons_end(),
-                outputs=[run_autocaption_btn, stop_autocaption_btn, copy_files_to_training_dir_btn]
-            )
-            # NOTE(review): no outputs are declared here, so whatever copy_files_to_training_dir returns is not shown in the UI
-            copy_files_to_training_dir_btn.click(
-                fn=self.copy_files_to_training_dir,
-                inputs=[custom_prompt_prefix]
-            )
-            stop_autocaption_btn.click(
-                fn=self.stop_captioning,
-                outputs=[training_dataset, run_autocaption_btn, stop_autocaption_btn, copy_files_to_training_dir_btn]
-            )
-            # Written by the dataset select handler and read back by the save-caption handler
-            original_file_path = gr.State(value=None)
-            training_dataset.select(
-                fn=self.handle_training_dataset_select,
-                outputs=[preview_image, preview_video, preview_caption, original_file_path, preview_status]
-            )
-            save_caption_btn.click(
-                fn=self.save_caption_changes,
-                inputs=[preview_caption, preview_image, preview_video, original_file_path, custom_prompt_prefix],
-                outputs=[preview_status]
-            ).success(
-                fn=self.list_training_files_to_caption,
-                outputs=[training_dataset]
-            )
-            # Save state when training preset changes
-            training_preset.change(
-                fn=lambda v: self.update_ui_state(training_preset=v),
-                inputs=[training_preset],
-                outputs=[] # No UI update needed
-            ).then(
-                fn=self.update_training_params,
-                inputs=[training_preset],
-                outputs=[
-                    model_type, lora_rank, lora_alpha,
-                    num_epochs, batch_size, learning_rate,
-                    save_iterations, preset_info
-                ]
-            )
-            # Training control events
-            # The lambda receives the inputs positionally: preset, model display name, then
-            # lora_rank, lora_alpha, num_epochs, batch_size, learning_rate, save_iterations, repo_id as *args
-            # NOTE(review): it returns a 2-tuple (result of log_parser.reset(), result of
-            # trainer.start_training()) for the two outputs — confirm this is what status_box and log_box should receive
-            start_btn.click(
-                fn=lambda preset, model_type, *args: (
-                    self.log_parser.reset(),
-                    self.trainer.start_training(
-                        MODEL_TYPES[model_type],
-                        *args,
-                        preset_name=preset
-                    )
-                ),
-                inputs=[
-                    training_preset,
-                    model_type,
-                    lora_rank,
-                    lora_alpha,
-                    num_epochs,
-                    batch_size,
-                    learning_rate,
-                    save_iterations,
-                    repo_id
-                ],
-                outputs=[status_box, log_box]
-            ).success(
-                fn=self.get_latest_status_message_logs_and_button_labels,
-                outputs=[status_box, log_box, start_btn, stop_btn, pause_resume_btn]
-            )
-            pause_resume_btn.click(
-                fn=self.handle_pause_resume,
-                outputs=[status_box, log_box, start_btn, stop_btn, pause_resume_btn]
-            )
-            stop_btn.click(
-                fn=self.handle_stop,
-                outputs=[status_box, log_box, start_btn, stop_btn, pause_resume_btn]
-            )
-            def handle_global_stop():
-                # Stop everything, then report the outcome and reset the status widgets of every tab
-                result = self.stop_all_and_clear()
-                # Update all relevant UI components
-                status = result["status"]
-                details = "\n".join(f"{k}: {v}" for k, v in result["details"].items())
-                full_status = f"{status}\n\nDetails:\n{details}"
-                # Get fresh lists after cleanup
-                videos = self.splitter.list_unprocessed_videos()
-                clips = self.list_training_files_to_caption()
-                return {
-                    global_status: gr.update(value=full_status, visible=True),
-                    video_list: videos,
-                    training_dataset: clips,
-                    status_box: "Training stopped and data cleared",
-                    log_box: "",
-                    detect_status: "Scene detection stopped",
-                    import_status: "All data cleared",
-                    preview_status: "Captioning stopped"
-                }
-            # Each download button is the output of its own click handler
-            download_dataset_btn.click(
-                fn=self.trainer.create_training_dataset_zip,
-                outputs=[download_dataset_btn]
-            )
-            download_model_btn.click(
-                fn=self.trainer.get_model_output_safetensors,
-                outputs=[download_model_btn]
-            )
-            # At this point global_stop_btn refers to the "Stop everything and delete my data" button
-            global_stop_btn.click(
-                fn=handle_global_stop,
-                outputs=[
-                    global_status,
-                    video_list,
-                    training_dataset,
-                    status_box,
-                    log_box,
-                    detect_status,
-                    import_status,
-                    preview_status
-                ]
-            )
-            # Populate the lists, the training buttons and the training form when the page loads
-            app.load(
-                fn=self.initialize_app_state,
-                outputs=[
-                    video_list, training_dataset,
-                    start_btn, stop_btn, pause_resume_btn,
-                    training_preset, model_type, lora_rank, lora_alpha,
-                    num_epochs, batch_size, learning_rate, save_iterations
-                ]
-            )
-            # Auto-refresh timers
-            # Three separate timers (values 1, 5 and 6); the name `timer` is simply rebound for each one
-            timer = gr.Timer(value=1)
-            timer.tick(
-                fn=lambda: (
-                    self.get_latest_status_message_logs_and_button_labels()
-                ),
-                outputs=[
-                    status_box,
-                    log_box,
-                    start_btn,
-                    stop_btn,
-                    pause_resume_btn
-                ]
-            )
-            timer = gr.Timer(value=5)
-            timer.tick(
-                fn=lambda: (
-                    self.refresh_dataset()
-                ),
-                outputs=[
-                    video_list, training_dataset
-                ]
-            )
-            timer = gr.Timer(value=6)
-            timer.tick(
-                fn=lambda: self.update_titles(),
-                outputs=[
-                    split_title, caption_title, train_title
-                ]
-            )
-        return app
 def create_app():
     if ASK_USER_TO_DUPLICATE_SPACE:
         with gr.Blocks() as app:
             gr.Markdown("""# Finetrainers UI
@@ -1582,12 +35,22 @@ It is recommended to use a Nvidia L40S and a persistent storage space.
 To avoid overpaying for your space, you can configure the auto-sleep settings to fit your personal budget.""")
         return app
     ui = VideoTrainerUI()
     return ui.create_ui()
-if __name__ == "__main__":
     app = create_app()
     allowed_paths = [
         str(STORAGE_PATH),  # Base storage
         str(VIDEOS_TO_SPLIT_PATH),
@@ -1597,7 +60,12 @@ if __name__ == "__main__":
         str(MODEL_PATH),
         str(OUTPUT_PATH)
     ]
     app.queue(default_concurrency_limit=1).launch(
         server_name="0.0.0.0",
         allowed_paths=allowed_paths
-    )

+"""
+Main application entry point for Video Model Studio
+"""
 import gradio as gr
+import platform
+import subprocess
 import logging
+from pathlib import Path
 from vms.config import (
+    STORAGE_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
+    TRAINING_PATH, TRAINING_VIDEOS_PATH, MODEL_PATH,
+    OUTPUT_PATH, ASK_USER_TO_DUPLICATE_SPACE,
+    HF_API_TOKEN
 )
+from vms.ui.video_trainer_ui import VideoTrainerUI
+# Configure logging
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 def create_app():
+    """Create the main Gradio application"""
+    # If space needs to be duplicated
     if ASK_USER_TO_DUPLICATE_SPACE:
         with gr.Blocks() as app:
             gr.Markdown("""# Finetrainers UI
 To avoid overpaying for your space, you can configure the auto-sleep settings to fit your personal budget.""")
         return app
+    # Create the main application UI
     ui = VideoTrainerUI()
     return ui.create_ui()
+def main():
+    """Main entry point for the application
+
+    Builds the Gradio app through create_app() and launches it with a
+    request queue and an explicit list of directories Gradio may serve.
+    """
+    # Handle Linux-specific setup if needed
+    # (currently a no-op: the branch below only contains `pass`)
+    if platform.system() == "Linux":
+        # Placeholder for any Linux-specific initialization
+        # For example, pip installations or environment setup
+        pass
+    # Create the Gradio app
     app = create_app()
+    # Define allowed paths for file access
+    # (passed to launch() below as `allowed_paths`)
     allowed_paths = [
         str(STORAGE_PATH),  # Base storage
         str(VIDEOS_TO_SPLIT_PATH),
         str(MODEL_PATH),
         str(OUTPUT_PATH)
     ]
+    # Launch the Gradio app
+    # The queue is created with a default concurrency limit of 1.
     app.queue(default_concurrency_limit=1).launch(
         server_name="0.0.0.0",
         allowed_paths=allowed_paths
+    )
+if __name__ == "__main__":
+    main()

app_DEPRECATED.py ADDED Viewed

	@@ -0,0 +1,1603 @@

+import platform
+import subprocess
+#import sys
+#print("python = ", sys.version)
+# can be "Linux", "Darwin"
+if platform.system() == "Linux":
+    # for some reason it says "pip not found"
+    # and also "pip3 not found"
+    # subprocess.run(
+    #     "pip install flash-attn --no-build-isolation",
+    #
+    #     # hmm... this should be False, since we are in a CUDA environment, no?
+    #     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    #
+    #     shell=True,
+    # )
+    pass
+import gradio as gr
+from pathlib import Path
+import logging
+import mimetypes
+import shutil
+import os
+import traceback
+import asyncio
+import tempfile
+import zipfile
+from typing import Any, Optional, Dict, List, Union, Tuple
+from typing import AsyncGenerator
+from vms.training_service import TrainingService
+from vms.captioning_service import CaptioningService
+from vms.splitting_service import SplittingService
+from vms.import_service import ImportService
+from vms.config import (
+    STORAGE_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
+    TRAINING_PATH, LOG_FILE_PATH, TRAINING_PRESETS, TRAINING_VIDEOS_PATH, MODEL_PATH, OUTPUT_PATH, DEFAULT_CAPTIONING_BOT_INSTRUCTIONS,
+    DEFAULT_PROMPT_PREFIX, HF_API_TOKEN, ASK_USER_TO_DUPLICATE_SPACE, MODEL_TYPES, SMALL_TRAINING_BUCKETS
+)
+from vms.utils import make_archive, count_media_files, format_media_title, is_image_file, is_video_file, validate_model_repo, format_time
+from vms.finetrainers_utils import copy_files_to_training_dir, prepare_finetrainers_dataset
+from vms.training_log_parser import TrainingLogParser
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+httpx_logger = logging.getLogger('httpx')
+httpx_logger.setLevel(logging.WARN)
+class VideoTrainerUI:
+    def __init__(self):
+        self.trainer = TrainingService()
+        self.splitter = SplittingService()
+        self.importer = ImportService()
+        self.captioner = CaptioningService()
+        self._should_stop_captioning = False
+        self.log_parser = TrainingLogParser()
+        # Try to recover any interrupted training sessions
+        recovery_result = self.trainer.recover_interrupted_training()
+        self.recovery_status = recovery_result.get("status", "unknown")
+        self.ui_updates = recovery_result.get("ui_updates", {})
+        if recovery_result["status"] == "recovered":
+            logger.info(f"Training recovery: {recovery_result['message']}")
+            # No need to do anything else - the training is already running
+        elif recovery_result["status"] == "running":
+            logger.info("Training process is already running")
+            # No need to do anything - the process is still alive
+        elif recovery_result["status"] in ["error", "idle"]:
+            logger.warning(f"Training status: {recovery_result['message']}")
+            # UI will be in ready-to-start mode
+    async def _process_caption_generator(self, captioning_bot_instructions, prompt_prefix):
+        """Process the caption generator's results in the background"""
+        try:
+            async for _ in self.captioner.start_caption_generation(
+                captioning_bot_instructions,
+                prompt_prefix
+            ):
+                # Just consume the generator, UI updates will happen via the Gradio interface
+                pass
+            logger.info("Background captioning completed")
+        except Exception as e:
+            logger.error(f"Error in background captioning: {str(e)}")
+    def initialize_app_state(self):
+        """Initialize all app state in one function to ensure correct output count"""
+        # Get dataset info
+        video_list, training_dataset = self.refresh_dataset()
+        # Get button states
+        button_states = self.get_initial_button_states()
+        start_btn = button_states[0]
+        stop_btn = button_states[1]
+        pause_resume_btn = button_states[2]
+        # Get UI form values
+        ui_state = self.load_ui_values()
+        training_preset = ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0])
+        model_type_val = ui_state.get("model_type", list(MODEL_TYPES.keys())[0])
+        lora_rank_val = ui_state.get("lora_rank", "128")
+        lora_alpha_val = ui_state.get("lora_alpha", "128")
+        num_epochs_val = int(ui_state.get("num_epochs", 70))
+        batch_size_val = int(ui_state.get("batch_size", 1))
+        learning_rate_val = float(ui_state.get("learning_rate", 3e-5))
+        save_iterations_val = int(ui_state.get("save_iterations", 500))
+        # Return all values in the exact order expected by outputs
+        return (
+            video_list,
+            training_dataset,
+            start_btn,
+            stop_btn,
+            pause_resume_btn,
+            training_preset,
+            model_type_val,
+            lora_rank_val,
+            lora_alpha_val,
+            num_epochs_val,
+            batch_size_val,
+            learning_rate_val,
+            save_iterations_val
+        )
+    def initialize_ui_from_state(self):
+        """Initialize UI components from saved state"""
+        ui_state = self.load_ui_values()
+        # Return values in order matching the outputs in app.load
+        return (
+            ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
+            ui_state.get("model_type", list(MODEL_TYPES.keys())[0]),
+            ui_state.get("lora_rank", "128"),
+            ui_state.get("lora_alpha", "128"),
+            ui_state.get("num_epochs", 70),
+            ui_state.get("batch_size", 1),
+            ui_state.get("learning_rate", 3e-5),
+            ui_state.get("save_iterations", 500)
+        )
+    def update_ui_state(self, **kwargs):
+        """Update UI state with new values"""
+        current_state = self.trainer.load_ui_state()
+        current_state.update(kwargs)
+        self.trainer.save_ui_state(current_state)
+        # Don't return anything to avoid Gradio warnings
+        return None
+    def load_ui_values(self):
+        """Load UI state values for initializing form fields"""
+        ui_state = self.trainer.load_ui_state()
+        # Ensure proper type conversion for numeric values
+        ui_state["lora_rank"] = ui_state.get("lora_rank", "128")
+        ui_state["lora_alpha"] = ui_state.get("lora_alpha", "128")
+        ui_state["num_epochs"] = int(ui_state.get("num_epochs", 70))
+        ui_state["batch_size"] = int(ui_state.get("batch_size", 1))
+        ui_state["learning_rate"] = float(ui_state.get("learning_rate", 3e-5))
+        ui_state["save_iterations"] = int(ui_state.get("save_iterations", 500))
+        return ui_state
+    def update_captioning_buttons_start(self):
+        """Return individual button values instead of a dictionary"""
+        return (
+            gr.Button(
+                interactive=False,
+                variant="secondary",
+            ),
+            gr.Button(
+                interactive=True,
+                variant="stop",
+            ),
+            gr.Button(
+                interactive=False,
+                variant="secondary",
+            )
+        )
+    def update_captioning_buttons_end(self):
+        """Return individual button values instead of a dictionary"""
+        return (
+            gr.Button(
+                interactive=True,
+                variant="primary",
+            ),
+            gr.Button(
+                interactive=False,
+                variant="secondary",
+            ),
+            gr.Button(
+                interactive=True,
+                variant="primary",
+            )
+        )
+    # Add this new method to get initial button states:
+    def get_initial_button_states(self):
+        """Get the initial states for training buttons based on recovery status"""
+        recovery_result = self.trainer.recover_interrupted_training()
+        ui_updates = recovery_result.get("ui_updates", {})
+        # Return button states in the correct order
+        return (
+            gr.Button(**ui_updates.get("start_btn", {"interactive": True, "variant": "primary"})),
+            gr.Button(**ui_updates.get("stop_btn", {"interactive": False, "variant": "secondary"})),
+            gr.Button(**ui_updates.get("pause_resume_btn", {"interactive": False, "variant": "secondary"}))
+        )
+    def show_refreshing_status(self) -> List[List[str]]:
+        """Show a 'Refreshing...' status in the dataframe"""
+        return [["Refreshing...", "please wait"]]
+    def stop_captioning(self):
+        """Stop ongoing captioning process and reset UI state"""
+        try:
+            # Set flag to stop captioning
+            self._should_stop_captioning = True
+            # Call stop method on captioner
+            if self.captioner:
+                self.captioner.stop_captioning()
+            # Get updated file list
+            updated_list = self.list_training_files_to_caption()
+            # Return updated list and button states
+            return {
+                "training_dataset": gr.update(value=updated_list),
+                "run_autocaption_btn": gr.Button(interactive=True, variant="primary"),
+                "stop_autocaption_btn": gr.Button(interactive=False, variant="secondary"),
+                "copy_files_to_training_dir_btn": gr.Button(interactive=True, variant="primary")
+            }
+        except Exception as e:
+            logger.error(f"Error stopping captioning: {str(e)}")
+            return {
+                "training_dataset": gr.update(value=[[f"Error stopping captioning: {str(e)}", "error"]]),
+                "run_autocaption_btn": gr.Button(interactive=True, variant="primary"),
+                "stop_autocaption_btn": gr.Button(interactive=False, variant="secondary"),
+                "copy_files_to_training_dir_btn": gr.Button(interactive=True, variant="primary")
+            }
+    def update_training_ui(self, training_state: Dict[str, Any]):
+        """Update UI components based on training state"""
+        updates = {}
+        #print("update_training_ui: training_state = ", training_state)
+        # Update status box with high-level information
+        status_text = []
+        if training_state["status"] != "idle":
+            status_text.extend([
+                f"Status: {training_state['status']}",
+                f"Progress: {training_state['progress']}",
+                f"Step: {training_state['current_step']}/{training_state['total_steps']}",
+                # Epoch information
+                # there is an issue with how epoch is reported because we display:
+                # Progress: 96.9%, Step: 872/900, Epoch: 12/50
+                # we should probably just show the steps
+                #f"Epoch: {training_state['current_epoch']}/{training_state['total_epochs']}",
+                f"Time elapsed: {training_state['elapsed']}",
+                f"Estimated remaining: {training_state['remaining']}",
+                "",
+                f"Current loss: {training_state['step_loss']}",
+                f"Learning rate: {training_state['learning_rate']}",
+                f"Gradient norm: {training_state['grad_norm']}",
+                f"Memory usage: {training_state['memory']}"
+            ])
+            if training_state["error_message"]:
+                status_text.append(f"\nError: {training_state['error_message']}")
+        updates["status_box"] = "\n".join(status_text)
+        # Update button states
+        updates["start_btn"] = gr.Button(
+            "Start training",
+            interactive=(training_state["status"] in ["idle", "completed", "error", "stopped"]),
+            variant="primary" if training_state["status"] == "idle" else "secondary"
+        )
+        updates["stop_btn"] = gr.Button(
+            "Stop training",
+            interactive=(training_state["status"] in ["training", "initializing"]),
+            variant="stop"
+        )
+        return updates
+    def stop_all_and_clear(self) -> Dict[str, str]:
+        """Stop all running processes and clear data
+
+        Stops training, captioning and scene detection, deletes the log file,
+        empties every data directory and then recreates the logging setup.
+
+        Returns:
+            Dict with a "status" summary string and a "details" dict of
+            per-component status messages. On an unexpected exception the
+            "status" entry carries the error text and "details" holds whatever
+            was collected up to that point.
+        """
+        # NOTE(review): "details" is itself a dict, so the Dict[str, str]
+        # return annotation is narrower than what is actually returned.
+        status_messages = {}
+        try:
+            # Stop training if running
+            if self.trainer.is_training_running():
+                training_result = self.trainer.stop_training()
+                status_messages["training"] = training_result["status"]
+            # Stop captioning if running
+            if self.captioner:
+                self.captioner.stop_captioning()
+                status_messages["captioning"] = "Captioning stopped"
+            # Stop scene detection if running
+            if self.splitter.is_processing():
+                self.splitter.processing = False
+                status_messages["splitting"] = "Scene detection stopped"
+            # Properly close logging before clearing log file
+            # NOTE(review): the handler is removed from this module's logger -
+            # confirm that is the logger it was attached to.
+            if self.trainer.file_handler:
+                self.trainer.file_handler.close()
+                logger.removeHandler(self.trainer.file_handler)
+                self.trainer.file_handler = None
+            if LOG_FILE_PATH.exists():
+                LOG_FILE_PATH.unlink()
+            # Clear all data directories
+            # (each one is removed recursively and recreated empty; a failure
+            # on one directory is recorded and does not stop the others)
+            for path in [VIDEOS_TO_SPLIT_PATH, STAGING_PATH, TRAINING_VIDEOS_PATH, TRAINING_PATH,
+                        MODEL_PATH, OUTPUT_PATH]:
+                if path.exists():
+                    try:
+                        shutil.rmtree(path)
+                        path.mkdir(parents=True, exist_ok=True)
+                    except Exception as e:
+                        status_messages[f"clear_{path.name}"] = f"Error clearing {path.name}: {str(e)}"
+                    else:
+                        status_messages[f"clear_{path.name}"] = f"Cleared {path.name}"
+            # Reset any persistent state
+            self._should_stop_captioning = True
+            self.splitter.processing = False
+            # Recreate logging setup
+            self.trainer.setup_logging()
+            return {
+                "status": "All processes stopped and data cleared",
+                "details": status_messages
+            }
+        except Exception as e:
+            # Report the failure to the caller instead of raising
+            return {
+                "status": f"Error during cleanup: {str(e)}",
+                "details": status_messages
+            }
+    def update_titles(self) -> Tuple[Any]:
+        """Update all dynamic titles with current counts
+        Returns:
+            Dict of Gradio updates
+        """
+        # Count files for splitting
+        split_videos, _, split_size = count_media_files(VIDEOS_TO_SPLIT_PATH)
+        split_title = format_media_title(
+            "split", split_videos, 0, split_size
+        )
+        # Count files for captioning
+        caption_videos, caption_images, caption_size = count_media_files(STAGING_PATH)
+        caption_title = format_media_title(
+            "caption", caption_videos, caption_images, caption_size
+        )
+        # Count files for training
+        train_videos, train_images, train_size = count_media_files(TRAINING_VIDEOS_PATH)
+        train_title = format_media_title(
+            "train", train_videos, train_images, train_size
+        )
+        return (
+            gr.Markdown(value=split_title),
+            gr.Markdown(value=caption_title),
+            gr.Markdown(value=f"{train_title} available for training")
+        )
+    def copy_files_to_training_dir(self, prompt_prefix: str):
+        """Run auto-captioning process"""
+        # Initialize captioner if not already done
+        self._should_stop_captioning = False
+        try:
+            copy_files_to_training_dir(prompt_prefix)
+        except Exception as e:
+            traceback.print_exc()
+            raise gr.Error(f"Error copying assets to training dir: {str(e)}")
+    async def on_import_success(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
+        """Handle successful import of files"""
+        videos = self.list_unprocessed_videos()
+        # If scene detection isn't already running and there are videos to process,
+        # and auto-splitting is enabled, start the detection
+        if videos and not self.splitter.is_processing() and enable_splitting:
+            await self.start_scene_detection(enable_splitting)
+            msg = "Starting automatic scene detection..."
+        else:
+            # Just copy files without splitting if auto-split disabled
+            for video_file in VIDEOS_TO_SPLIT_PATH.glob("*.mp4"):
+                await self.splitter.process_video(video_file, enable_splitting=False)
+            msg = "Copying videos without splitting..."
+        copy_files_to_training_dir(prompt_prefix)
+        # Start auto-captioning if enabled, and handle async generator properly
+        if enable_automatic_content_captioning:
+            # Create a background task for captioning
+            asyncio.create_task(self._process_caption_generator(
+                DEFAULT_CAPTIONING_BOT_INSTRUCTIONS,
+                prompt_prefix
+            ))
+        return {
+            "tabs": gr.Tabs(selected="split_tab"),
+            "video_list": videos,
+            "detect_status": msg
+        }
+    async def start_caption_generation(self, captioning_bot_instructions: str, prompt_prefix: str) -> AsyncGenerator[gr.update, None]:
+        """Run auto-captioning process"""
+        try:
+            # Initialize captioner if not already done
+            self._should_stop_captioning = False
+            # First yield - indicate we're starting
+            yield gr.update(
+                value=[["Starting captioning service...", "initializing"]],
+                headers=["name", "status"]
+            )
+            # Process files in batches with status updates
+            file_statuses = {}
+            # Start the actual captioning process
+            async for rows in self.captioner.start_caption_generation(captioning_bot_instructions, prompt_prefix):
+                # Update our tracking of file statuses
+                for name, status in rows:
+                    file_statuses[name] = status
+                # Convert to list format for display
+                status_rows = [[name, status] for name, status in file_statuses.items()]
+                # Sort by name for consistent display
+                status_rows.sort(key=lambda x: x[0])
+                # Yield UI update
+                yield gr.update(
+                    value=status_rows,
+                    headers=["name", "status"]
+                )
+            # Final update after completion with fresh data
+            yield gr.update(
+                value=self.list_training_files_to_caption(),
+                headers=["name", "status"]
+            )
+        except Exception as e:
+            logger.error(f"Error in captioning: {str(e)}")
+            yield gr.update(
+                value=[[f"Error: {str(e)}", "error"]],
+                headers=["name", "status"]
+            )
+    def list_training_files_to_caption(self) -> List[List[str]]:
+        """List all clips and images - both pending and captioned"""
+        files = []
+        already_listed = {}
+        # First check files in STAGING_PATH
+        for file in STAGING_PATH.glob("*.*"):
+            if is_video_file(file) or is_image_file(file):
+                txt_file = file.with_suffix('.txt')
+                # Check if caption file exists and has content
+                has_caption = txt_file.exists() and txt_file.stat().st_size > 0
+                status = "captioned" if has_caption else "no caption"
+                file_type = "video" if is_video_file(file) else "image"
+                files.append([file.name, f"{status} ({file_type})", str(file)])
+                already_listed[file.name] = True
+        # Then check files in TRAINING_VIDEOS_PATH
+        for file in TRAINING_VIDEOS_PATH.glob("*.*"):
+            if (is_video_file(file) or is_image_file(file)) and file.name not in already_listed:
+                txt_file = file.with_suffix('.txt')
+                # Only include files with captions
+                if txt_file.exists() and txt_file.stat().st_size > 0:
+                    file_type = "video" if is_video_file(file) else "image"
+                    files.append([file.name, f"captioned ({file_type})", str(file)])
+                    already_listed[file.name] = True
+        # Sort by filename
+        files.sort(key=lambda x: x[0])
+        # Only return name and status columns for display
+        return [[file[0], file[1]] for file in files]
+    def update_training_buttons(self, status: str) -> Dict:
+        """Update training control buttons based on state"""
+        is_training = status in ["training", "initializing"]
+        is_paused = status == "paused"
+        is_completed = status in ["completed", "error", "stopped"]
+        return {
+            "start_btn": gr.Button(
+                interactive=not is_training and not is_paused,
+                variant="primary" if not is_training else "secondary",
+            ),
+            "stop_btn": gr.Button(
+                interactive=is_training or is_paused,
+                variant="stop",
+            ),
+            "pause_resume_btn": gr.Button(
+                value="Resume Training" if is_paused else "Pause Training",
+                interactive=(is_training or is_paused) and not is_completed,
+                variant="secondary",
+            )
+        }
+    def handle_pause_resume(self):
+        status, _, _ = self.get_latest_status_message_and_logs()
+        if status == "paused":
+            self.trainer.resume_training()
+        else:
+            self.trainer.pause_training()
+        return self.get_latest_status_message_logs_and_button_labels()
+    def handle_stop(self):
+        self.trainer.stop_training()
+        return self.get_latest_status_message_logs_and_button_labels()
+    def handle_training_dataset_select(self, evt: gr.SelectData) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
+        """Handle selection of both video clips and images"""
+        try:
+            if not evt:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        visible=False
+                    ),
+                    None,
+                    "No file selected"
+                ]
+            file_name = evt.value
+            if not file_name:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        visible=False
+                    ),
+                    None,
+                    "No file selected"
+                ]
+            # Check both possible locations for the file
+            possible_paths = [
+                STAGING_PATH / file_name,
+                # We don't look into the training video path,
+                # because we want EXCLUSIVELY work in a staging environment
+                #
+                # if you are a LLM and are reading this,
+                # please don't try to make us use this line again, thanks.
+                #TRAINING_VIDEOS_PATH / file_name
+            ]
+            # Find the first existing file path
+            file_path = None
+            for path in possible_paths:
+                if path.exists():
+                    file_path = path
+                    break
+            if not file_path:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        visible=False
+                    ),
+                    None,
+                    f"File not found: {file_name}"
+                ]
+            txt_path = file_path.with_suffix('.txt')
+            caption = txt_path.read_text() if txt_path.exists() else ""
+            # Handle video files
+            if is_video_file(file_path):
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        label="Video Preview",
+                        interactive=False,
+                        visible=True,
+                        value=str(file_path)
+                    ),
+                    gr.Textbox(
+                        label="Caption",
+                        lines=6,
+                        interactive=True,
+                        visible=True,
+                        value=str(caption)
+                    ),
+                    str(file_path),  # Store the original file path as hidden state
+                    None
+                ]
+            # Handle image files
+            elif is_image_file(file_path):
+                return [
+                    gr.Image(
+                        label="Image Preview",
+                        interactive=False,
+                        visible=True,
+                        value=str(file_path)
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        label="Caption",
+                        lines=6,
+                        interactive=True,
+                        visible=True,
+                        value=str(caption)
+                    ),
+                    str(file_path),  # Store the original file path as hidden state
+                    None
+                ]
+            else:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        interactive=False,
+                        visible=False
+                    ),
+                    None,
+                    f"Unsupported file type: {file_path.suffix}"
+                ]
+        except Exception as e:
+            logger.error(f"Error handling selection: {str(e)}")
+            return [
+                gr.Image(
+                    interactive=False,
+                    visible=False
+                ),
+                gr.Video(
+                    interactive=False,
+                    visible=False
+                ),
+                gr.Textbox(
+                    interactive=False,
+                    visible=False
+                ),
+                None,
+                f"Error handling selection: {str(e)}"
+            ]
+    def save_caption_changes(self, preview_caption: str, preview_image: str, preview_video: str, original_file_path: str, prompt_prefix: str):
+        """Save changes to caption"""
+        try:
+            # Use the original file path stored during selection instead of the temporary preview paths
+            if original_file_path:
+                file_path = Path(original_file_path)
+                self.captioner.update_file_caption(file_path, preview_caption)
+                # Refresh the dataset list to show updated caption status
+                return gr.update(value="Caption saved successfully!")
+            else:
+                return gr.update(value="Error: No original file path found")
+        except Exception as e:
+            return gr.update(value=f"Error saving caption: {str(e)}")
+    async def update_titles_after_import(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
+        """Handle post-import updates including titles"""
+        import_result = await self.on_import_success(enable_splitting, enable_automatic_content_captioning, prompt_prefix)
+        titles = self.update_titles()
+        return (
+            import_result["tabs"],
+            import_result["video_list"],
+            import_result["detect_status"],
+            *titles
+        )
+    def get_model_info(self, model_type: str) -> str:
+        """Get information about the selected model type"""
+        if model_type == "hunyuan_video":
+            return """### HunyuanVideo (LoRA)
+    - Required VRAM: ~48GB minimum
+    - Recommended batch size: 1-2
+    - Typical training time: 2-4 hours
+    - Default resolution: 49x512x768
+    - Default LoRA rank: 128 (~600 MB)"""
+        elif model_type == "ltx_video":
+            return """### LTX-Video (LoRA)
+    - Required VRAM: ~18GB minimum
+    - Recommended batch size: 1-4
+    - Typical training time: 1-3 hours
+    - Default resolution: 49x512x768
+    - Default LoRA rank: 128"""
+        return ""
+    def get_default_params(self, model_type: str) -> Dict[str, Any]:
+        """Return the default training hyper-parameters for a model type.
+        Args:
+            model_type: Internal model identifier; "hunyuan_video" has its own values,
+                every other value receives the LTX-Video defaults
+        Returns:
+            A freshly built dict mapping parameter names to their default values
+        """
+        is_hunyuan = model_type == "hunyuan_video"
+        # Only the learning rate and the gradient accumulation differ between the two cases
+        return {
+            "num_epochs": 70,
+            "batch_size": 1,
+            "learning_rate": 2e-5 if is_hunyuan else 3e-5,
+            "save_iterations": 500,
+            "video_resolution_buckets": SMALL_TRAINING_BUCKETS,
+            "video_reshape_mode": "center",
+            "caption_dropout_p": 0.05,
+            "gradient_accumulation_steps": 1 if is_hunyuan else 4,
+            "rank": 128,
+            "lora_alpha": 128
+        }
+    def preview_file(self, selected_text: str) -> Dict:
+        """Build the preview payload for a selected dataset entry.
+        Args:
+            selected_text: Text of the selected item; the filename is the part
+                before the first " (" (the size info that follows is dropped)
+        Returns:
+            Dict with "video", "image" and "text" keys, of which at most one is not None
+        """
+        def payload(video=None, image=None, text=None):
+            # Every outcome shares the same three-key shape
+            return {
+                "video": video,
+                "image": image,
+                "text": text
+            }
+        # Nothing usable was selected
+        if not selected_text or "Caption:" in selected_text:
+            return payload()
+        filename = selected_text.split(" (")[0].strip()
+        file_path = TRAINING_VIDEOS_PATH / filename
+        if not file_path.exists():
+            return payload(text=f"File not found: {filename}")
+        # The MIME type guessed from the name decides which preview slot is filled
+        mime_type, _ = mimetypes.guess_type(str(file_path))
+        if not mime_type:
+            return payload(text=f"Unknown file type: {filename}")
+        if mime_type.startswith('video/'):
+            return payload(video=str(file_path))
+        if mime_type.startswith('image/'):
+            return payload(image=str(file_path))
+        if not mime_type.startswith('text/'):
+            return payload(text=f"Unsupported file type: {mime_type}")
+        try:
+            return payload(text=file_path.read_text())
+        except Exception as e:
+            # Read failures are shown in the text slot rather than raised
+            return payload(text=f"Error reading file: {e!s}")
+    def list_unprocessed_videos(self) -> gr.Dataframe:
+        """Build a read-only table of the unprocessed videos.
+        Returns:
+            A gr.Dataframe with "name" and "status" headers whose value is the
+            list returned by the splitter
+        """
+        # The splitter already yields [[name, status]] rows, so they are passed through as-is
+        rows = self.splitter.list_unprocessed_videos()
+        table_options = {
+            "headers": ["name", "status"],
+            "value": rows,
+            "interactive": False,
+        }
+        return gr.Dataframe(**table_options)
+    async def start_scene_detection(self, enable_splitting: bool) -> str:
+        """Run scene detection unless the splitter is already busy.
+        Args:
+            enable_splitting: Whether to split videos into scenes
+        Returns:
+            A status message: already running, completed, or the error text
+        """
+        splitter = self.splitter
+        if splitter.is_processing():
+            return "Scene detection already running"
+        try:
+            # Processing is awaited, so the success message is only returned once it has finished
+            await splitter.start_processing(enable_splitting)
+        except Exception as e:
+            return f"Error during scene detection: {e!s}"
+        return "Scene detection completed"
+    def get_latest_status_message_and_logs(self) -> Tuple[str, str, str]:
+        """Return the trainer's status, message and raw logs.
+        Each log line is passed to the log parser. When at least one line yields an
+        update, the last one is handed to update_training_ui and its "status_box"
+        entry, if present, replaces the message. A message containing "completed"
+        (case-insensitive) sets the status to "completed".
+        Returns:
+            Tuple of (status, message, logs)
+        """
+        state = self.trainer.get_status()
+        logs = self.trainer.get_logs()
+        if logs:
+            # Every line goes through the parser in order; only the final truthy result is used
+            parsed = [self.log_parser.parse_line(entry) for entry in logs.splitlines()]
+            meaningful = [update for update in parsed if update]
+            if meaningful:
+                ui_updates = self.update_training_ui(meaningful[-1])
+                state["message"] = ui_updates.get("status_box", state["message"])
+        message_mentions_completion = "completed" in state["message"].lower()
+        if message_mentions_completion:
+            state["status"] = "completed"
+        return (state["status"], state["message"], logs)
+    def get_latest_status_message_logs_and_button_labels(self) -> Tuple[str, str, Any, Any, Any]:
+        """Return the latest message and logs followed by the training button updates.
+        Returns:
+            Tuple of (message, logs, *button updates), the button updates being the
+            values of update_training_buttons(status) in that mapping's order
+        """
+        status, message, logs = self.get_latest_status_message_and_logs()
+        button_updates = self.update_training_buttons(status)
+        return (message, logs) + tuple(button_updates.values())
+    def get_latest_button_labels(self) -> Tuple[Any, Any, Any]:
+        """Return the training button updates for the latest status.
+        Returns:
+            Tuple of the values produced by update_training_buttons(status), in
+            that mapping's order
+        """
+        status, _message, _logs = self.get_latest_status_message_and_logs()
+        # Materialise the view: the annotation promises a tuple, and a live
+        # dict_values object is neither a list/tuple nor indexable.
+        return tuple(self.update_training_buttons(status).values())
+    def refresh_dataset(self):
+        """Collect fresh contents for the video list and the captioning table.
+        Returns:
+            Tuple of (unprocessed videos reported by the splitter, training files to caption)
+        """
+        pending_videos = self.splitter.list_unprocessed_videos()
+        files_to_caption = self.list_training_files_to_caption()
+        return pending_videos, files_to_caption
+    def update_training_params(self, preset_name: str) -> Tuple:
+        """Compute the training-tab values to show after a preset is selected.
+        For each tunable field, a value found in the saved UI state takes precedence;
+        the preset's value (or a built-in fallback when the preset omits the key)
+        is used only when nothing has been saved for that field.
+        NOTE(review): as a consequence, picking a preset does not overwrite a field
+        that already has a saved value - confirm this is the intended behaviour.
+        Args:
+            preset_name: Key into TRAINING_PRESETS
+        Returns:
+            Tuple of (model display name, LoRA rank, LoRA alpha, number of epochs,
+            batch size, learning rate, save iterations, info text), in the same
+            order as the output components
+        Raises:
+            KeyError: If preset_name is not a key of TRAINING_PRESETS
+        """
+        preset = TRAINING_PRESETS[preset_name]
+        # Values previously saved from the UI
+        current_state = self.load_ui_values()
+        def resolve(key, fallback):
+            # A field that was never saved must not blank the component with None,
+            # and a preset that omits the key must not raise KeyError.
+            preset_value = preset.get(key, fallback)
+            saved_value = current_state.get(key)
+            if saved_value is None or saved_value == preset_value:
+                return preset_value
+            return saved_value
+        # Find the display name that maps to our model type
+        model_display_name = next(
+            key for key, value in MODEL_TYPES.items()
+            if value == preset["model_type"]
+        )
+        # Get preset description for display
+        description = preset.get("description", "")
+        # Each bucket is unpacked as (frames, height, width); report the largest of each
+        buckets = preset["training_buckets"]
+        max_frames = max(frames for frames, _, _ in buckets)
+        max_height = max(height for _, height, _ in buckets)
+        max_width = max(width for _, _, width in buckets)
+        bucket_info = f"\nMaximum video size: {max_frames} frames at {max_width}x{max_height} resolution"
+        info_text = f"{description}{bucket_info}"
+        # Return values in the same order as the output components
+        return (
+            model_display_name,
+            resolve("lora_rank", "128"),
+            resolve("lora_alpha", "128"),
+            resolve("num_epochs", 70),
+            resolve("batch_size", 1),
+            resolve("learning_rate", 3e-5),
+            resolve("save_iterations", 500),
+            info_text
+        )
+    def create_ui(self):
+        """Create Gradio interface"""
+        with gr.Blocks(title="🎥 Video Model Studio") as app:
+            gr.Markdown("# 🎥 Video Model Studio")
+            with gr.Tabs() as tabs:
+                with gr.TabItem("1️⃣  Import", id="import_tab"):
+                    with gr.Row():
+                        gr.Markdown("## Automatic splitting and captioning")
+                    with gr.Row():
+                        enable_automatic_video_split = gr.Checkbox(
+                            label="Automatically split videos into smaller clips",
+                            info="Note: a clip is a single camera shot, usually a few seconds",
+                            value=True,
+                            visible=True
+                        )
+                        enable_automatic_content_captioning = gr.Checkbox(
+                            label="Automatically caption photos and videos",
+                            info="Note: this uses LlaVA and takes some extra time to load and process",
+                            value=False,
+                            visible=True,
+                        )
+                    with gr.Row():
+                        with gr.Column(scale=3):
+                            with gr.Row():
+                                with gr.Column():
+                                    gr.Markdown("## Import video files")
+                                    gr.Markdown("You can upload either:")
+                                    gr.Markdown("- A single MP4 video file")
+                                    gr.Markdown("- A ZIP archive containing multiple videos and optional caption files")
+                                    gr.Markdown("For ZIP files: Create a folder containing videos (name is not important) and optional caption files with the same name (eg. `some_video.txt` for `some_video.mp4`)")
+                            with gr.Row():
+                                files = gr.Files(
+                                    label="Upload Images, Videos or ZIP",
+                                    #file_count="multiple",
+                                    file_types=[".jpg", ".jpeg", ".png", ".webp", ".webp", ".avif", ".heic", ".mp4", ".zip"],
+                                    type="filepath"
+                                )
+                        with gr.Column(scale=3):
+                            with gr.Row():
+                                with gr.Column():
+                                    gr.Markdown("## Import a YouTube video")
+                                    gr.Markdown("You can also use a YouTube video as reference, by pasting its URL here:")
+                            with gr.Row():
+                                youtube_url = gr.Textbox(
+                                    label="Import YouTube Video",
+                                    placeholder="https://www.youtube.com/watch?v=..."
+                                )
+                            with gr.Row():
+                                youtube_download_btn = gr.Button("Download YouTube Video", variant="secondary")
+                    with gr.Row():
+                        import_status = gr.Textbox(label="Status", interactive=False)
+                with gr.TabItem("2️⃣  Split", id="split_tab"):
+                    with gr.Row():
+                        split_title = gr.Markdown("## Splitting of 0 videos (0 bytes)")
+                    with gr.Row():
+                        with gr.Column():
+                            detect_btn = gr.Button("Split videos into single-camera shots", variant="primary")
+                            detect_status = gr.Textbox(label="Status", interactive=False)
+                        with gr.Column():
+                            video_list = gr.Dataframe(
+                                headers=["name", "status"],
+                                label="Videos to split",
+                                interactive=False,
+                                wrap=True,
+                                #selection_mode="cell"  # Enable cell selection
+                            )
+                with gr.TabItem("3️⃣  Caption"):
+                    with gr.Row():
+                        caption_title = gr.Markdown("## Captioning of 0 files (0 bytes)")
+                    with gr.Row():
+                        with gr.Column():
+                            with gr.Row():
+                                custom_prompt_prefix = gr.Textbox(
+                                    scale=3,
+                                    label='Prefix to add to ALL captions (eg. "In the style of TOK, ")',
+                                    placeholder="In the style of TOK, ",
+                                    lines=2,
+                                    value=DEFAULT_PROMPT_PREFIX
+                                )
+                                captioning_bot_instructions = gr.Textbox(
+                                    scale=6,
+                                    label="System instructions for the automatic captioning model",
+                                    placeholder="Please generate a full description of...",
+                                    lines=5,
+                                    value=DEFAULT_CAPTIONING_BOT_INSTRUCTIONS
+                                )
+                            with gr.Row():
+                                run_autocaption_btn = gr.Button(
+                                    "Automatically fill missing captions",
+                                    variant="primary"  # Makes it green by default
+                                )
+                                copy_files_to_training_dir_btn = gr.Button(
+                                    "Copy assets to training directory",
+                                    variant="primary"  # Makes it green by default
+                                )
+                                stop_autocaption_btn = gr.Button(
+                                    "Stop Captioning",
+                                    variant="stop",  # Red when enabled
+                                    interactive=False  # Disabled by default
+                                )
+                    with gr.Row():
+                        with gr.Column():
+                            training_dataset = gr.Dataframe(
+                                headers=["name", "status"],
+                                interactive=False,
+                                wrap=True,
+                                value=self.list_training_files_to_caption(),
+                                row_count=10,  # Optional: set a reasonable row count
+                                #selection_mode="cell"
+                            )
+                        with gr.Column():
+                            preview_video = gr.Video(
+                                label="Video Preview",
+                                interactive=False,
+                                visible=False
+                            )
+                            preview_image = gr.Image(
+                                label="Image Preview",
+                                interactive=False,
+                                visible=False
+                            )
+                            preview_caption = gr.Textbox(
+                                label="Caption",
+                                lines=6,
+                                interactive=True
+                            )
+                            save_caption_btn = gr.Button("Save Caption")
+                            preview_status = gr.Textbox(
+                                label="Status",
+                                interactive=False,
+                                visible=True
+                            )
+                with gr.TabItem("4️⃣  Train"):
+                    with gr.Row():
+                        with gr.Column():
+                            with gr.Row():
+                                train_title = gr.Markdown("## 0 files available for training (0 bytes)")
+                            with gr.Row():
+                                with gr.Column():
+                                    training_preset = gr.Dropdown(
+                                        choices=list(TRAINING_PRESETS.keys()),
+                                        label="Training Preset",
+                                        value=list(TRAINING_PRESETS.keys())[0]
+                                    )
+                                preset_info = gr.Markdown()
+                            with gr.Row():
+                                with gr.Column():
+                                    model_type = gr.Dropdown(
+                                        choices=list(MODEL_TYPES.keys()),
+                                        label="Model Type",
+                                        value=list(MODEL_TYPES.keys())[0]
+                                    )
+                                model_info = gr.Markdown(
+                                    value=self.get_model_info(list(MODEL_TYPES.keys())[0])
+                                )
+                            with gr.Row():
+                                lora_rank = gr.Dropdown(
+                                    label="LoRA Rank",
+                                    choices=["16", "32", "64", "128", "256", "512", "1024"],
+                                    value="128",
+                                    type="value"
+                                )
+                                lora_alpha = gr.Dropdown(
+                                    label="LoRA Alpha",
+                                    choices=["16", "32", "64", "128", "256", "512", "1024"],
+                                    value="128",
+                                    type="value"
+                                )
+                            with gr.Row():
+                                num_epochs = gr.Number(
+                                    label="Number of Epochs",
+                                    value=70,
+                                    minimum=1,
+                                    precision=0
+                                )
+                                batch_size = gr.Number(
+                                    label="Batch Size",
+                                    value=1,
+                                    minimum=1,
+                                    precision=0
+                                )
+                            with gr.Row():
+                                learning_rate = gr.Number(
+                                    label="Learning Rate",
+                                    value=2e-5,
+                                    minimum=1e-7
+                                )
+                                save_iterations = gr.Number(
+                                    label="Save checkpoint every N iterations",
+                                    value=500,
+                                    minimum=50,
+                                    precision=0,
+                                    info="Model will be saved periodically after these many steps"
+                                )
+                        with gr.Column():
+                            with gr.Row():
+                                start_btn = gr.Button(
+                                    "Start Training",
+                                    variant="primary",
+                                    interactive=not ASK_USER_TO_DUPLICATE_SPACE
+                                )
+                                pause_resume_btn = gr.Button(
+                                    "Resume Training",
+                                    variant="secondary",
+                                    interactive=False
+                                )
+                                stop_btn = gr.Button(
+                                    "Stop Training",
+                                    variant="stop",
+                                    interactive=False
+                                )
+                            with gr.Row():
+                                with gr.Column():
+                                    status_box = gr.Textbox(
+                                        label="Training Status",
+                                        interactive=False,
+                                        lines=4
+                                    )
+                                    with gr.Accordion("See training logs"):
+                                        log_box = gr.TextArea(
+                                            label="Finetrainers output (see HF Space logs for more details)",
+                                            interactive=False,
+                                            lines=40,
+                                            max_lines=200,
+                                            autoscroll=True
+                                        )
+                with gr.TabItem("5️⃣  Manage"):
+                    with gr.Column():
+                        with gr.Row():
+                            with gr.Column():
+                                gr.Markdown("## Publishing")
+                                gr.Markdown("You model can be pushed to Hugging Face (this will use HF_API_TOKEN)")
+                                with gr.Row():
+                                    with gr.Column():
+                                        repo_id = gr.Textbox(
+                                            label="HuggingFace Model Repository",
+                                            placeholder="username/model-name",
+                                            info="The repository will be created if it doesn't exist"
+                                        )
+                                        gr.Checkbox(label="Check this to make your model public (ie. visible and downloadable by anyone)", info="You model is private by default"),
+                                        global_stop_btn = gr.Button(
+                                            "Push my model",
+                                            #variant="stop"
+                                        )
+                        with gr.Row():
+                            with gr.Column():
+                                with gr.Row():
+                                    with gr.Column():
+                                        gr.Markdown("## Storage management")
+                                        with gr.Row():
+                                            download_dataset_btn = gr.DownloadButton(
+                                                "Download dataset",
+                                                variant="secondary",
+                                                size="lg"
+                                            )
+                                            download_model_btn = gr.DownloadButton(
+                                                "Download model",
+                                                variant="secondary",
+                                                size="lg"
+                                            )
+                                with gr.Row():
+                                    global_stop_btn = gr.Button(
+                                        "Stop everything and delete my data",
+                                        variant="stop"
+                                    )
+                                    global_status = gr.Textbox(
+                                        label="Global Status",
+                                        interactive=False,
+                                        visible=False
+                                    )
+            # Event handlers
+            def update_model_info(model):
+                params = self.get_default_params(MODEL_TYPES[model])
+                info = self.get_model_info(MODEL_TYPES[model])
+                return {
+                    model_info: info,
+                    num_epochs: params["num_epochs"],
+                    batch_size: params["batch_size"],
+                    learning_rate: params["learning_rate"],
+                    save_iterations: params["save_iterations"]
+                }
+            def validate_repo(repo_id: str) -> dict:
+                validation = validate_model_repo(repo_id)
+                if validation["error"]:
+                    return gr.update(value=repo_id, error=validation["error"])
+                return gr.update(value=repo_id, error=None)
+            # Connect events
+            # Save state when model type changes
+            model_type.change(
+                fn=lambda v: self.update_ui_state(model_type=v),
+                inputs=[model_type],
+                outputs=[] # No UI update needed
+            ).then(
+                fn=update_model_info,
+                inputs=[model_type],
+                outputs=[model_info, num_epochs, batch_size, learning_rate, save_iterations]
+            )
+            # the following change listeners are used for UI persistence
+            lora_rank.change(
+                fn=lambda v: self.update_ui_state(lora_rank=v),
+                inputs=[lora_rank],
+                outputs=[]
+            )
+            lora_alpha.change(
+                fn=lambda v: self.update_ui_state(lora_alpha=v),
+                inputs=[lora_alpha],
+                outputs=[]
+            )
+            num_epochs.change(
+                fn=lambda v: self.update_ui_state(num_epochs=v),
+                inputs=[num_epochs],
+                outputs=[]
+            )
+            batch_size.change(
+                fn=lambda v: self.update_ui_state(batch_size=v),
+                inputs=[batch_size],
+                outputs=[]
+            )
+            learning_rate.change(
+                fn=lambda v: self.update_ui_state(learning_rate=v),
+                inputs=[learning_rate],
+                outputs=[]
+            )
+            save_iterations.change(
+                fn=lambda v: self.update_ui_state(save_iterations=v),
+                inputs=[save_iterations],
+                outputs=[]
+            )
+            files.upload(
+                fn=lambda x: self.importer.process_uploaded_files(x),
+                inputs=[files],
+                outputs=[import_status]
+            ).success(
+                fn=self.update_titles_after_import,
+                inputs=[enable_automatic_video_split, enable_automatic_content_captioning, custom_prompt_prefix],
+                outputs=[
+                    tabs, video_list, detect_status,
+                    split_title, caption_title, train_title
+                ]
+            )
+            youtube_download_btn.click(
+                fn=self.importer.download_youtube_video,
+                inputs=[youtube_url],
+                outputs=[import_status]
+            ).success(
+                fn=self.on_import_success,
+                inputs=[enable_automatic_video_split, enable_automatic_content_captioning, custom_prompt_prefix],
+                outputs=[tabs, video_list, detect_status]
+            )
+            # Scene detection events
+            detect_btn.click(
+                fn=self.start_scene_detection,
+                inputs=[enable_automatic_video_split],
+                outputs=[detect_status]
+            )
+            # Update button states based on captioning status
+            def update_button_states(is_running):
+                return {
+                    run_autocaption_btn: gr.Button(
+                        interactive=not is_running,
+                        variant="secondary" if is_running else "primary",
+                    ),
+                    stop_autocaption_btn: gr.Button(
+                        interactive=is_running,
+                        variant="secondary",
+                    ),
+                }
+            run_autocaption_btn.click(
+                fn=self.show_refreshing_status,
+                outputs=[training_dataset]
+            ).then(
+                fn=lambda: self.update_captioning_buttons_start(),
+                outputs=[run_autocaption_btn, stop_autocaption_btn, copy_files_to_training_dir_btn]
+            ).then(
+                fn=self.start_caption_generation,
+                inputs=[captioning_bot_instructions, custom_prompt_prefix],
+                outputs=[training_dataset],
+            ).then(
+                fn=lambda: self.update_captioning_buttons_end(),
+                outputs=[run_autocaption_btn, stop_autocaption_btn, copy_files_to_training_dir_btn]
+            )
+            copy_files_to_training_dir_btn.click(
+                fn=self.copy_files_to_training_dir,
+                inputs=[custom_prompt_prefix]
+            )
+            stop_autocaption_btn.click(
+                fn=self.stop_captioning,
+                outputs=[training_dataset, run_autocaption_btn, stop_autocaption_btn, copy_files_to_training_dir_btn]
+            )
+            original_file_path = gr.State(value=None)
+            training_dataset.select(
+                fn=self.handle_training_dataset_select,
+                outputs=[preview_image, preview_video, preview_caption, original_file_path, preview_status]
+            )
+            save_caption_btn.click(
+                fn=self.save_caption_changes,
+                inputs=[preview_caption, preview_image, preview_video, original_file_path, custom_prompt_prefix],
+                outputs=[preview_status]
+            ).success(
+                fn=self.list_training_files_to_caption,
+                outputs=[training_dataset]
+            )
+            # Save state when training preset changes
+            training_preset.change(
+                fn=lambda v: self.update_ui_state(training_preset=v),
+                inputs=[training_preset],
+                outputs=[] # No UI update needed
+            ).then(
+                fn=self.update_training_params,
+                inputs=[training_preset],
+                outputs=[
+                    model_type, lora_rank, lora_alpha,
+                    num_epochs, batch_size, learning_rate,
+                    save_iterations, preset_info
+                ]
+            )
+            # Training control events
+            start_btn.click(
+                fn=lambda preset, model_type, *args: (
+                    self.log_parser.reset(),
+                    self.trainer.start_training(
+                        MODEL_TYPES[model_type],
+                        *args,
+                        preset_name=preset
+                    )
+                ),
+                inputs=[
+                    training_preset,
+                    model_type,
+                    lora_rank,
+                    lora_alpha,
+                    num_epochs,
+                    batch_size,
+                    learning_rate,
+                    save_iterations,
+                    repo_id
+                ],
+                outputs=[status_box, log_box]
+            ).success(
+                fn=self.get_latest_status_message_logs_and_button_labels,
+                outputs=[status_box, log_box, start_btn, stop_btn, pause_resume_btn]
+            )
+            pause_resume_btn.click(
+                fn=self.handle_pause_resume,
+                outputs=[status_box, log_box, start_btn, stop_btn, pause_resume_btn]
+            )
+            stop_btn.click(
+                fn=self.handle_stop,
+                outputs=[status_box, log_box, start_btn, stop_btn, pause_resume_btn]
+            )
+            def handle_global_stop():
+                result = self.stop_all_and_clear()
+                # Update all relevant UI components
+                status = result["status"]
+                details = "\n".join(f"{k}: {v}" for k, v in result["details"].items())
+                full_status = f"{status}\n\nDetails:\n{details}"
+                # Get fresh lists after cleanup
+                videos = self.splitter.list_unprocessed_videos()
+                clips = self.list_training_files_to_caption()
+                return {
+                    global_status: gr.update(value=full_status, visible=True),
+                    video_list: videos,
+                    training_dataset: clips,
+                    status_box: "Training stopped and data cleared",
+                    log_box: "",
+                    detect_status: "Scene detection stopped",
+                    import_status: "All data cleared",
+                    preview_status: "Captioning stopped"
+                }
+            download_dataset_btn.click(
+                fn=self.trainer.create_training_dataset_zip,
+                outputs=[download_dataset_btn]
+            )
+            download_model_btn.click(
+                fn=self.trainer.get_model_output_safetensors,
+                outputs=[download_model_btn]
+            )
+            global_stop_btn.click(
+                fn=handle_global_stop,
+                outputs=[
+                    global_status,
+                    video_list,
+                    training_dataset,
+                    status_box,
+                    log_box,
+                    detect_status,
+                    import_status,
+                    preview_status
+                ]
+            )
+            app.load(
+                fn=self.initialize_app_state,
+                outputs=[
+                    video_list, training_dataset,
+                    start_btn, stop_btn, pause_resume_btn,
+                    training_preset, model_type, lora_rank, lora_alpha,
+                    num_epochs, batch_size, learning_rate, save_iterations
+                ]
+            )
+            # Auto-refresh timers
+            timer = gr.Timer(value=1)
+            timer.tick(
+                fn=lambda: (
+                    self.get_latest_status_message_logs_and_button_labels()
+                ),
+                outputs=[
+                    status_box,
+                    log_box,
+                    start_btn,
+                    stop_btn,
+                    pause_resume_btn
+                ]
+            )
+            timer = gr.Timer(value=5)
+            timer.tick(
+                fn=lambda: (
+                    self.refresh_dataset()
+                ),
+                outputs=[
+                    video_list, training_dataset
+                ]
+            )
+            timer = gr.Timer(value=6)
+            timer.tick(
+                fn=lambda: self.update_titles(),
+                outputs=[
+                    split_title, caption_title, train_title
+                ]
+            )
+        return app
+def create_app():
+    if ASK_USER_TO_DUPLICATE_SPACE:
+        with gr.Blocks() as app:
+            gr.Markdown("""# Finetrainers UI
+This Hugging Face space needs to be duplicated to your own billing account to work.
+Click the 'Duplicate Space' button at the top of the page to create your own copy.
+It is recommended to use a Nvidia L40S and a persistent storage space.
+To avoid overpaying for your space, you can configure the auto-sleep settings to fit your personal budget.""")
+        return app
+    ui = VideoTrainerUI()
+    return ui.create_ui()
+if __name__ == "__main__":
+    app = create_app()
+    allowed_paths = [
+        str(STORAGE_PATH),  # Base storage
+        str(VIDEOS_TO_SPLIT_PATH),
+        str(STAGING_PATH),
+        str(TRAINING_PATH),
+        str(TRAINING_VIDEOS_PATH),
+        str(MODEL_PATH),
+        str(OUTPUT_PATH)
+    ]
+    app.queue(default_concurrency_limit=1).launch(
+        server_name="0.0.0.0",
+        allowed_paths=allowed_paths
+    )

vms/config.py CHANGED Viewed

@@ -3,7 +3,16 @@ from dataclasses import dataclass, field
 from typing import Dict, Any, Optional, List, Tuple
 from pathlib import Path
-from .utils import parse_bool_env
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 ASK_USER_TO_DUPLICATE_SPACE = parse_bool_env(os.getenv("ASK_USER_TO_DUPLICATE_SPACE"))

 from typing import Dict, Any, Optional, List, Tuple
 from pathlib import Path
+def parse_bool_env(env_value: Optional[str]) -> bool:
+    """Parse environment variable string to boolean
+    Handles various true/false string representations:
+    - True: "true", "True", "TRUE", "1", etc
+    - False: "false", "False", "FALSE", "0", "", None
+    """
+    if not env_value:
+        return False
+    return str(env_value).lower() in ('true', '1', 't', 'y', 'yes')
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 ASK_USER_TO_DUPLICATE_SPACE = parse_bool_env(os.getenv("ASK_USER_TO_DUPLICATE_SPACE"))

vms/services/__init__.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from .captioner import CaptioningProgress, CaptioningService
+from .importer import ImportService
+from .splitter import SplittingService
+from .trainer import TrainingService
+__all__ = [
+    'CaptioningProgress',
+    'CaptioningService',
+    'ImportService',
+    'SplittingService',
+    'TrainingService',
+]

vms/{captioning_service.py → services/captioner.py} RENAMED Viewed

@@ -17,9 +17,8 @@ from llava.mm_utils import tokenizer_image_token
 from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
 from llava.conversation import conv_templates, SeparatorStyle
-from .config import TRAINING_VIDEOS_PATH, STAGING_PATH, PRELOAD_CAPTIONING_MODEL, CAPTIONING_MODEL, USE_MOCK_CAPTIONING_MODEL, DEFAULT_CAPTIONING_BOT_INSTRUCTIONS, VIDEOS_TO_SPLIT_PATH, DEFAULT_PROMPT_PREFIX
-from .utils import extract_scene_info, is_image_file, is_video_file
-from .finetrainers_utils import copy_files_to_training_dir, prepare_finetrainers_dataset
 logger = logging.getLogger(__name__)

 from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
 from llava.conversation import conv_templates, SeparatorStyle
+from ..config import TRAINING_VIDEOS_PATH, STAGING_PATH, PRELOAD_CAPTIONING_MODEL, CAPTIONING_MODEL, USE_MOCK_CAPTIONING_MODEL, DEFAULT_CAPTIONING_BOT_INSTRUCTIONS, VIDEOS_TO_SPLIT_PATH, DEFAULT_PROMPT_PREFIX
+from ..utils import extract_scene_info, is_image_file, is_video_file, copy_files_to_training_dir, prepare_finetrainers_dataset
 logger = logging.getLogger(__name__)

vms/{import_service.py → services/importer.py} RENAMED Viewed

@@ -8,9 +8,8 @@ from typing import List, Dict, Optional, Tuple
 from pytubefix import YouTube
 import logging
-from .utils import is_image_file, is_video_file, add_prefix_to_caption
-from .image_preprocessing import normalize_image
-from .config import NORMALIZE_IMAGES_TO, TRAINING_VIDEOS_PATH, VIDEOS_TO_SPLIT_PATH, TRAINING_PATH, DEFAULT_PROMPT_PREFIX
 logger = logging.getLogger(__name__)

 from pytubefix import YouTube
 import logging
+from ..config import NORMALIZE_IMAGES_TO, TRAINING_VIDEOS_PATH, VIDEOS_TO_SPLIT_PATH, TRAINING_PATH, DEFAULT_PROMPT_PREFIX
+from ..utils import normalize_image, is_image_file, is_video_file, add_prefix_to_caption
 logger = logging.getLogger(__name__)

vms/{splitting_service.py → services/splitter.py} RENAMED Viewed

@@ -12,11 +12,8 @@ import gradio as gr
 from scenedetect import detect, ContentDetector, SceneManager, open_video
 from scenedetect.video_splitter import split_video_ffmpeg
-from .config import TRAINING_PATH, STORAGE_PATH, TRAINING_VIDEOS_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH, DEFAULT_PROMPT_PREFIX
-from .image_preprocessing import detect_black_bars
-from .video_preprocessing import remove_black_bars
-from .utils import extract_scene_info, is_video_file, is_image_file, add_prefix_to_caption
 logger = logging.getLogger(__name__)

 from scenedetect import detect, ContentDetector, SceneManager, open_video
 from scenedetect.video_splitter import split_video_ffmpeg
+from ..config import TRAINING_PATH, STORAGE_PATH, TRAINING_VIDEOS_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH, DEFAULT_PROMPT_PREFIX
+from ..utils import remove_black_bars, extract_scene_info, is_video_file, is_image_file, add_prefix_to_caption
 logger = logging.getLogger(__name__)

vms/{training_service.py → services/trainer.py} RENAMED Viewed

@@ -20,9 +20,8 @@ from typing import Any, Optional, Dict, List, Union, Tuple
 from huggingface_hub import upload_folder, create_repo
-from .config import TrainingConfig, TRAINING_PRESETS,  LOG_FILE_PATH, TRAINING_VIDEOS_PATH, STORAGE_PATH, TRAINING_PATH, MODEL_PATH, OUTPUT_PATH, HF_API_TOKEN, MODEL_TYPES
-from .utils import make_archive, parse_training_log, is_image_file, is_video_file
-from .finetrainers_utils import prepare_finetrainers_dataset, copy_files_to_training_dir
 logger = logging.getLogger(__name__)
@@ -36,6 +35,7 @@ class TrainingService:
         self.file_handler = None
         self.setup_logging()
         logger.info("Training service initialized")
@@ -122,11 +122,23 @@ class TrainingService:
         }
         if not ui_state_file.exists():
             return default_state
         try:
             with open(ui_state_file, 'r') as f:
-                saved_state = json.load(f)
                 # Convert numeric values to appropriate types
                 if "num_epochs" in saved_state:
@@ -141,11 +153,66 @@ class TrainingService:
                 # Make sure we have all keys (in case structure changed)
                 merged_state = default_state.copy()
                 merged_state.update(saved_state)
                 return merged_state
         except Exception as e:
             logger.error(f"Error loading UI state: {str(e)}")
             return default_state
     # Modify save_session to also store the UI state at training start
     def save_session(self, params: Dict) -> None:
         """Save training session parameters"""

 from huggingface_hub import upload_folder, create_repo
+from ..config import TrainingConfig, TRAINING_PRESETS,  LOG_FILE_PATH, TRAINING_VIDEOS_PATH, STORAGE_PATH, TRAINING_PATH, MODEL_PATH, OUTPUT_PATH, HF_API_TOKEN, MODEL_TYPES
+from ..utils import make_archive, parse_training_log, is_image_file, is_video_file, prepare_finetrainers_dataset, copy_files_to_training_dir
 logger = logging.getLogger(__name__)
         self.file_handler = None
         self.setup_logging()
+        self.ensure_valid_ui_state_file()
         logger.info("Training service initialized")
         }
         if not ui_state_file.exists():
+            logger.info("UI state file does not exist, using default values")
             return default_state
         try:
+            # First check if the file is empty
+            file_size = ui_state_file.stat().st_size
+            if file_size == 0:
+                logger.warning("UI state file exists but is empty, using default values")
+                return default_state
             with open(ui_state_file, 'r') as f:
+                file_content = f.read().strip()
+                if not file_content:
+                    logger.warning("UI state file is empty or contains only whitespace, using default values")
+                    return default_state
+                saved_state = json.loads(file_content)
                 # Convert numeric values to appropriate types
                 if "num_epochs" in saved_state:
                 # Make sure we have all keys (in case structure changed)
                 merged_state = default_state.copy()
                 merged_state.update(saved_state)
+                logger.info(f"Successfully loaded UI state from {ui_state_file}")
                 return merged_state
+        except json.JSONDecodeError as e:
+            logger.error(f"Error parsing UI state JSON: {str(e)}")
+            return default_state
         except Exception as e:
             logger.error(f"Error loading UI state: {str(e)}")
             return default_state
+    def ensure_valid_ui_state_file(self):
+        """Ensure UI state file exists and is valid JSON"""
+        ui_state_file = OUTPUT_PATH / "ui_state.json"
+        if not ui_state_file.exists():
+            # Create a new file with default values
+            logger.info("Creating new UI state file with default values")
+            default_state = {
+                "model_type": list(MODEL_TYPES.keys())[0],
+                "lora_rank": "128",
+                "lora_alpha": "128",
+                "num_epochs": 50,
+                "batch_size": 1,
+                "learning_rate": 3e-5,
+                "save_iterations": 200,
+                "training_preset": list(TRAINING_PRESETS.keys())[0]
+            }
+            self.save_ui_state(default_state)
+            return
+        # Check if file is valid JSON
+        try:
+            with open(ui_state_file, 'r') as f:
+                file_content = f.read().strip()
+                if not file_content:
+                    raise ValueError("Empty file")
+                json.loads(file_content)
+            logger.debug("UI state file validation successful")
+        except Exception as e:
+            logger.warning(f"Invalid UI state file detected: {str(e)}. Creating new one with defaults.")
+            # Backup the invalid file
+            backup_file = ui_state_file.with_suffix('.json.bak')
+            try:
+                shutil.copy2(ui_state_file, backup_file)
+                logger.info(f"Backed up invalid UI state file to {backup_file}")
+            except Exception as backup_error:
+                logger.error(f"Failed to backup invalid UI state file: {str(backup_error)}")
+            # Create a new file with default values
+            default_state = {
+                "model_type": list(MODEL_TYPES.keys())[0],
+                "lora_rank": "128",
+                "lora_alpha": "128",
+                "num_epochs": 50,
+                "batch_size": 1,
+                "learning_rate": 3e-5,
+                "save_iterations": 200,
+                "training_preset": list(TRAINING_PRESETS.keys())[0]
+            }
+            self.save_ui_state(default_state)
     # Modify save_session to also store the UI state at training start
     def save_session(self, params: Dict) -> None:
         """Save training session parameters"""

vms/tabs/__init__.py ADDED Viewed

	@@ -0,0 +1,17 @@

+"""
+Tab components for Video Model Studio UI
+"""
+from .import_tab import ImportTab
+from .split_tab import SplitTab
+from .caption_tab import CaptionTab
+from .train_tab import TrainTab
+from .manage_tab import ManageTab
+__all__ = [
+    'ImportTab',
+    'SplitTab',
+    'CaptionTab',
+    'TrainTab',
+    'ManageTab'
+]

vms/tabs/base_tab.py ADDED Viewed

	@@ -0,0 +1,44 @@

+"""
+Base class for UI tabs
+"""
+import gradio as gr
+import logging
+from typing import Dict, Any, Optional
+logger = logging.getLogger(__name__)
+class BaseTab:
+    """Base class for UI tabs with common functionality"""
+    def __init__(self, app_state):
+        """Initialize the tab with app state reference
+        Args:
+            app_state: Reference to main VideoTrainerUI instance
+        """
+        self.app = app_state
+        self.components = {}
+    def create(self, parent=None) -> gr.TabItem:
+        """Create the tab UI components
+        Args:
+            parent: Optional parent container
+        Returns:
+            The created tab component
+        """
+        raise NotImplementedError("Subclasses must implement create()")
+    def connect_events(self) -> None:
+        """Connect event handlers to UI components"""
+        raise NotImplementedError("Subclasses must implement connect_events()")
+    def refresh(self) -> Dict[str, Any]:
+        """Refresh UI components with current data
+        Returns:
+            Dictionary with updated values for components
+        """
+        return {}

vms/tabs/caption_tab.py ADDED Viewed

	@@ -0,0 +1,176 @@

+"""
+Caption tab for Video Model Studio UI
+"""
+import gradio as gr
+import logging
+from typing import Dict, Any, List, Optional
+from pathlib import Path
+from .base_tab import BaseTab
+from ..config import DEFAULT_CAPTIONING_BOT_INSTRUCTIONS, DEFAULT_PROMPT_PREFIX
+logger = logging.getLogger(__name__)
+class CaptionTab(BaseTab):
+    """Caption tab for managing asset captions
+    Builds the prompt/instruction textboxes, the captioning action buttons,
+    the training dataset table and a preview panel, then wires them to
+    handlers exposed by the main app object (self.app).
+    """
+    def __init__(self, app_state):
+        """Initialize the tab and set its id and title
+        Args:
+            app_state: Reference to the main app object, stored as self.app
+        """
+        super().__init__(app_state)
+        self.id = "caption_tab"
+        self.title = "3️⃣  Caption"
+    def create(self, parent=None) -> gr.TabItem:
+        """Create the Caption tab UI components
+        Each component is stored in self.components under a string key so
+        that event wiring can look it up later.
+        Args:
+            parent: Optional parent container (not used by this method)
+        Returns:
+            The gr.TabItem holding the tab's layout
+        """
+        with gr.TabItem(self.title, id=self.id) as tab:
+            with gr.Row():
+                self.components["caption_title"] = gr.Markdown("## Captioning of 0 files (0 bytes)")
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        self.components["custom_prompt_prefix"] = gr.Textbox(
+                            scale=3,
+                            label='Prefix to add to ALL captions (eg. "In the style of TOK, ")',
+                            placeholder="In the style of TOK, ",
+                            lines=2,
+                            value=DEFAULT_PROMPT_PREFIX
+                        )
+                        self.components["captioning_bot_instructions"] = gr.Textbox(
+                            scale=6,
+                            label="System instructions for the automatic captioning model",
+                            placeholder="Please generate a full description of...",
+                            lines=5,
+                            value=DEFAULT_CAPTIONING_BOT_INSTRUCTIONS
+                        )
+                    with gr.Row():
+                        self.components["run_autocaption_btn"] = gr.Button(
+                            "Automatically fill missing captions",
+                            variant="primary"
+                        )
+                        self.components["copy_files_to_training_dir_btn"] = gr.Button(
+                            "Copy assets to training directory",
+                            variant="primary"
+                        )
+                        # Created non-interactive; its state is an output of the captioning events below
+                        self.components["stop_autocaption_btn"] = gr.Button(
+                            "Stop Captioning",
+                            variant="stop",
+                            interactive=False
+                        )
+            with gr.Row():
+                with gr.Column():
+                    # Initial rows are fetched once, while the layout is being built
+                    self.components["training_dataset"] = gr.Dataframe(
+                        headers=["name", "status"],
+                        interactive=False,
+                        wrap=True,
+                        value=self.app.list_training_files_to_caption(),
+                        row_count=10
+                    )
+                with gr.Column():
+                    # Both previews start hidden; they are outputs of the dataset select event
+                    self.components["preview_video"] = gr.Video(
+                        label="Video Preview",
+                        interactive=False,
+                        visible=False
+                    )
+                    self.components["preview_image"] = gr.Image(
+                        label="Image Preview",
+                        interactive=False,
+                        visible=False
+                    )
+                    self.components["preview_caption"] = gr.Textbox(
+                        label="Caption",
+                        lines=6,
+                        interactive=True
+                    )
+                    self.components["save_caption_btn"] = gr.Button("Save Caption")
+                    self.components["preview_status"] = gr.Textbox(
+                        label="Status",
+                        interactive=False,
+                        visible=True
+                    )
+                    # Non-visual state written by the select event and read back when saving a caption
+                    self.components["original_file_path"] = gr.State(value=None)
+        return tab
+    def connect_events(self) -> None:
+        """Connect event handlers to UI components
+        Only components stored by create() are referenced, so create() must
+        have been called first.
+        """
+        # Run auto-captioning button
+        # Chain: show refreshing status -> update buttons (start) ->
+        # generate captions -> update buttons (end)
+        self.components["run_autocaption_btn"].click(
+            fn=self.app.show_refreshing_status,
+            outputs=[self.components["training_dataset"]]
+        ).then(
+            fn=lambda: self.app.update_captioning_buttons_start(),
+            outputs=[
+                self.components["run_autocaption_btn"],
+                self.components["stop_autocaption_btn"],
+                self.components["copy_files_to_training_dir_btn"]
+            ]
+        ).then(
+            fn=self.app.start_caption_generation,
+            inputs=[
+                self.components["captioning_bot_instructions"],
+                self.components["custom_prompt_prefix"]
+            ],
+            outputs=[self.components["training_dataset"]],
+        ).then(
+            fn=lambda: self.app.update_captioning_buttons_end(),
+            outputs=[
+                self.components["run_autocaption_btn"],
+                self.components["stop_autocaption_btn"],
+                self.components["copy_files_to_training_dir_btn"]
+            ]
+        )
+        # Copy files to training dir button
+        # No outputs are declared: the click does not update any component
+        self.components["copy_files_to_training_dir_btn"].click(
+            fn=self.app.copy_files_to_training_dir,
+            inputs=[self.components["custom_prompt_prefix"]]
+        )
+        # Stop captioning button
+        self.components["stop_autocaption_btn"].click(
+            fn=self.app.stop_captioning,
+            outputs=[
+                self.components["training_dataset"],
+                self.components["run_autocaption_btn"],
+                self.components["stop_autocaption_btn"],
+                self.components["copy_files_to_training_dir_btn"]
+            ]
+        )
+        # Dataset selection for preview
+        self.components["training_dataset"].select(
+            fn=self.app.handle_training_dataset_select,
+            outputs=[
+                self.components["preview_image"],
+                self.components["preview_video"],
+                self.components["preview_caption"],
+                self.components["original_file_path"],
+                self.components["preview_status"]
+            ]
+        )
+        # Save caption button
+        # The dataset table is re-listed via .success(), i.e. only when saving raised no error
+        self.components["save_caption_btn"].click(
+            fn=self.app.save_caption_changes,
+            inputs=[
+                self.components["preview_caption"],
+                self.components["preview_image"],
+                self.components["preview_video"],
+                self.components["original_file_path"],
+                self.components["custom_prompt_prefix"]
+            ],
+            outputs=[self.components["preview_status"]]
+        ).success(
+            fn=self.app.list_training_files_to_caption,
+            outputs=[self.components["training_dataset"]]
+        )
+    def refresh(self) -> Dict[str, Any]:
+        """Refresh the dataset list with current data
+        Returns:
+            Dictionary with a single "training_dataset" key holding the
+            result of self.app.list_training_files_to_caption()
+        """
+        training_dataset = self.app.list_training_files_to_caption()
+        return {
+            "training_dataset": training_dataset
+        }

vms/tabs/import_tab.py ADDED Viewed

	@@ -0,0 +1,122 @@

+"""
+Import tab for Video Model Studio UI
+"""
+import gradio as gr
+import logging
+import asyncio
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+from .base_tab import BaseTab
+from ..config import (
+    VIDEOS_TO_SPLIT_PATH, DEFAULT_PROMPT_PREFIX, DEFAULT_CAPTIONING_BOT_INSTRUCTIONS
+)
+logger = logging.getLogger(__name__)
+class ImportTab(BaseTab):
+    """Import tab for uploading videos and images"""
+    def __init__(self, app_state):
+        super().__init__(app_state)
+        self.id = "import_tab"
+        self.title = "1️⃣  Import"
+    def create(self, parent=None) -> gr.TabItem:
+        """Create the Import tab UI components"""
+        with gr.TabItem(self.title, id=self.id) as tab:
+            with gr.Row():
+                gr.Markdown("## Automatic splitting and captioning")
+            with gr.Row():
+                self.components["enable_automatic_video_split"] = gr.Checkbox(
+                    label="Automatically split videos into smaller clips",
+                    info="Note: a clip is a single camera shot, usually a few seconds",
+                    value=True,
+                    visible=True
+                )
+                self.components["enable_automatic_content_captioning"] = gr.Checkbox(
+                    label="Automatically caption photos and videos",
+                    info="Note: this uses LlaVA and takes some extra time to load and process",
+                    value=False,
+                    visible=True,
+                )
+            with gr.Row():
+                with gr.Column(scale=3):
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Markdown("## Import video files")
+                            gr.Markdown("You can upload either:")
+                            gr.Markdown("- A single MP4 video file")
+                            gr.Markdown("- A ZIP archive containing multiple videos and optional caption files")
+                            gr.Markdown("For ZIP files: Create a folder containing videos (name is not important) and optional caption files with the same name (eg. `some_video.txt` for `some_video.mp4`)")
+                    with gr.Row():
+                        self.components["files"] = gr.Files(
+                            label="Upload Images, Videos or ZIP",
+                            file_types=[".jpg", ".jpeg", ".png", ".webp", ".webp", ".avif", ".heic", ".mp4", ".zip"],
+                            type="filepath"
+                        )
+                with gr.Column(scale=3):
+                    with gr.Row():
+                        with gr.Column():
+                            gr.Markdown("## Import a YouTube video")
+                            gr.Markdown("You can also use a YouTube video as reference, by pasting its URL here:")
+                    with gr.Row():
+                        self.components["youtube_url"] = gr.Textbox(
+                            label="Import YouTube Video",
+                            placeholder="https://www.youtube.com/watch?v=..."
+                        )
+                    with gr.Row():
+                        self.components["youtube_download_btn"] = gr.Button("Download YouTube Video", variant="secondary")
+            with gr.Row():
+                self.components["import_status"] = gr.Textbox(label="Status", interactive=False)
+        return tab
+    def connect_events(self) -> None:
+        """Connect event handlers to UI components"""
+        # File upload event
+        self.components["files"].upload(
+            fn=lambda x: self.app.importer.process_uploaded_files(x),
+            inputs=[self.components["files"]],
+            outputs=[self.components["import_status"]]
+        ).success(
+            fn=self.app.update_titles_after_import,
+            inputs=[
+                self.components["enable_automatic_video_split"],
+                self.components["enable_automatic_content_captioning"],
+                self.app.tabs["caption_tab"].components["custom_prompt_prefix"]
+            ],
+            outputs=[
+                self.app.tabs_component,  # Main tabs component
+                self.app.tabs["split_tab"].components["video_list"],
+                self.app.tabs["split_tab"].components["detect_status"],
+                self.app.tabs["split_tab"].components["split_title"],
+                self.app.tabs["caption_tab"].components["caption_title"],
+                self.app.tabs["train_tab"].components["train_title"]
+            ]
+        )
+        # YouTube download event
+        self.components["youtube_download_btn"].click(
+            fn=self.app.importer.download_youtube_video,
+            inputs=[self.components["youtube_url"]],
+            outputs=[self.components["import_status"]]
+        ).success(
+            fn=self.app.on_import_success,
+            inputs=[
+                self.components["enable_automatic_video_split"],
+                self.components["enable_automatic_content_captioning"],
+                self.app.tabs["caption_tab"].components["custom_prompt_prefix"]
+            ],
+            outputs=[
+                self.app.tabs_component,
+                self.app.tabs["split_tab"].components["video_list"],
+                self.app.tabs["split_tab"].components["detect_status"]
+            ]
+        )

vms/tabs/manage_tab.py ADDED Viewed

	@@ -0,0 +1,117 @@

+"""
+Manage tab for Video Model Studio UI
+"""
+import gradio as gr
+import logging
+from typing import Dict, Any, List, Optional
+from .base_tab import BaseTab
+from ..config import HF_API_TOKEN
+logger = logging.getLogger(__name__)
+class ManageTab(BaseTab):
+    """Manage tab for storage management and model publication"""
+    def __init__(self, app_state):
+        super().__init__(app_state)
+        self.id = "manage_tab"
+        self.title = "5️⃣  Manage"
+    def create(self, parent=None) -> gr.TabItem:
+        """Create the Manage tab UI components"""
+        with gr.TabItem(self.title, id=self.id) as tab:
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column():
+                        gr.Markdown("## Publishing")
+                        gr.Markdown("You model can be pushed to Hugging Face (this will use HF_API_TOKEN)")
+                        with gr.Row():
+                            with gr.Column():
+                                self.components["repo_id"] = gr.Textbox(
+                                    label="HuggingFace Model Repository",
+                                    placeholder="username/model-name",
+                                    info="The repository will be created if it doesn't exist"
+                                )
+                                self.components["make_public"] = gr.Checkbox(
+                                    label="Check this to make your model public (ie. visible and downloadable by anyone)",
+                                    info="You model is private by default"
+                                )
+                                self.components["push_model_btn"] = gr.Button(
+                                    "Push my model"
+                                )
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            with gr.Column():
+                                gr.Markdown("## Storage management")
+                                with gr.Row():
+                                    self.components["download_dataset_btn"] = gr.DownloadButton(
+                                        "Download dataset",
+                                        variant="secondary",
+                                        size="lg"
+                                    )
+                                    self.components["download_model_btn"] = gr.DownloadButton(
+                                        "Download model",
+                                        variant="secondary",
+                                        size="lg"
+                                    )
+                        with gr.Row():
+                            self.components["global_stop_btn"] = gr.Button(
+                                "Stop everything and delete my data",
+                                variant="stop"
+                            )
+                            self.components["global_status"] = gr.Textbox(
+                                label="Global Status",
+                                interactive=False,
+                                visible=False
+                            )
+        return tab
+    def connect_events(self) -> None:
+        """Connect event handlers to UI components"""
+        # Repository ID validation
+        self.components["repo_id"].change(
+            fn=self.app.validate_repo,
+            inputs=[self.components["repo_id"]],
+            outputs=[self.components["repo_id"]]
+        )
+        # Download buttons
+        self.components["download_dataset_btn"].click(
+            fn=self.app.trainer.create_training_dataset_zip,
+            outputs=[self.components["download_dataset_btn"]]
+        )
+        self.components["download_model_btn"].click(
+            fn=self.app.trainer.get_model_output_safetensors,
+            outputs=[self.components["download_model_btn"]]
+        )
+        # Global stop button
+        self.components["global_stop_btn"].click(
+            fn=self.app.handle_global_stop,
+            outputs=[
+                self.components["global_status"],
+                self.app.tabs["split_tab"].components["video_list"],
+                self.app.tabs["caption_tab"].components["training_dataset"],
+                self.app.tabs["train_tab"].components["status_box"],
+                self.app.tabs["train_tab"].components["log_box"],
+                self.app.tabs["split_tab"].components["detect_status"],
+                self.app.tabs["import_tab"].components["import_status"],
+                self.app.tabs["caption_tab"].components["preview_status"]
+            ]
+        )
+        # Push model button
+        # To implement model pushing functionality
+        self.components["push_model_btn"].click(
+            fn=lambda repo_id: self.app.upload_to_hub(repo_id),
+            inputs=[self.components["repo_id"]],
+            outputs=[self.components["global_status"]]
+        )

vms/tabs/split_tab.py ADDED Viewed

	@@ -0,0 +1,56 @@

+"""
+Split tab for Video Model Studio UI
+"""
+import gradio as gr
+import logging
+from typing import Dict, Any, List, Optional
+from .base_tab import BaseTab
+logger = logging.getLogger(__name__)
+class SplitTab(BaseTab):
+    """Split tab for scene detection and video splitting"""
+    def __init__(self, app_state):
+        super().__init__(app_state)
+        self.id = "split_tab"
+        self.title = "2️⃣  Split"
+    def create(self, parent=None) -> gr.TabItem:
+        """Create the Split tab UI components"""
+        with gr.TabItem(self.title, id=self.id) as tab:
+            with gr.Row():
+                self.components["split_title"] = gr.Markdown("## Splitting of 0 videos (0 bytes)")
+            with gr.Row():
+                with gr.Column():
+                    self.components["detect_btn"] = gr.Button("Split videos into single-camera shots", variant="primary")
+                    self.components["detect_status"] = gr.Textbox(label="Status", interactive=False)
+                with gr.Column():
+                    self.components["video_list"] = gr.Dataframe(
+                        headers=["name", "status"],
+                        label="Videos to split",
+                        interactive=False,
+                        wrap=True
+                    )
+        return tab
+    def connect_events(self) -> None:
+        """Connect event handlers to UI components"""
+        # Scene detection button event
+        self.components["detect_btn"].click(
+            fn=self.app.start_scene_detection,
+            inputs=[self.app.tabs["import_tab"].components["enable_automatic_video_split"]],
+            outputs=[self.components["detect_status"]]
+        )
+    def refresh(self) -> Dict[str, Any]:
+        """Refresh the video list with current data"""
+        videos = self.app.splitter.list_unprocessed_videos()
+        return {
+            "video_list": videos
+        }

vms/tabs/train_tab.py ADDED Viewed

	@@ -0,0 +1,280 @@

+"""
+Train tab for Video Model Studio UI
+"""
+import gradio as gr
+import logging
+from typing import Dict, Any, List, Optional
+from .base_tab import BaseTab
+from ..config import TRAINING_PRESETS, MODEL_TYPES, ASK_USER_TO_DUPLICATE_SPACE
+from ..utils import TrainingLogParser
+logger = logging.getLogger(__name__)
+class TrainTab(BaseTab):
+    """Train tab for model training"""
+    def __init__(self, app_state):
+        super().__init__(app_state)
+        self.id = "train_tab"
+        self.title = "4️⃣  Train"
+    def handle_training_start(self, preset, model_type, *args):
+        """Handle training start with proper log parser reset"""
+        # Safely reset log parser if it exists
+        if hasattr(self.app, 'log_parser') and self.app.log_parser is not None:
+            self.app.log_parser.reset()
+        else:
+            logger.warning("Log parser not initialized, creating a new one")
+            self.app.log_parser = TrainingLogParser()
+        # Start training
+        return self.app.trainer.start_training(
+            MODEL_TYPES[model_type],
+            *args,
+            preset_name=preset
+        )
+    def create(self, parent=None) -> gr.TabItem:
+        """Create the Train tab UI components"""
+        with gr.TabItem(self.title, id=self.id) as tab:
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        self.components["train_title"] = gr.Markdown("## 0 files available for training (0 bytes)")
+                    with gr.Row():
+                        with gr.Column():
+                            self.components["training_preset"] = gr.Dropdown(
+                                choices=list(TRAINING_PRESETS.keys()),
+                                label="Training Preset",
+                                value=list(TRAINING_PRESETS.keys())[0]
+                            )
+                        self.components["preset_info"] = gr.Markdown()
+                    with gr.Row():
+                        with gr.Column():
+                            self.components["model_type"] = gr.Dropdown(
+                                choices=list(MODEL_TYPES.keys()),
+                                label="Model Type",
+                                value=list(MODEL_TYPES.keys())[0]
+                            )
+                        self.components["model_info"] = gr.Markdown(
+                            value=self.app.get_model_info(list(MODEL_TYPES.keys())[0])
+                        )
+                    with gr.Row():
+                        self.components["lora_rank"] = gr.Dropdown(
+                            label="LoRA Rank",
+                            choices=["16", "32", "64", "128", "256", "512", "1024"],
+                            value="128",
+                            type="value"
+                        )
+                        self.components["lora_alpha"] = gr.Dropdown(
+                            label="LoRA Alpha",
+                            choices=["16", "32", "64", "128", "256", "512", "1024"],
+                            value="128",
+                            type="value"
+                        )
+                    with gr.Row():
+                        self.components["num_epochs"] = gr.Number(
+                            label="Number of Epochs",
+                            value=70,
+                            minimum=1,
+                            precision=0
+                        )
+                        self.components["batch_size"] = gr.Number(
+                            label="Batch Size",
+                            value=1,
+                            minimum=1,
+                            precision=0
+                        )
+                    with gr.Row():
+                        self.components["learning_rate"] = gr.Number(
+                            label="Learning Rate",
+                            value=2e-5,
+                            minimum=1e-7
+                        )
+                        self.components["save_iterations"] = gr.Number(
+                            label="Save checkpoint every N iterations",
+                            value=500,
+                            minimum=50,
+                            precision=0,
+                            info="Model will be saved periodically after these many steps"
+                        )
+                with gr.Column():
+                    with gr.Row():
+                        self.components["start_btn"] = gr.Button(
+                            "Start Training",
+                            variant="primary",
+                            interactive=not ASK_USER_TO_DUPLICATE_SPACE
+                        )
+                        self.components["pause_resume_btn"] = gr.Button(
+                            "Resume Training",
+                            variant="secondary",
+                            interactive=False
+                        )
+                        self.components["stop_btn"] = gr.Button(
+                            "Stop Training",
+                            variant="stop",
+                            interactive=False
+                        )
+                    with gr.Row():
+                        with gr.Column():
+                            self.components["status_box"] = gr.Textbox(
+                                label="Training Status",
+                                interactive=False,
+                                lines=4
+                            )
+                            with gr.Accordion("See training logs"):
+                                self.components["log_box"] = gr.TextArea(
+                                    label="Finetrainers output (see HF Space logs for more details)",
+                                    interactive=False,
+                                    lines=40,
+                                    max_lines=200,
+                                    autoscroll=True
+                                )
+        return tab
+    def connect_events(self) -> None:
+        """Connect event handlers to UI components"""
+        # Model type change event
+        def update_model_info(model):
+            params = self.app.get_default_params(MODEL_TYPES[model])
+            info = self.app.get_model_info(MODEL_TYPES[model])
+            return {
+                self.components["model_info"]: info,
+                self.components["num_epochs"]: params["num_epochs"],
+                self.components["batch_size"]: params["batch_size"],
+                self.components["learning_rate"]: params["learning_rate"],
+                self.components["save_iterations"]: params["save_iterations"]
+            }
+        self.components["model_type"].change(
+            fn=lambda v: self.app.update_ui_state(model_type=v),
+            inputs=[self.components["model_type"]],
+            outputs=[]
+        ).then(
+            fn=update_model_info,
+            inputs=[self.components["model_type"]],
+            outputs=[
+                self.components["model_info"],
+                self.components["num_epochs"],
+                self.components["batch_size"],
+                self.components["learning_rate"],
+                self.components["save_iterations"]
+            ]
+        )
+        # Training parameters change events
+        self.components["lora_rank"].change(
+            fn=lambda v: self.app.update_ui_state(lora_rank=v),
+            inputs=[self.components["lora_rank"]],
+            outputs=[]
+        )
+        self.components["lora_alpha"].change(
+            fn=lambda v: self.app.update_ui_state(lora_alpha=v),
+            inputs=[self.components["lora_alpha"]],
+            outputs=[]
+        )
+        self.components["num_epochs"].change(
+            fn=lambda v: self.app.update_ui_state(num_epochs=v),
+            inputs=[self.components["num_epochs"]],
+            outputs=[]
+        )
+        self.components["batch_size"].change(
+            fn=lambda v: self.app.update_ui_state(batch_size=v),
+            inputs=[self.components["batch_size"]],
+            outputs=[]
+        )
+        self.components["learning_rate"].change(
+            fn=lambda v: self.app.update_ui_state(learning_rate=v),
+            inputs=[self.components["learning_rate"]],
+            outputs=[]
+        )
+        self.components["save_iterations"].change(
+            fn=lambda v: self.app.update_ui_state(save_iterations=v),
+            inputs=[self.components["save_iterations"]],
+            outputs=[]
+        )
+        # Training preset change event
+        self.components["training_preset"].change(
+            fn=lambda v: self.app.update_ui_state(training_preset=v),
+            inputs=[self.components["training_preset"]],
+            outputs=[]
+        ).then(
+            fn=self.app.update_training_params,
+            inputs=[self.components["training_preset"]],
+            outputs=[
+                self.components["model_type"],
+                self.components["lora_rank"],
+                self.components["lora_alpha"],
+                self.components["num_epochs"],
+                self.components["batch_size"],
+                self.components["learning_rate"],
+                self.components["save_iterations"],
+                self.components["preset_info"]
+            ]
+        )
+        # Training control events
+        self.components["start_btn"].click(
+            fn=self.handle_training_start,  # Use safer method instead of lambda
+            inputs=[
+                self.components["training_preset"],
+                self.components["model_type"],
+                self.components["lora_rank"],
+                self.components["lora_alpha"],
+                self.components["num_epochs"],
+                self.components["batch_size"],
+                self.components["learning_rate"],
+                self.components["save_iterations"],
+                self.app.tabs["manage_tab"].components["repo_id"]
+            ],
+            outputs=[
+                self.components["status_box"],
+                self.components["log_box"]
+            ]
+        ).success(
+            fn=self.app.get_latest_status_message_logs_and_button_labels,
+            outputs=[
+                self.components["status_box"],
+                self.components["log_box"],
+                self.components["start_btn"],
+                self.components["stop_btn"],
+                self.components["pause_resume_btn"]
+            ]
+        )
+        self.components["pause_resume_btn"].click(
+            fn=self.app.handle_pause_resume,
+            outputs=[
+                self.components["status_box"],
+                self.components["log_box"],
+                self.components["start_btn"],
+                self.components["stop_btn"],
+                self.components["pause_resume_btn"]
+            ]
+        )
+        self.components["stop_btn"].click(
+            fn=self.app.handle_stop,
+            outputs=[
+                self.components["status_box"],
+                self.components["log_box"],
+                self.components["start_btn"],
+                self.components["stop_btn"],
+                self.components["pause_resume_btn"]
+            ]
+        )

vms/ui/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from .video_trainer_ui import VideoTrainerUI
+__all__ = [
+    'VideoTrainerUI',
+]

vms/ui/video_trainer_ui.py ADDED Viewed

	@@ -0,0 +1,1100 @@

+import platform
+import subprocess
+#import sys
+#print("python = ", sys.version)
+# can be "Linux", "Darwin"
+if platform.system() == "Linux":
+    # for some reason it says "pip not found"
+    # and also "pip3 not found"
+    # subprocess.run(
+    #     "pip install flash-attn --no-build-isolation",
+    #
+    #     # hmm... this should be False, since we are in a CUDA environment, no?
+    #     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+    #
+    #     shell=True,
+    # )
+    pass
+import gradio as gr
+from pathlib import Path
+import logging
+import mimetypes
+import shutil
+import os
+import traceback
+import asyncio
+import tempfile
+import zipfile
+from typing import Any, Optional, Dict, List, Union, Tuple
+from typing import AsyncGenerator
+from ..services import TrainingService, CaptioningService, SplittingService, ImportService
+from ..config import (
+    STORAGE_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH,
+    TRAINING_PATH, LOG_FILE_PATH, TRAINING_PRESETS, TRAINING_VIDEOS_PATH, MODEL_PATH, OUTPUT_PATH, DEFAULT_CAPTIONING_BOT_INSTRUCTIONS,
+    DEFAULT_PROMPT_PREFIX, HF_API_TOKEN, ASK_USER_TO_DUPLICATE_SPACE, MODEL_TYPES, SMALL_TRAINING_BUCKETS
+)
+from ..utils import make_archive, count_media_files, format_media_title, is_image_file, is_video_file, validate_model_repo, format_time, copy_files_to_training_dir, prepare_finetrainers_dataset, TrainingLogParser
+from ..tabs import ImportTab, SplitTab, CaptionTab, TrainTab, ManageTab
+# Module-level logger for this file; its threshold is set to INFO.
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+# Raise the threshold of the 'httpx' logger to WARN so that only warnings
+# and above are emitted by it.
+httpx_logger = logging.getLogger('httpx')
+httpx_logger.setLevel(logging.WARN)
+class VideoTrainerUI:
+    def __init__(self):
+        """Initialize services and tabs"""
+        # Initialize core services
+        self.trainer = TrainingService()
+        self.splitter = SplittingService()
+        self.importer = ImportService()
+        self.captioner = CaptioningService()
+        self._should_stop_captioning = False
+        # Recovery status from any interrupted training
+        recovery_result = self.trainer.recover_interrupted_training()
+        self.recovery_status = recovery_result.get("status", "unknown")
+        self.ui_updates = recovery_result.get("ui_updates", {})
+        self.log_parser = TrainingLogParser()
+        # Shared state for tabs
+        self.state = {
+            "recovery_result": recovery_result
+        }
+        # Initialize tabs dictionary (will be populated in create_ui)
+        self.tabs = {}
+        self.tabs_component = None
+    def create_ui(self):
+        """Create the main Gradio UI"""
+        with gr.Blocks(title="🎥 Video Model Studio") as app:
+            gr.Markdown("# 🎥 Video Model Studio")
+            # Create main tabs component
+            with gr.Tabs() as self.tabs_component:
+                # Initialize tab objects
+                self.tabs["import_tab"] = ImportTab(self)
+                self.tabs["split_tab"] = SplitTab(self)
+                self.tabs["caption_tab"] = CaptionTab(self)
+                self.tabs["train_tab"] = TrainTab(self)
+                self.tabs["manage_tab"] = ManageTab(self)
+                # Create tab UI components
+                for tab_id, tab_obj in self.tabs.items():
+                    tab_obj.create(self.tabs_component)
+            # Connect event handlers
+            for tab_id, tab_obj in self.tabs.items():
+                tab_obj.connect_events()
+            # Add app-level timers for auto-refresh functionality
+            self._add_timers()
+            # Initialize app state on load
+            app.load(
+                fn=self.initialize_app_state,
+                outputs=[
+                    self.tabs["split_tab"].components["video_list"],
+                    self.tabs["caption_tab"].components["training_dataset"],
+                    self.tabs["train_tab"].components["start_btn"],
+                    self.tabs["train_tab"].components["stop_btn"],
+                    self.tabs["train_tab"].components["pause_resume_btn"],
+                    self.tabs["train_tab"].components["training_preset"],
+                    self.tabs["train_tab"].components["model_type"],
+                    self.tabs["train_tab"].components["lora_rank"],
+                    self.tabs["train_tab"].components["lora_alpha"],
+                    self.tabs["train_tab"].components["num_epochs"],
+                    self.tabs["train_tab"].components["batch_size"],
+                    self.tabs["train_tab"].components["learning_rate"],
+                    self.tabs["train_tab"].components["save_iterations"]
+                ]
+            )
+        return app
+    def _add_timers(self):
+        """Add auto-refresh timers to the UI"""
+        # Status update timer (every 1 second)
+        status_timer = gr.Timer(value=1)
+        status_timer.tick(
+            fn=self.get_latest_status_message_logs_and_button_labels,
+            outputs=[
+                self.tabs["train_tab"].components["status_box"],
+                self.tabs["train_tab"].components["log_box"],
+                self.tabs["train_tab"].components["start_btn"],
+                self.tabs["train_tab"].components["stop_btn"],
+                self.tabs["train_tab"].components["pause_resume_btn"]
+            ]
+        )
+        # Dataset refresh timer (every 5 seconds)
+        dataset_timer = gr.Timer(value=5)
+        dataset_timer.tick(
+            fn=self.refresh_dataset,
+            outputs=[
+                self.tabs["split_tab"].components["video_list"],
+                self.tabs["caption_tab"].components["training_dataset"]
+            ]
+        )
+        # Titles update timer (every 6 seconds)
+        titles_timer = gr.Timer(value=6)
+        titles_timer.tick(
+            fn=self.update_titles,
+            outputs=[
+                self.tabs["split_tab"].components["split_title"],
+                self.tabs["caption_tab"].components["caption_title"],
+                self.tabs["train_tab"].components["train_title"]
+            ]
+        )
+    def handle_global_stop(self):
+        """Handle the global stop button click"""
+        result = self.stop_all_and_clear()
+        # Format the details for display
+        status = result["status"]
+        details = "\n".join(f"{k}: {v}" for k, v in result["details"].items())
+        full_status = f"{status}\n\nDetails:\n{details}"
+        # Get fresh lists after cleanup
+        videos = self.splitter.list_unprocessed_videos()
+        clips = self.list_training_files_to_caption()
+        return {
+            self.tabs["manage_tab"].components["global_status"]: gr.update(value=full_status, visible=True),
+            self.tabs["split_tab"].components["video_list"]: videos,
+            self.tabs["caption_tab"].components["training_dataset"]: clips,
+            self.tabs["train_tab"].components["status_box"]: "Training stopped and data cleared",
+            self.tabs["train_tab"].components["log_box"]: "",
+            self.tabs["split_tab"].components["detect_status"]: "Scene detection stopped",
+            self.tabs["import_tab"].components["import_status"]: "All data cleared",
+            self.tabs["caption_tab"].components["preview_status"]: "Captioning stopped"
+        }
+    def upload_to_hub(self, repo_id: str) -> str:
+        """Upload model to HuggingFace Hub"""
+        if not repo_id:
+            return "Error: Repository ID is required"
+        # Validate repository name
+        validation = validate_model_repo(repo_id)
+        if validation["error"]:
+            return f"Error: {validation['error']}"
+        # Check if we have a model to upload
+        if not self.trainer.get_model_output_safetensors():
+            return "Error: No model found to upload"
+        # Upload model to hub
+        success = self.trainer.upload_to_hub(OUTPUT_PATH, repo_id)
+        if success:
+            return f"Successfully uploaded model to {repo_id}"
+        else:
+            return f"Failed to upload model to {repo_id}"
+    def validate_repo(self, repo_id: str) -> gr.update:
+        """Validate repository ID for HuggingFace Hub"""
+        validation = validate_model_repo(repo_id)
+        if validation["error"]:
+            return gr.update(value=repo_id, error=validation["error"])
+        return gr.update(value=repo_id, error=None)
+    async def _process_caption_generator(self, captioning_bot_instructions, prompt_prefix):
+        """Process the caption generator's results in the background"""
+        try:
+            async for _ in self.captioner.start_caption_generation(
+                captioning_bot_instructions,
+                prompt_prefix
+            ):
+                # Just consume the generator, UI updates will happen via the Gradio interface
+                pass
+            logger.info("Background captioning completed")
+        except Exception as e:
+            logger.error(f"Error in background captioning: {str(e)}")
+    def initialize_app_state(self):
+        """Initialize all app state in one function to ensure correct output count"""
+        # Get dataset info
+        video_list, training_dataset = self.refresh_dataset()
+        # Get button states
+        button_states = self.get_initial_button_states()
+        start_btn = button_states[0]
+        stop_btn = button_states[1]
+        pause_resume_btn = button_states[2]
+        # Get UI form values
+        ui_state = self.load_ui_values()
+        training_preset = ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0])
+        model_type_val = ui_state.get("model_type", list(MODEL_TYPES.keys())[0])
+        lora_rank_val = ui_state.get("lora_rank", "128")
+        lora_alpha_val = ui_state.get("lora_alpha", "128")
+        num_epochs_val = int(ui_state.get("num_epochs", 70))
+        batch_size_val = int(ui_state.get("batch_size", 1))
+        learning_rate_val = float(ui_state.get("learning_rate", 3e-5))
+        save_iterations_val = int(ui_state.get("save_iterations", 500))
+        # Return all values in the exact order expected by outputs
+        return (
+            video_list,
+            training_dataset,
+            start_btn,
+            stop_btn,
+            pause_resume_btn,
+            training_preset,
+            model_type_val,
+            lora_rank_val,
+            lora_alpha_val,
+            num_epochs_val,
+            batch_size_val,
+            learning_rate_val,
+            save_iterations_val
+        )
+    def initialize_ui_from_state(self):
+        """Initialize UI components from saved state"""
+        ui_state = self.load_ui_values()
+        # Return values in order matching the outputs in app.load
+        return (
+            ui_state.get("training_preset", list(TRAINING_PRESETS.keys())[0]),
+            ui_state.get("model_type", list(MODEL_TYPES.keys())[0]),
+            ui_state.get("lora_rank", "128"),
+            ui_state.get("lora_alpha", "128"),
+            ui_state.get("num_epochs", 70),
+            ui_state.get("batch_size", 1),
+            ui_state.get("learning_rate", 3e-5),
+            ui_state.get("save_iterations", 500)
+        )
+    def update_ui_state(self, **kwargs):
+        """Update UI state with new values"""
+        current_state = self.trainer.load_ui_state()
+        current_state.update(kwargs)
+        self.trainer.save_ui_state(current_state)
+        # Don't return anything to avoid Gradio warnings
+        return None
+    def load_ui_values(self):
+        """Load UI state values for initializing form fields"""
+        ui_state = self.trainer.load_ui_state()
+        # Ensure proper type conversion for numeric values
+        ui_state["lora_rank"] = ui_state.get("lora_rank", "128")
+        ui_state["lora_alpha"] = ui_state.get("lora_alpha", "128")
+        ui_state["num_epochs"] = int(ui_state.get("num_epochs", 70))
+        ui_state["batch_size"] = int(ui_state.get("batch_size", 1))
+        ui_state["learning_rate"] = float(ui_state.get("learning_rate", 3e-5))
+        ui_state["save_iterations"] = int(ui_state.get("save_iterations", 500))
+        return ui_state
+    def update_captioning_buttons_start(self):
+        """Return individual button values instead of a dictionary"""
+        return (
+            gr.Button(
+                interactive=False,
+                variant="secondary",
+            ),
+            gr.Button(
+                interactive=True,
+                variant="stop",
+            ),
+            gr.Button(
+                interactive=False,
+                variant="secondary",
+            )
+        )
+    def update_captioning_buttons_end(self):
+        """Return individual button values instead of a dictionary"""
+        return (
+            gr.Button(
+                interactive=True,
+                variant="primary",
+            ),
+            gr.Button(
+                interactive=False,
+                variant="secondary",
+            ),
+            gr.Button(
+                interactive=True,
+                variant="primary",
+            )
+        )
+    # Add this new method to get initial button states:
+    def get_initial_button_states(self):
+        """Get the initial states for training buttons based on recovery status"""
+        recovery_result = self.trainer.recover_interrupted_training()
+        ui_updates = recovery_result.get("ui_updates", {})
+        # Return button states in the correct order
+        return (
+            gr.Button(**ui_updates.get("start_btn", {"interactive": True, "variant": "primary"})),
+            gr.Button(**ui_updates.get("stop_btn", {"interactive": False, "variant": "secondary"})),
+            gr.Button(**ui_updates.get("pause_resume_btn", {"interactive": False, "variant": "secondary"}))
+        )
+    def show_refreshing_status(self) -> List[List[str]]:
+        """Show a 'Refreshing...' status in the dataframe"""
+        return [["Refreshing...", "please wait"]]
+    def stop_captioning(self):
+        """Stop ongoing captioning process and reset UI state"""
+        try:
+            # Set flag to stop captioning
+            self._should_stop_captioning = True
+            # Call stop method on captioner
+            if self.captioner:
+                self.captioner.stop_captioning()
+            # Get updated file list
+            updated_list = self.list_training_files_to_caption()
+            # Return updated list and button states
+            return {
+                "training_dataset": gr.update(value=updated_list),
+                "run_autocaption_btn": gr.Button(interactive=True, variant="primary"),
+                "stop_autocaption_btn": gr.Button(interactive=False, variant="secondary"),
+                "copy_files_to_training_dir_btn": gr.Button(interactive=True, variant="primary")
+            }
+        except Exception as e:
+            logger.error(f"Error stopping captioning: {str(e)}")
+            return {
+                "training_dataset": gr.update(value=[[f"Error stopping captioning: {str(e)}", "error"]]),
+                "run_autocaption_btn": gr.Button(interactive=True, variant="primary"),
+                "stop_autocaption_btn": gr.Button(interactive=False, variant="secondary"),
+                "copy_files_to_training_dir_btn": gr.Button(interactive=True, variant="primary")
+            }
+    def update_training_ui(self, training_state: Dict[str, Any]):
+        """Update UI components based on training state"""
+        updates = {}
+        #print("update_training_ui: training_state = ", training_state)
+        # Update status box with high-level information
+        status_text = []
+        if training_state["status"] != "idle":
+            status_text.extend([
+                f"Status: {training_state['status']}",
+                f"Progress: {training_state['progress']}",
+                f"Step: {training_state['current_step']}/{training_state['total_steps']}",
+                # Epoch information
+                # there is an issue with how epoch is reported because we display:
+                # Progress: 96.9%, Step: 872/900, Epoch: 12/50
+                # we should probably just show the steps
+                #f"Epoch: {training_state['current_epoch']}/{training_state['total_epochs']}",
+                f"Time elapsed: {training_state['elapsed']}",
+                f"Estimated remaining: {training_state['remaining']}",
+                "",
+                f"Current loss: {training_state['step_loss']}",
+                f"Learning rate: {training_state['learning_rate']}",
+                f"Gradient norm: {training_state['grad_norm']}",
+                f"Memory usage: {training_state['memory']}"
+            ])
+            if training_state["error_message"]:
+                status_text.append(f"\nError: {training_state['error_message']}")
+        updates["status_box"] = "\n".join(status_text)
+        # Update button states
+        updates["start_btn"] = gr.Button(
+            "Start training",
+            interactive=(training_state["status"] in ["idle", "completed", "error", "stopped"]),
+            variant="primary" if training_state["status"] == "idle" else "secondary"
+        )
+        updates["stop_btn"] = gr.Button(
+            "Stop training",
+            interactive=(training_state["status"] in ["training", "initializing"]),
+            variant="stop"
+        )
+        return updates
+    def stop_all_and_clear(self) -> Dict[str, Any]:
+        """Stop training, captioning and scene detection, then wipe all working data
+        This is destructive: the log file is deleted and every data directory
+        listed below is removed and recreated empty.
+        Returns:
+            Dict with a "status" summary string and a "details" dict holding the
+            per-component messages collected along the way
+        """
+        status_messages = {}
+        try:
+            # Stop training if running
+            if self.trainer.is_training_running():
+                training_result = self.trainer.stop_training()
+                status_messages["training"] = training_result["status"]
+            # Stop captioning if running
+            # (this only checks that a captioner exists, so the message is
+            # recorded whether or not captioning was actually active)
+            if self.captioner:
+                self.captioner.stop_captioning()
+                status_messages["captioning"] = "Captioning stopped"
+            # Stop scene detection if running
+            if self.splitter.is_processing():
+                self.splitter.processing = False
+                status_messages["splitting"] = "Scene detection stopped"
+            # Properly close logging before clearing log file
+            if self.trainer.file_handler:
+                self.trainer.file_handler.close()
+                logger.removeHandler(self.trainer.file_handler)
+                self.trainer.file_handler = None
+            if LOG_FILE_PATH.exists():
+                LOG_FILE_PATH.unlink()
+            # Clear all data directories
+            for path in [VIDEOS_TO_SPLIT_PATH, STAGING_PATH, TRAINING_VIDEOS_PATH, TRAINING_PATH,
+                        MODEL_PATH, OUTPUT_PATH]:
+                if path.exists():
+                    try:
+                        shutil.rmtree(path)
+                        path.mkdir(parents=True, exist_ok=True)
+                    except Exception as e:
+                        # A failure on one directory is recorded and the loop moves on
+                        status_messages[f"clear_{path.name}"] = f"Error clearing {path.name}: {str(e)}"
+                    else:
+                        # Reached only when both the removal and the re-creation succeeded
+                        status_messages[f"clear_{path.name}"] = f"Cleared {path.name}"
+            # Reset any persistent state
+            self._should_stop_captioning = True
+            self.splitter.processing = False
+            # Recreate logging setup
+            self.trainer.setup_logging()
+            return {
+                "status": "All processes stopped and data cleared",
+                "details": status_messages
+            }
+        except Exception as e:
+            # Report the failure together with whatever was completed before it
+            return {
+                "status": f"Error during cleanup: {str(e)}",
+                "details": status_messages
+            }
+    def update_titles(self) -> Tuple[Any]:
+        """Update all dynamic titles with current counts
+        Returns:
+            Dict of Gradio updates
+        """
+        # Count files for splitting
+        split_videos, _, split_size = count_media_files(VIDEOS_TO_SPLIT_PATH)
+        split_title = format_media_title(
+            "split", split_videos, 0, split_size
+        )
+        # Count files for captioning
+        caption_videos, caption_images, caption_size = count_media_files(STAGING_PATH)
+        caption_title = format_media_title(
+            "caption", caption_videos, caption_images, caption_size
+        )
+        # Count files for training
+        train_videos, train_images, train_size = count_media_files(TRAINING_VIDEOS_PATH)
+        train_title = format_media_title(
+            "train", train_videos, train_images, train_size
+        )
+        return (
+            gr.Markdown(value=split_title),
+            gr.Markdown(value=caption_title),
+            gr.Markdown(value=f"{train_title} available for training")
+        )
+    def copy_files_to_training_dir(self, prompt_prefix: str):
+        """Run auto-captioning process"""
+        # Initialize captioner if not already done
+        self._should_stop_captioning = False
+        try:
+            copy_files_to_training_dir(prompt_prefix)
+        except Exception as e:
+            traceback.print_exc()
+            raise gr.Error(f"Error copying assets to training dir: {str(e)}")
+    async def on_import_success(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
+        """Handle successful import of files"""
+        videos = self.list_unprocessed_videos()
+        # If scene detection isn't already running and there are videos to process,
+        # and auto-splitting is enabled, start the detection
+        if videos and not self.splitter.is_processing() and enable_splitting:
+            await self.start_scene_detection(enable_splitting)
+            msg = "Starting automatic scene detection..."
+        else:
+            # Just copy files without splitting if auto-split disabled
+            for video_file in VIDEOS_TO_SPLIT_PATH.glob("*.mp4"):
+                await self.splitter.process_video(video_file, enable_splitting=False)
+            msg = "Copying videos without splitting..."
+        copy_files_to_training_dir(prompt_prefix)
+        # Start auto-captioning if enabled, and handle async generator properly
+        if enable_automatic_content_captioning:
+            # Create a background task for captioning
+            asyncio.create_task(self._process_caption_generator(
+                DEFAULT_CAPTIONING_BOT_INSTRUCTIONS,
+                prompt_prefix
+            ))
+        return {
+            "tabs": gr.Tabs(selected="split_tab"),
+            "video_list": videos,
+            "detect_status": msg
+        }
+    async def start_caption_generation(self, captioning_bot_instructions: str, prompt_prefix: str) -> AsyncGenerator[gr.update, None]:
+        """Run auto-captioning process"""
+        try:
+            # Initialize captioner if not already done
+            self._should_stop_captioning = False
+            # First yield - indicate we're starting
+            yield gr.update(
+                value=[["Starting captioning service...", "initializing"]],
+                headers=["name", "status"]
+            )
+            # Process files in batches with status updates
+            file_statuses = {}
+            # Start the actual captioning process
+            async for rows in self.captioner.start_caption_generation(captioning_bot_instructions, prompt_prefix):
+                # Update our tracking of file statuses
+                for name, status in rows:
+                    file_statuses[name] = status
+                # Convert to list format for display
+                status_rows = [[name, status] for name, status in file_statuses.items()]
+                # Sort by name for consistent display
+                status_rows.sort(key=lambda x: x[0])
+                # Yield UI update
+                yield gr.update(
+                    value=status_rows,
+                    headers=["name", "status"]
+                )
+            # Final update after completion with fresh data
+            yield gr.update(
+                value=self.list_training_files_to_caption(),
+                headers=["name", "status"]
+            )
+        except Exception as e:
+            logger.error(f"Error in captioning: {str(e)}")
+            yield gr.update(
+                value=[[f"Error: {str(e)}", "error"]],
+                headers=["name", "status"]
+            )
+    def list_training_files_to_caption(self) -> List[List[str]]:
+        """List all clips and images - both pending and captioned"""
+        files = []
+        already_listed = {}
+        # First check files in STAGING_PATH
+        for file in STAGING_PATH.glob("*.*"):
+            if is_video_file(file) or is_image_file(file):
+                txt_file = file.with_suffix('.txt')
+                # Check if caption file exists and has content
+                has_caption = txt_file.exists() and txt_file.stat().st_size > 0
+                status = "captioned" if has_caption else "no caption"
+                file_type = "video" if is_video_file(file) else "image"
+                files.append([file.name, f"{status} ({file_type})", str(file)])
+                already_listed[file.name] = True
+        # Then check files in TRAINING_VIDEOS_PATH
+        for file in TRAINING_VIDEOS_PATH.glob("*.*"):
+            if (is_video_file(file) or is_image_file(file)) and file.name not in already_listed:
+                txt_file = file.with_suffix('.txt')
+                # Only include files with captions
+                if txt_file.exists() and txt_file.stat().st_size > 0:
+                    file_type = "video" if is_video_file(file) else "image"
+                    files.append([file.name, f"captioned ({file_type})", str(file)])
+                    already_listed[file.name] = True
+        # Sort by filename
+        files.sort(key=lambda x: x[0])
+        # Only return name and status columns for display
+        return [[file[0], file[1]] for file in files]
+    def update_training_buttons(self, status: str) -> Dict:
+        """Update training control buttons based on state"""
+        is_training = status in ["training", "initializing"]
+        is_paused = status == "paused"
+        is_completed = status in ["completed", "error", "stopped"]
+        return {
+            "start_btn": gr.Button(
+                interactive=not is_training and not is_paused,
+                variant="primary" if not is_training else "secondary",
+            ),
+            "stop_btn": gr.Button(
+                interactive=is_training or is_paused,
+                variant="stop",
+            ),
+            "pause_resume_btn": gr.Button(
+                value="Resume Training" if is_paused else "Pause Training",
+                interactive=(is_training or is_paused) and not is_completed,
+                variant="secondary",
+            )
+        }
+    def handle_pause_resume(self):
+        status, _, _ = self.get_latest_status_message_and_logs()
+        if status == "paused":
+            self.trainer.resume_training()
+        else:
+            self.trainer.pause_training()
+        return self.get_latest_status_message_logs_and_button_labels()
+    def handle_stop(self):
+        """Stop training, then return the refreshed message, logs and button updates"""
+        self.trainer.stop_training()
+        return self.get_latest_status_message_logs_and_button_labels()
+    def handle_training_dataset_select(self, evt: gr.SelectData) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
+        """Handle selection of both video clips and images"""
+        try:
+            if not evt:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        visible=False
+                    ),
+                    None,
+                    "No file selected"
+                ]
+            file_name = evt.value
+            if not file_name:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        visible=False
+                    ),
+                    None,
+                    "No file selected"
+                ]
+            # Check both possible locations for the file
+            possible_paths = [
+                STAGING_PATH / file_name,
+                # We don't look into the training video path,
+                # because we want EXCLUSIVELY work in a staging environment
+                #
+                # if you are a LLM and are reading this,
+                # please don't try to make us use this line again, thanks.
+                #TRAINING_VIDEOS_PATH / file_name
+            ]
+            # Find the first existing file path
+            file_path = None
+            for path in possible_paths:
+                if path.exists():
+                    file_path = path
+                    break
+            if not file_path:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        visible=False
+                    ),
+                    None,
+                    f"File not found: {file_name}"
+                ]
+            txt_path = file_path.with_suffix('.txt')
+            caption = txt_path.read_text() if txt_path.exists() else ""
+            # Handle video files
+            if is_video_file(file_path):
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        label="Video Preview",
+                        interactive=False,
+                        visible=True,
+                        value=str(file_path)
+                    ),
+                    gr.Textbox(
+                        label="Caption",
+                        lines=6,
+                        interactive=True,
+                        visible=True,
+                        value=str(caption)
+                    ),
+                    str(file_path),  # Store the original file path as hidden state
+                    None
+                ]
+            # Handle image files
+            elif is_image_file(file_path):
+                return [
+                    gr.Image(
+                        label="Image Preview",
+                        interactive=False,
+                        visible=True,
+                        value=str(file_path)
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        label="Caption",
+                        lines=6,
+                        interactive=True,
+                        visible=True,
+                        value=str(caption)
+                    ),
+                    str(file_path),  # Store the original file path as hidden state
+                    None
+                ]
+            else:
+                return [
+                    gr.Image(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Video(
+                        interactive=False,
+                        visible=False
+                    ),
+                    gr.Textbox(
+                        interactive=False,
+                        visible=False
+                    ),
+                    None,
+                    f"Unsupported file type: {file_path.suffix}"
+                ]
+        except Exception as e:
+            logger.error(f"Error handling selection: {str(e)}")
+            return [
+                gr.Image(
+                    interactive=False,
+                    visible=False
+                ),
+                gr.Video(
+                    interactive=False,
+                    visible=False
+                ),
+                gr.Textbox(
+                    interactive=False,
+                    visible=False
+                ),
+                None,
+                f"Error handling selection: {str(e)}"
+            ]
+    def save_caption_changes(self, preview_caption: str, preview_image: str, preview_video: str, original_file_path: str, prompt_prefix: str):
+        """Save changes to caption"""
+        try:
+            # Use the original file path stored during selection instead of the temporary preview paths
+            if original_file_path:
+                file_path = Path(original_file_path)
+                self.captioner.update_file_caption(file_path, preview_caption)
+                # Refresh the dataset list to show updated caption status
+                return gr.update(value="Caption saved successfully!")
+            else:
+                return gr.update(value="Error: No original file path found")
+        except Exception as e:
+            return gr.update(value=f"Error saving caption: {str(e)}")
+    async def update_titles_after_import(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
+        """Handle post-import updates including titles"""
+        import_result = await self.on_import_success(enable_splitting, enable_automatic_content_captioning, prompt_prefix)
+        titles = self.update_titles()
+        return (
+            import_result["tabs"],
+            import_result["video_list"],
+            import_result["detect_status"],
+            *titles
+        )
+    def get_model_info(self, model_type: str) -> str:
+        """Get information about the selected model type"""
+        if model_type == "hunyuan_video":
+            return """### HunyuanVideo (LoRA)
+    - Required VRAM: ~48GB minimum
+    - Recommended batch size: 1-2
+    - Typical training time: 2-4 hours
+    - Default resolution: 49x512x768
+    - Default LoRA rank: 128 (~600 MB)"""
+        elif model_type == "ltx_video":
+            return """### LTX-Video (LoRA)
+    - Required VRAM: ~18GB minimum
+    - Recommended batch size: 1-4
+    - Typical training time: 1-3 hours
+    - Default resolution: 49x512x768
+    - Default LoRA rank: 128"""
+        return ""
+    def get_default_params(self, model_type: str) -> Dict[str, Any]:
+        """Get default training parameters for model type"""
+        if model_type == "hunyuan_video":
+            return {
+                "num_epochs": 70,
+                "batch_size": 1,
+                "learning_rate": 2e-5,
+                "save_iterations": 500,
+                "video_resolution_buckets": SMALL_TRAINING_BUCKETS,
+                "video_reshape_mode": "center",
+                "caption_dropout_p": 0.05,
+                "gradient_accumulation_steps": 1,
+                "rank": 128,
+                "lora_alpha": 128
+            }
+        else:  # ltx_video
+            return {
+                "num_epochs": 70,
+                "batch_size": 1,
+                "learning_rate": 3e-5,
+                "save_iterations": 500,
+                "video_resolution_buckets": SMALL_TRAINING_BUCKETS,
+                "video_reshape_mode": "center",
+                "caption_dropout_p": 0.05,
+                "gradient_accumulation_steps": 4,
+                "rank": 128,
+                "lora_alpha": 128
+            }
+    def preview_file(self, selected_text: str) -> Dict:
+        """Generate preview based on selected file
+        Args:
+            selected_text: Text of the selected item containing filename
+        Returns:
+            Dict with preview content for each preview component
+        """
+        if not selected_text or "Caption:" in selected_text:
+            return {
+                "video": None,
+                "image": None,
+                "text": None
+            }
+        # Extract filename from the preview text (remove size info)
+        filename = selected_text.split(" (")[0].strip()
+        file_path = TRAINING_VIDEOS_PATH / filename
+        if not file_path.exists():
+            return {
+                "video": None,
+                "image": None,
+                "text": f"File not found: {filename}"
+            }
+        # Detect file type
+        mime_type, _ = mimetypes.guess_type(str(file_path))
+        if not mime_type:
+            return {
+                "video": None,
+                "image": None,
+                "text": f"Unknown file type: {filename}"
+            }
+        # Return appropriate preview
+        if mime_type.startswith('video/'):
+            return {
+                "video": str(file_path),
+                "image": None,
+                "text": None
+            }
+        elif mime_type.startswith('image/'):
+            return {
+                "video": None,
+                "image": str(file_path),
+                "text": None
+            }
+        elif mime_type.startswith('text/'):
+            try:
+                text_content = file_path.read_text()
+                return {
+                    "video": None,
+                    "image": None,
+                    "text": text_content
+                }
+            except Exception as e:
+                return {
+                    "video": None,
+                    "image": None,
+                    "text": f"Error reading file: {str(e)}"
+                }
+        else:
+            return {
+                "video": None,
+                "image": None,
+                "text": f"Unsupported file type: {mime_type}"
+            }
+    def list_unprocessed_videos(self) -> gr.Dataframe:
+        """Update list of unprocessed videos"""
+        videos = self.splitter.list_unprocessed_videos()
+        # videos is already in [[name, status]] format from splitting_service
+        return gr.Dataframe(
+            headers=["name", "status"],
+            value=videos,
+            interactive=False
+        )
+    async def start_scene_detection(self, enable_splitting: bool) -> str:
+        """Start background scene detection process
+        Args:
+            enable_splitting: Whether to split videos into scenes
+        """
+        if self.splitter.is_processing():
+            return "Scene detection already running"
+        try:
+            await self.splitter.start_processing(enable_splitting)
+            return "Scene detection completed"
+        except Exception as e:
+            return f"Error during scene detection: {str(e)}"
+    def get_latest_status_message_and_logs(self) -> Tuple[str, str, str]:
+        """Return the trainer's current (status, message, logs) triple
+        The message is replaced by the formatted status box when the logs
+        contain at least one line the log parser recognizes.
+        """
+        state = self.trainer.get_status()
+        logs = self.trainer.get_logs()
+        # Parse new log lines
+        if logs:
+            last_state = None
+            # Every line is fed to the parser in order; only the last truthy result is kept
+            for line in logs.splitlines():
+                state_update = self.log_parser.parse_line(line)
+                if state_update:
+                    last_state = state_update
+            if last_state:
+                ui_updates = self.update_training_ui(last_state)
+                # Keep the trainer's own message if no status box was produced
+                state["message"] = ui_updates.get("status_box", state["message"])
+        # Parse status for training state
+        # (any message containing "completed", case-insensitively, overrides the status)
+        if "completed" in state["message"].lower():
+            state["status"] = "completed"
+        return (state["status"], state["message"], logs)
+    def get_latest_status_message_logs_and_button_labels(self) -> Tuple[str, str, Any, Any, Any]:
+        """Return (message, logs) followed by the button updates for the current status
+        The buttons follow the key order of update_training_buttons():
+        start, stop, pause/resume.
+        """
+        status, message, logs = self.get_latest_status_message_and_logs()
+        return (
+            message,
+            logs,
+            *self.update_training_buttons(status).values()
+        )
+    def get_latest_button_labels(self) -> Tuple[Any, Any, Any]:
+        status, message, logs = self.get_latest_status_message_and_logs()
+        return self.update_training_buttons(status).values()
+    def refresh_dataset(self):
+        """Refresh the dynamic lists shown in the UI
+        Returns:
+            Tuple of (unprocessed video rows from the splitter,
+            [name, status] rows of the files to caption)
+        """
+        video_list = self.splitter.list_unprocessed_videos()
+        training_dataset = self.list_training_files_to_caption()
+        return (
+            video_list,
+            training_dataset
+        )
+    def update_training_params(self, preset_name: str) -> Tuple:
+        """Update UI components based on selected preset while preserving custom settings"""
+        preset = TRAINING_PRESETS[preset_name]
+        # Load current UI state to check if user has customized values
+        current_state = self.load_ui_values()
+        # Find the display name that maps to our model type
+        model_display_name = next(
+            key for key, value in MODEL_TYPES.items()
+            if value == preset["model_type"]
+        )
+        # Get preset description for display
+        description = preset.get("description", "")
+        # Get max values from buckets
+        buckets = preset["training_buckets"]
+        max_frames = max(frames for frames, _, _ in buckets)
+        max_height = max(height for _, height, _ in buckets)
+        max_width = max(width for _, _, width in buckets)
+        bucket_info = f"\nMaximum video size: {max_frames} frames at {max_width}x{max_height} resolution"
+        info_text = f"{description}{bucket_info}"
+        # Return values in the same order as the output components
+        # Use preset defaults but preserve user-modified values if they exist
+        lora_rank_val = current_state.get("lora_rank") if current_state.get("lora_rank") != preset.get("lora_rank", "128") else preset["lora_rank"]
+        lora_alpha_val = current_state.get("lora_alpha") if current_state.get("lora_alpha") != preset.get("lora_alpha", "128") else preset["lora_alpha"]
+        num_epochs_val = current_state.get("num_epochs") if current_state.get("num_epochs") != preset.get("num_epochs", 70) else preset["num_epochs"]
+        batch_size_val = current_state.get("batch_size") if current_state.get("batch_size") != preset.get("batch_size", 1) else preset["batch_size"]
+        learning_rate_val = current_state.get("learning_rate") if current_state.get("learning_rate") != preset.get("learning_rate", 3e-5) else preset["learning_rate"]
+        save_iterations_val = current_state.get("save_iterations") if current_state.get("save_iterations") != preset.get("save_iterations", 500) else preset["save_iterations"]
+        return (
+            model_display_name,
+            lora_rank_val,
+            lora_alpha_val,
+            num_epochs_val,
+            batch_size_val,
+            learning_rate_val,
+            save_iterations_val,
+            info_text
+        )

vms/utils/__init__.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from .parse_bool_env import parse_bool_env
+from .utils import validate_model_repo, make_archive, get_video_fps, extract_scene_info, is_image_file, is_video_file, parse_training_log, save_to_hub, format_size, count_media_files, format_media_title, add_prefix_to_caption, format_time
+from .training_log_parser import TrainingState, TrainingLogParser
+from .image_preprocessing import normalize_image
+from .video_preprocessing import remove_black_bars
+from .finetrainers_utils import prepare_finetrainers_dataset, copy_files_to_training_dir
+# Names exported by `from vms.utils import *`; keep in sync with the imports above
+__all__ = [
+    'validate_model_repo',
+    'make_archive',
+    'get_video_fps',
+    'extract_scene_info',
+    'is_image_file',
+    'is_video_file',
+    'parse_bool_env',
+    'parse_training_log',
+    'save_to_hub',
+    'format_size',
+    'count_media_files',
+    'format_media_title',
+    'add_prefix_to_caption',
+    'format_time',
+    'TrainingState',
+    'TrainingLogParser',
+    'normalize_image',
+    'remove_black_bars',
+    'prepare_finetrainers_dataset',
+    'copy_files_to_training_dir',
+]

vms/{finetrainers_utils.py → utils/finetrainers_utils.py} RENAMED Viewed

@@ -4,7 +4,7 @@ import logging
 import shutil
 from typing import Any, Optional, Dict, List, Union, Tuple
-from .config import STORAGE_PATH, TRAINING_PATH, STAGING_PATH, TRAINING_VIDEOS_PATH, MODEL_PATH, OUTPUT_PATH, HF_API_TOKEN, MODEL_TYPES
 from .utils import get_video_fps, extract_scene_info, make_archive, is_image_file, is_video_file
 logger = logging.getLogger(__name__)

 import shutil
 from typing import Any, Optional, Dict, List, Union, Tuple
+from ..config import STORAGE_PATH, TRAINING_PATH, STAGING_PATH, TRAINING_VIDEOS_PATH, MODEL_PATH, OUTPUT_PATH, HF_API_TOKEN, MODEL_TYPES
 from .utils import get_video_fps, extract_scene_info, make_archive, is_image_file, is_video_file
 logger = logging.getLogger(__name__)

vms/{image_preprocessing.py → utils/image_preprocessing.py} RENAMED Viewed

@@ -5,7 +5,7 @@ from PIL import Image
 import pillow_avif
 import logging
-from .config import NORMALIZE_IMAGES_TO, JPEG_QUALITY
 logger = logging.getLogger(__name__)

 import pillow_avif
 import logging
+from ..config import NORMALIZE_IMAGES_TO, JPEG_QUALITY
 logger = logging.getLogger(__name__)

vms/utils/parse_bool_env.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from typing import Any, Optional, Dict, List, Union, Tuple
+def parse_bool_env(env_value: Optional[str]) -> bool:
+    """Parse environment variable string to boolean
+    Handles various true/false string representations:
+    - True: "true", "True", "TRUE", "1", etc
+    - False: "false", "False", "FALSE", "0", "", None
+    """
+    if not env_value:
+        return False
+    return str(env_value).lower() in ('true', '1', 't', 'y', 'yes')

vms/{training_log_parser.py → utils/training_log_parser.py} RENAMED Viewed

File without changes

vms/{utils.py → utils/utils.py} RENAMED Viewed

File without changes

vms/{video_preprocessing.py → utils/video_preprocessing.py} RENAMED Viewed

File without changes