{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "execution": { "iopub.execute_input": "2025-06-15T14:21:25.974502Z", "iopub.status.busy": "2025-06-15T14:21:25.974227Z", "iopub.status.idle": "2025-06-15T14:21:31.475226Z", "shell.execute_reply": "2025-06-15T14:21:31.474663Z", "shell.execute_reply.started": "2025-06-15T14:21:25.974478Z" }, "trusted": true }, "outputs": [], "source": [ "import os\n", "os.system(\"pip install -q wget\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:21:31.476734Z", "iopub.status.busy": "2025-06-15T14:21:31.476449Z", "iopub.status.idle": "2025-06-15T14:21:37.092039Z", "shell.execute_reply": "2025-06-15T14:21:37.091491Z", "shell.execute_reply.started": "2025-06-15T14:21:31.476715Z" }, "trusted": true }, "outputs": [], "source": [ "import wget\n", "import tarfile\n", "import torchaudio\n", "import pandas as pd\n", "from huggingface_hub import snapshot_download, login\n", "login(\"\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:21:37.092984Z", "iopub.status.busy": "2025-06-15T14:21:37.092705Z", "iopub.status.idle": "2025-06-15T14:21:37.096562Z", "shell.execute_reply": "2025-06-15T14:21:37.096039Z", "shell.execute_reply.started": "2025-06-15T14:21:37.092967Z" }, "trusted": true }, "outputs": [], "source": [ "os.chdir(\"/content\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T13:59:06.772020Z", "iopub.status.busy": "2025-06-15T13:59:06.771694Z", "iopub.status.idle": "2025-06-15T14:00:28.043176Z", "shell.execute_reply": "2025-06-15T14:00:28.041603Z", "shell.execute_reply.started": "2025-06-15T13:59:06.771995Z" }, "trusted": true }, "outputs": [], "source": [ "from huggingface_hub import 
HfApi\n",
 "from huggingface_hub import snapshot_download\n",
 "import os\n",
 "api = HfApi()\n",
 "!git lfs install --force\n",
 "\n",
 "# Model repo holding the previous checkpoint, and the local directory to mirror it into\n",
 "\n",
 "repo_id = \"heboya8/t5-tts-temp-model\"\n",
 "save_path = \".\"\n",
 "\n",
 "# Create the directory if it doesn't exist\n",
 "os.makedirs(save_path, exist_ok=True)\n",
 "\n",
 "# Download the model snapshot (repo_type=\"model\", not a dataset)\n",
 "snapshot_download(repo_id=repo_id, repo_type=\"model\", local_dir=save_path)"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:21:37.389642Z", "iopub.status.busy": "2025-06-15T14:21:37.389399Z", "iopub.status.idle": "2025-06-15T14:24:47.468892Z", "shell.execute_reply": "2025-06-15T14:24:47.468139Z", "shell.execute_reply.started": "2025-06-15T14:21:37.389623Z" }, "trusted": true }, "outputs": [], "source": [
 "# Step 1: Set Up the Environment\n",
 "os.system(\"pip install -e . >/dev/null 2>&1\")\n",
 "os.system(\"pip install torch==2.4.0+cu124 torchaudio==2.4.0+cu124 torchvision==0.19.0+cu124 --extra-index-url https://download.pytorch.org/whl/cu124 >/dev/null 2>&1\")\n",
 "os.system(\"pip install accelerate==0.33.0 tensorboard >/dev/null 2>&1\")\n",
 "if not os.path.exists(\"F5-TTS\"):\n",
 "    os.system(\"git clone https://github.com/SWivid/F5-TTS.git\")\n",
 "os.chdir(\"F5-TTS\")\n",
 "os.system(\"pip install -e . >/dev/null 2>&1\")\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:24:47.470454Z", "iopub.status.busy": "2025-06-15T14:24:47.470177Z", "iopub.status.idle": "2025-06-15T14:24:47.473922Z", "shell.execute_reply": "2025-06-15T14:24:47.473261Z", "shell.execute_reply.started": "2025-06-15T14:24:47.470429Z" }, "trusted": true }, "outputs": [], "source": [
 "os.chdir(\"/content/F5-TTS\")\n",
 " # os.chdir(\"F5-TTS-Vietnamese\")"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T06:47:34.909957Z", "iopub.status.busy": "2025-06-15T06:47:34.909372Z", "iopub.status.idle": "2025-06-15T06:47:35.040348Z", "shell.execute_reply": "2025-06-15T06:47:35.039424Z", "shell.execute_reply.started": "2025-06-15T06:47:34.909927Z" }, "trusted": true }, "outputs": [], "source": [
 "!pwd"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:24:47.475053Z", "iopub.status.busy": "2025-06-15T14:24:47.474827Z", "iopub.status.idle": "2025-06-15T14:24:47.644337Z", "shell.execute_reply": "2025-06-15T14:24:47.643562Z", "shell.execute_reply.started": "2025-06-15T14:24:47.475031Z" }, "trusted": true }, "outputs": [], "source": [
 "# -p: create missing parents and do not fail when the directory already exists (safe on re-run)\n",
 "!mkdir -p ./ckpts/vin100h-preprocessed-v2\n",
 "# !cp /kaggle/input/vi-fine-tuned-t5-tts/69/model_last.pt \\\n",
 "# ./ckpts/vin100h-preprocessed-v2\n",
 "# !cp -r /content/73/* ./ckpts/vin100h-preprocessed-v2"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:24:47.646473Z", "iopub.status.busy": "2025-06-15T14:24:47.646278Z", "iopub.status.idle": "2025-06-15T14:25:20.275283Z", "shell.execute_reply": "2025-06-15T14:25:20.274453Z", "shell.execute_reply.started": "2025-06-15T14:24:47.646454Z" }, "trusted": true }, "outputs": [], "source": [ "# !cp -r /kaggle/input/vi-fine-tuned-t5-tts/7/* ./ckpts\n", "!cp -r 
/kaggle/input/vi-fine-tuned-t5-tts/75/model_last.pt \\\n", " ./ckpts/vin100h-preprocessed-v2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:25:20.276407Z", "iopub.status.busy": "2025-06-15T14:25:20.276159Z", "iopub.status.idle": "2025-06-15T14:25:20.413414Z", "shell.execute_reply": "2025-06-15T14:25:20.412180Z", "shell.execute_reply.started": "2025-06-15T14:25:20.276382Z" }, "trusted": true }, "outputs": [], "source": [
 "!ls -a ./ckpts/vin100h-preprocessed-v2"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-10T15:59:08.329794Z", "iopub.status.busy": "2025-05-10T15:59:08.329442Z", "iopub.status.idle": "2025-05-10T15:59:09.362207Z", "shell.execute_reply": "2025-05-10T15:59:09.361253Z", "shell.execute_reply.started": "2025-05-10T15:59:08.329757Z" }, "trusted": true }, "outputs": [], "source": [
 "import json\n",
 "import os\n",
 "from pathlib import Path\n",
 "import shutil\n",
 "import torchaudio\n",
 "from datasets import load_dataset\n",
 "from datasets.arrow_writer import ArrowWriter\n",
 "from tqdm import tqdm\n",
 "import soundfile as sf\n",
 "import csv\n",
 "\n",
 "def save_dataset_to_local_disk(output_dir=\"./data/vin100h-preprocessed-v2\",\n",
 "                               base_model=\"htdung167/vin100h-preprocessed-v2\",\n",
 "                               audio_header='audio',\n",
 "                               text_header='transcription'):\n",
 "    \"\"\"Export the `train` split of a Hugging Face dataset as WAV files plus metadata.csv.\n",
 "\n",
 "    Files written under `output_dir`:\n",
 "        wavs/audio_000000.wav, ...  one file per sample, at the sample's own sampling rate\n",
 "        metadata.csv                pipe-delimited: an `audio_file|text` header row, then\n",
 "                                    one `wavs/<file>|<text>` row per sample\n",
 "\n",
 "    Args:\n",
 "        output_dir: Destination directory; created (with `wavs/`) when missing.\n",
 "        base_model: Hub repo id handed to `load_dataset` (a dataset id, despite the name).\n",
 "        audio_header: Column whose value exposes `array` and `sampling_rate`.\n",
 "        text_header: Column holding the transcript written to metadata.csv.\n",
 "    \"\"\"\n",
 "    wavs_dir = os.path.join(output_dir, \"wavs\")\n",
 "    metadata_path = os.path.join(output_dir, \"metadata.csv\")\n",
 "    os.makedirs(wavs_dir, exist_ok=True)\n",
 "\n",
 "    ds = load_dataset(base_model)['train']\n",
 "\n",
 "    # prepare_csv_wavs.py (run right after this) discards the first CSV row as a header,\n",
 "    # so write a real header instead of silently sacrificing the first sample.\n",
 "    metadata = [[\"audio_file\", \"text\"]]\n",
 "\n",
 "    for idx, sample in enumerate(tqdm(ds, desc=\"Saving samples to directory\")):\n",
 "        audio = sample[audio_header]\n",
 "        filename = f\"audio_{idx:06d}.wav\"\n",
 "        sf.write(os.path.join(wavs_dir, filename), audio['array'], audio['sampling_rate'])\n",
 "        # Paths are stored relative to metadata.csv so the exported folder can be moved.\n",
 "        metadata.append([f\"wavs/{filename}\", sample[text_header]])\n",
 "\n",
 "    with open(metadata_path, 'w', newline='', encoding='utf-8') as f:\n",
 "        csv.writer(f, delimiter='|').writerows(metadata)\n",
 "\n",
 "    print(f\"Dataset saved to {output_dir}\")\n"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-10T15:59:10.399030Z", "iopub.status.busy": "2025-05-10T15:59:10.397916Z", "iopub.status.idle": "2025-05-10T16:10:46.269067Z", "shell.execute_reply": "2025-05-10T16:10:46.267298Z", "shell.execute_reply.started": "2025-05-10T15:59:10.398995Z" }, "trusted": true }, "outputs": [], "source": [
 "output_dir = \"./data/vin100h-preprocessed-v2\"\n",
 "tokenizer_type = \"pinyin\"\n",
 "\n",
 "save_dataset_to_local_disk(output_dir=output_dir,\n",
 " base_model=\"htdung167/vin100h-preprocessed-v2\",\n",
 " text_header=\"preprocessed_sentence_v2\"\n",
 " )"
] }, { "cell_type": "code", "execution_count": null, "metadata": { "_kg_hide-output": true, "execution": { "iopub.execute_input": "2025-05-10T16:10:46.273403Z", "iopub.status.busy": "2025-05-10T16:10:46.272176Z", "iopub.status.idle": "2025-05-10T17:15:19.405258Z", "shell.execute_reply": "2025-05-10T17:15:19.402002Z", "shell.execute_reply.started": "2025-05-10T16:10:46.273366Z" }, "trusted": true }, "outputs": [], "source": [
 "!python ./src/f5_tts/train/datasets/prepare_csv_wavs.py \\\n",
 " \"./data/vin100h-preprocessed-v2\" \\\n",
 " \"./data/vin100h-preprocessed-v2_pinyin\" \\\n",
 " --workers 4 # Sets the number of parallel processes for preprocessing."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:25:20.414900Z", "iopub.status.busy": "2025-06-15T14:25:20.414621Z", "iopub.status.idle": "2025-06-15T14:25:21.649820Z", "shell.execute_reply": "2025-06-15T14:25:21.648942Z", "shell.execute_reply.started": "2025-06-15T14:25:20.414873Z" }, "trusted": true }, "outputs": [], "source": [ "%%writefile ./src/f5_tts/configs/vi-fine-tuned-t5-tts.yaml\n", "hydra:\n", " run:\n", " dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}\n", "\n", "datasets:\n", " name: vin100h-preprocessed-v2 # dataset name\n", " batch_size_per_gpu: 3200 # 1 GPUs, 1 * 3200 = 3200\n", " batch_size_type: frame # frame | sample\n", " max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models\n", " num_workers: 4\n", "\n", "optim:\n", " epochs: 10\n", " learning_rate: 1e-5\n", " num_warmup_updates: 2761 # warmup updates\n", " grad_accumulation_steps: 2 # note: updates = steps / grad_accumulation_steps\n", " max_grad_norm: 1.0 # gradient clipping\n", " bnb_optimizer: False # use bnb 8bit AdamW optimizer or not\n", "\n", "model:\n", " name: vi_fine_tuned_t5_tts # model name\n", " tokenizer: pinyin # tokenizer type\n", " tokenizer_path: null # if 'custom' tokenizer, define the path want to use (should be vocab.txt)\n", " backbone: DiT\n", " arch:\n", " dim: 1024\n", " depth: 22\n", " heads: 16\n", " ff_mult: 2\n", " text_dim: 512\n", " text_mask_padding: False\n", " conv_layers: 4\n", " pe_attn_head: 1\n", " checkpoint_activations: False # recompute activations and save memory for extra compute\n", " mel_spec:\n", " target_sample_rate: 24000\n", " n_mel_channels: 100\n", " hop_length: 256\n", " win_length: 1024\n", " n_fft: 1024\n", " mel_spec_type: vocos # vocos | bigvgan\n", " vocoder:\n", " is_local: False # use local offline ckpt or not\n", " 
local_path: null # local vocoder path\n", "\n", "ckpts:\n", " logger: null # wandb | tensorboard | null\n", " log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples\n", " save_per_updates: 4000 # save checkpoint per updates\n", " keep_last_n_checkpoints: 1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints\n", " last_per_updates: 4000 # save last checkpoint per updates\n", " save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:25:21.651011Z", "iopub.status.busy": "2025-06-15T14:25:21.650749Z", "iopub.status.idle": "2025-06-15T14:25:22.958480Z", "shell.execute_reply": "2025-06-15T14:25:22.957781Z", "shell.execute_reply.started": "2025-06-15T14:25:21.650992Z" }, "trusted": true }, "outputs": [], "source": [ "!echo hello" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:25:22.959726Z", "iopub.status.busy": "2025-06-15T14:25:22.959476Z", "iopub.status.idle": "2025-06-15T14:25:38.131765Z", "shell.execute_reply": "2025-06-15T14:25:38.130931Z", "shell.execute_reply.started": "2025-06-15T14:25:22.959692Z" }, "trusted": true }, "outputs": [], "source": [ "!accelerate config default" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:28:31.671797Z", "iopub.status.busy": "2025-06-15T14:28:31.671483Z", "iopub.status.idle": "2025-06-15T14:28:31.803519Z", "shell.execute_reply": "2025-06-15T14:28:31.802848Z", "shell.execute_reply.started": "2025-06-15T14:28:31.671770Z" }, "trusted": true }, "outputs": [], "source": [ "!echo go" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:28:31.804624Z", "iopub.status.busy": 
"2025-06-15T14:28:31.804419Z", "iopub.status.idle": "2025-06-15T17:59:02.693078Z", "shell.execute_reply": "2025-06-15T17:59:02.692025Z", "shell.execute_reply.started": "2025-06-15T14:28:31.804591Z" }, "trusted": true }, "outputs": [], "source": [ "# ************\n", "!accelerate launch ./src/f5_tts/train/finetune_cli.py \\\n", " --exp_name F5TTS_Base \\\n", " --dataset_name vin100h-preprocessed-v2 \\\n", " --finetune \\\n", " --tokenizer pinyin \\\n", " --learning_rate 1e-05 \\\n", " --batch_size_type frame \\\n", " --batch_size_per_gpu 3200 \\\n", " --max_samples 64 \\\n", " --grad_accumulation_steps 2 \\\n", " --max_grad_norm 1 \\\n", " --epochs 76 \\\n", " --num_warmup_updates 2761 \\\n", " --save_per_updates 4000 \\\n", " --keep_last_n_checkpoints 1 \\\n", " --last_per_updates 4000 \\\n", " --log_samples \\\n", " --pretrain ./ckpts/vin100h-preprocessed-v2/model_last.pt\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T18:05:50.705629Z", "iopub.status.busy": "2025-06-15T18:05:50.704903Z", "iopub.status.idle": "2025-06-15T18:05:50.891227Z", "shell.execute_reply": "2025-06-15T18:05:50.890434Z", "shell.execute_reply.started": "2025-06-15T18:05:50.705578Z" }, "trusted": true }, "outputs": [], "source": [ "!echo abc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Copy and save" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-14T10:18:46.384990Z", "iopub.status.busy": "2025-06-14T10:18:46.384685Z", "iopub.status.idle": "2025-06-14T10:18:46.518166Z", "shell.execute_reply": "2025-06-14T10:18:46.517174Z", "shell.execute_reply.started": "2025-06-14T10:18:46.384965Z" }, "trusted": true }, "outputs": [], "source": [ "!rm -rf /kaggle/working/.cache" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-07T16:58:20.250613Z", "iopub.status.busy": 
"2025-06-07T16:58:20.250305Z", "iopub.status.idle": "2025-06-07T16:58:20.446725Z", "shell.execute_reply": "2025-06-07T16:58:20.445927Z", "shell.execute_reply.started": "2025-06-07T16:58:20.250588Z" }, "trusted": true }, "outputs": [], "source": [ "!ls -a ckpts/vin100h-preprocessed-v2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T18:06:00.980687Z", "iopub.status.busy": "2025-06-15T18:06:00.979884Z", "iopub.status.idle": "2025-06-15T18:06:07.418545Z", "shell.execute_reply": "2025-06-15T18:06:07.417240Z", "shell.execute_reply.started": "2025-06-15T18:06:00.980649Z" }, "trusted": true }, "outputs": [], "source": [ "# *******************Importance\n", "model_dir = \"/kaggle/working/76\"\n", "os.makedirs(model_dir, exist_ok=True)\n", "!cp -r ./ckpts/vin100h-preprocessed-v2/model_last.pt $model_dir" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.status.busy": "2025-06-14T10:34:21.049620Z", "iopub.status.idle": "2025-06-14T10:34:21.049856Z", "shell.execute_reply": "2025-06-14T10:34:21.049753Z", "shell.execute_reply.started": "2025-06-14T10:34:21.049740Z" }, "trusted": true }, "outputs": [], "source": [ "# To temporary Model hub\n", "from huggingface_hub import HfApi\n", "from huggingface_hub import snapshot_download\n", "# Initialize API\n", "api = HfApi()\n", "\n", "# Upload the folder to the repository root\n", "api.upload_large_folder(\n", " folder_path=\"/kaggle/working\", # Local folder path\n", " repo_id=\"heboya8/t5-tts-temp-model\",\n", " repo_type=\"model\"\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Prune Checkpoint" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-11T14:11:57.837831Z", "iopub.status.busy": "2025-05-11T14:11:57.837476Z", "iopub.status.idle": "2025-05-11T14:11:57.844498Z", "shell.execute_reply": "2025-05-11T14:11:57.843701Z", 
"shell.execute_reply.started": "2025-05-11T14:11:57.837803Z" }, "trusted": true }, "outputs": [], "source": [ "import torch\n", "\n", "def prune_checkpoint(checkpoint_path: str, new_checkpoint_path: str, save_ema: bool, safetensors: bool) -> str:\n", " try:\n", " checkpoint = torch.load(checkpoint_path, weights_only=True)\n", " print(\"Original Checkpoint Keys:\", checkpoint.keys())\n", "\n", " to_retain = \"ema_model_state_dict\" if save_ema else \"model_state_dict\"\n", " try:\n", " model_state_dict_to_retain = checkpoint[to_retain]\n", " except KeyError:\n", " return f\"{to_retain} not found in the checkpoint.\"\n", "\n", " if safetensors:\n", " new_checkpoint_path = new_checkpoint_path.replace(\".pt\", \".safetensors\")\n", " save_file(model_state_dict_to_retain, new_checkpoint_path)\n", " else:\n", " new_checkpoint_path = new_checkpoint_path.replace(\".safetensors\", \".pt\")\n", " new_checkpoint = {\"ema_model_state_dict\": model_state_dict_to_retain}\n", " torch.save(new_checkpoint, new_checkpoint_path)\n", "\n", " return f\"New checkpoint saved at: {new_checkpoint_path}\"\n", "\n", " except Exception as e:\n", " return f\"An error occurred: {e}\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-11T14:22:24.624318Z", "iopub.status.busy": "2025-05-11T14:22:24.623974Z", "iopub.status.idle": "2025-05-11T14:22:30.316195Z", "shell.execute_reply": "2025-05-11T14:22:30.315529Z", "shell.execute_reply.started": "2025-05-11T14:22:24.624292Z" }, "trusted": true }, "outputs": [], "source": [ "# Prune a checkpoint after training resize model\n", "result = prune_checkpoint(\n", " checkpoint_path=\"/kaggle/working/F5-TTS/ckpts/vin100h-preprocessed-v2/model_last.pt\",\n", " new_checkpoint_path=\"/root/.cache/abc.pt\",\n", " save_ema=False,\n", " safetensors=False\n", ")\n", "print(result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Inference" ] }, { "cell_type": "code", "execution_count": 
null, "metadata": { "execution": { "iopub.execute_input": "2025-05-20T17:08:02.683953Z", "iopub.status.busy": "2025-05-20T17:08:02.683595Z", "iopub.status.idle": "2025-05-20T17:08:02.753448Z", "shell.execute_reply": "2025-05-20T17:08:02.752714Z", "shell.execute_reply.started": "2025-05-20T17:08:02.683922Z" }, "trusted": true }, "outputs": [], "source": [ "from IPython.display import Audio\n", "\n", "# Path to your audio file\n", "audio_path = './data/vin100h-preprocessed-v2/wavs/audio_000010.wav'\n", "\n", "# Display and play the audio\n", "Audio(audio_path)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-14T10:24:03.249295Z", "iopub.status.busy": "2025-06-14T10:24:03.248968Z", "iopub.status.idle": "2025-06-14T10:24:41.393133Z", "shell.execute_reply": "2025-06-14T10:24:41.391987Z", "shell.execute_reply.started": "2025-06-14T10:24:03.249273Z" }, "trusted": true }, "outputs": [], "source": [ "!python ./src/f5_tts/infer/infer_cli.py \\\n", " --model \"vin100h-preprocessed-v2\" \\\n", " --model_cfg \"./src/f5_tts/configs/F5TTS_Base.yaml\" \\\n", " --ckpt_file \"./ckpts/vin100h-preprocessed-v2/model_last.pt\" \\\n", " --vocab_file \"./data/vin100h-preprocessed-v2_pinyin/vocab.txt\" \\\n", " --ref_audio \"./data/vin100h-preprocessed-v2/wavs/audio_000010.wav\" \\\n", " --ref_text \"Về giá cả so với giá bán ngoài các siêu thị thì dâu trái ở đây rẻ hơn khá nhiều. Giả sử như bó rau ở siêu thị bán khoảng 2 đô la một bó thì ở đây chỉ có một đô la một bó. 
Có khi mua 50 bó được tặng thêm một bó nữa.\" \\\n", " --gen_text \"Về giá cả so với giá bán ngoài các siêu thị\" \\\n", " --output_dir \"/kaggle/working/\"\n", " # --output_file \"/content/abc.wav\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-14T10:24:41.395230Z", "iopub.status.busy": "2025-06-14T10:24:41.394917Z", "iopub.status.idle": "2025-06-14T10:24:41.404325Z", "shell.execute_reply": "2025-06-14T10:24:41.403321Z", "shell.execute_reply.started": "2025-06-14T10:24:41.395199Z" }, "trusted": true }, "outputs": [], "source": [ "from IPython.display import Audio\n", "\n", "# Path to your audio file\n", "audio_path = '/kaggle/working/infer_cli_basic.wav'\n", "\n", "# Display and play the audio\n", "Audio(audio_path)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Download" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:25:38.133173Z", "iopub.status.busy": "2025-06-15T14:25:38.132898Z", "iopub.status.idle": "2025-06-15T14:26:12.006111Z", "shell.execute_reply": "2025-06-15T14:26:12.005444Z", "shell.execute_reply.started": "2025-06-15T14:25:38.133137Z" }, "trusted": true }, "outputs": [], "source": [ "from huggingface_hub import HfApi\n", "from huggingface_hub import snapshot_download\n", "import os\n", "api = HfApi()\n", "!git lfs install --force\n", "\n", "# Define the dataset name and local directory\n", "repo_id = \"heboya8/f5-tts-dataset\"\n", "save_path = \"/root/.cache\"\n", "\n", "# Create the directory if it doesn't exist\n", "os.makedirs(save_path, exist_ok=True)\n", "\n", "# Download the dataset\n", "snapshot_download(repo_id=repo_id, repo_type=\"dataset\", local_dir=save_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-06-15T14:26:12.009357Z", "iopub.status.busy": "2025-06-15T14:26:12.009122Z", "iopub.status.idle": 
"2025-06-15T14:28:31.670192Z", "shell.execute_reply": "2025-06-15T14:28:31.669158Z", "shell.execute_reply.started": "2025-06-15T14:26:12.009338Z" }, "trusted": true }, "outputs": [], "source": [ "!unzip -q -o /root/.cache/data_compress.zip -d \".\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Upload" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-10T20:06:26.721683Z", "iopub.status.busy": "2025-05-10T20:06:26.720825Z", "iopub.status.idle": "2025-05-10T20:11:36.850624Z", "shell.execute_reply": "2025-05-10T20:11:36.849599Z", "shell.execute_reply.started": "2025-05-10T20:06:26.721632Z" }, "trusted": true }, "outputs": [], "source": [ "from huggingface_hub import HfApi\n", "from huggingface_hub import snapshot_download\n", "# Initialize API\n", "api = HfApi()\n", "\n", "# Upload the folder to the repository root\n", "api.upload_large_folder(\n", " folder_path=\"/root/.cache/dataset\", # Local folder path\n", " repo_id=\"heboya8/f5-tts-dataset\",\n", " repo_type=\"dataset\",\n", " # multi_commits=True, # Enable resumable uploads\n", " # multi_commits_verbose=True # Show progress\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## /kaggle/working/F5-TTS/ckpts/vin100h-preprocessed-v2/model_last.ptDowload Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-10T20:16:38.191744Z", "iopub.status.busy": "2025-05-10T20:16:38.191338Z", "iopub.status.idle": "2025-05-10T20:16:56.134770Z", "shell.execute_reply": "2025-05-10T20:16:56.133810Z", "shell.execute_reply.started": "2025-05-10T20:16:38.191712Z" }, "trusted": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-10T20:19:28.100798Z", "iopub.status.busy": "2025-05-10T20:19:28.099915Z", "iopub.status.idle": "2025-05-10T20:19:28.249902Z", "shell.execute_reply": 
"2025-05-10T20:19:28.248723Z", "shell.execute_reply.started": "2025-05-10T20:19:28.100762Z" }, "trusted": true }, "outputs": [], "source": [ "!mkdir dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-10T20:20:05.322822Z", "iopub.status.busy": "2025-05-10T20:20:05.322019Z", "iopub.status.idle": "2025-05-10T20:20:05.567705Z", "shell.execute_reply": "2025-05-10T20:20:05.566624Z", "shell.execute_reply.started": "2025-05-10T20:20:05.322785Z" }, "trusted": true }, "outputs": [], "source": [ "!rm -rf d /root/.cache/dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-05-10T20:20:07.132689Z", "iopub.status.busy": "2025-05-10T20:20:07.132287Z", "iopub.status.idle": "2025-05-10T20:22:58.875583Z", "shell.execute_reply": "2025-05-10T20:22:58.874368Z", "shell.execute_reply.started": "2025-05-10T20:20:07.132656Z" }, "trusted": true }, "outputs": [], "source": [ "!unzip -q /kaggle/working/F5-TTS/~/.cache/data_compress.zip -d /root/.cache/dataset" ] } ], "metadata": { "kaggle": { "accelerator": "none", "dataSources": [ { "sourceId": 245622735, "sourceType": "kernelVersion" } ], "dockerImageVersionId": 31012, "isGpuEnabled": false, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" } }, "nbformat": 4, "nbformat_minor": 4 }