Upload Spark-TTS transformers pipeline model
Browse files
- README.md +34 -0
- config.json +22 -0
- model.safetensors +3 -0
- pipeline_info.json +8 -0
README.md
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Spark-TTS Transformers Pipeline
|
| 3 |
+
|
| 4 |
+
這是一個完全兼容 transformers 的 Spark-TTS 模型。
|
| 5 |
+
|
| 6 |
+
## 快速開始
|
| 7 |
+
|
| 8 |
+
```python
|
| 9 |
+
from transformers import pipeline
|
| 10 |
+
|
| 11 |
+
# 載入 pipeline(注意:「spark-tts」為自訂任務,並非 transformers 內建任務,需先註冊對應的 pipeline 類別才能使用)
|
| 12 |
+
tts = pipeline("spark-tts", model="spark_tts_transformers_ready")
|
| 13 |
+
|
| 14 |
+
# 生成語音
|
| 15 |
+
result = tts("你好,世界!")
|
| 16 |
+
|
| 17 |
+
# 保存音頻
|
| 18 |
+
import soundfile as sf
|
| 19 |
+
sf.write("output.wav", result["audio"], result["sample_rate"])
|
| 20 |
+
```
|
| 21 |
+
|
| 22 |
+
## 功能特色
|
| 23 |
+
|
| 24 |
+
- ✅ 真正的 transformers pipeline 支援
|
| 25 |
+
- ✅ 批次處理
|
| 26 |
+
- ✅ 自定義參數
|
| 27 |
+
- ✅ 音頻保存和播放
|
| 28 |
+
|
| 29 |
+
## 系統需求
|
| 30 |
+
|
| 31 |
+
- transformers >= 4.30.0
|
| 32 |
+
- torch >= 1.9.0
|
| 33 |
+
- soundfile >= 0.12.1
|
| 34 |
+
- numpy >= 1.21.0
|
config.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"SparkTTSModel"
|
| 4 |
+
],
|
| 5 |
+
"audio_tokenizer_type": "BiCodec",
|
| 6 |
+
"hidden_size": 896,
|
| 7 |
+
"intermediate_size": 4864,
|
| 8 |
+
"max_audio_length": 30.0,
|
| 9 |
+
"max_new_tokens": 1536,
|
| 10 |
+
"max_position_embeddings": 32768,
|
| 11 |
+
"model_type": "spark-tts",
|
| 12 |
+
"num_attention_heads": 14,
|
| 13 |
+
"num_hidden_layers": 24,
|
| 14 |
+
"rms_norm_eps": 1e-06,
|
| 15 |
+
"sample_rate": 16000,
|
| 16 |
+
"temperature": 0.8,
|
| 17 |
+
"tie_word_embeddings": false,
|
| 18 |
+
"torch_dtype": "float32",
|
| 19 |
+
"transformers_version": "4.52.4",
|
| 20 |
+
"use_cache": true,
|
| 21 |
+
"vocab_size": 152064
|
| 22 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c8b2d60c50de4797badab05081acb1a964b65932ebaf145728d3117f891e0c0
|
| 3 |
+
size 3712
|
pipeline_info.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"pipeline_task": "spark-tts",
|
| 3 |
+
"model_type": "SparkTTSModel",
|
| 4 |
+
"config_type": "SparkTTSConfig",
|
| 5 |
+
"created_by": "Spark-TTS Transformers Pipeline",
|
| 6 |
+
"version": "1.0.0",
|
| 7 |
+
"compatible_with": "transformers>=4.30.0"
|
| 8 |
+
}
|