Upload 6 files
Browse files- Dockerfile +34 -0
- README.md +82 -4
- astrbot_plugin_config_example.json +5 -0
- download_support_models.py +17 -0
- reference_audio/ref_shantianliang_1.wav +0 -0
- weights/README_PLACEHOLDER.txt +7 -0
Dockerfile
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Image for serving the GPT-SoVITS REST API (api_v2.py) on port 7860.
# Base image with PyTorch + CUDA 12.1 runtime
FROM pytorch/pytorch:2.5.1-cuda12.1-cudnn9-runtime

# PYTHONUNBUFFERED keeps Python output unbuffered so logs appear immediately;
# PIP_DISABLE_PIP_VERSION_CHECK silences pip's self-update check during the build.
ENV PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

WORKDIR /app

# System deps (one package per line, sorted, apt lists removed in the same layer)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
        libsox-dev && \
    rm -rf /var/lib/apt/lists/*

# Get GPT-SoVITS source.
# GPT_SOVITS_REF must be a branch or tag name (git clone --branch does not accept
# a bare commit SHA). Override it with --build-arg to pin a release for
# reproducible builds; the default follows upstream's main branch.
ARG GPT_SOVITS_REF=main
RUN git clone --depth 1 --branch "${GPT_SOVITS_REF}" https://github.com/RVC-Boss/GPT-SoVITS.git /app

# Python deps (repo's + API server).
# extra-req.txt is installed with --no-deps; every pip call uses --no-cache-dir
# so no pip download cache is kept in the layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-deps --no-cache-dir -r /app/extra-req.txt && \
    pip install --no-cache-dir -r /app/requirements.txt && \
    pip install --no-cache-dir fastapi uvicorn soundfile huggingface_hub ffmpeg-python

# Pre-download essential support models (Chinese frontends & encoders, sv/*).
# This step is best-effort: a failure must not break the build, but it is
# reported on stderr instead of being silently discarded.
COPY download_support_models.py /app/download_support_models.py
RUN python /app/download_support_models.py || \
    echo "WARNING: download_support_models.py failed; continuing without pre-downloaded support models" >&2

# Put your weights and reference audio into image
COPY weights/ /app/pretrained_models/shantianliang/
COPY reference_audio/ /app/reference_audio/

EXPOSE 7860

# Start REST API v2 (FastAPI); exec form so the Python process receives signals directly.
CMD ["python", "api_v2.py", "-a", "0.0.0.0", "-p", "7860", "-c", "GPT_SoVITS/configs/tts_infer.yaml"]
|
README.md
CHANGED
@@ -1,10 +1,88 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
colorFrom: yellow
|
5 |
colorTo: red
|
6 |
sdk: docker
|
7 |
-
|
|
|
8 |
---
|
9 |
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: GPT-SoVITS API (v2 ProPlus) for AstrBot
|
3 |
+
emoji: 🗣️
|
4 |
colorFrom: yellow
|
5 |
colorTo: red
|
6 |
sdk: docker
|
7 |
+
app_port: 7860
|
8 |
+
license: mit
|
9 |
---
|
10 |
|
11 |
+
# GPT‑SoVITS v2 ProPlus — REST API for AstrBot (Docker Space)
|
12 |
+
|
13 |
+
这个 Space 已经为 **AstrBot** 的 `astrbot_plugin_GPT_SoVITS` 插件准备好了一个 **REST API 服务端 (api_v2.py)**。
|
14 |
+
你只需把 **你现成的模型权重 + 参考音频** 放到下列路径,然后点击 **Restart and rebuild** 即可。
|
15 |
+
|
16 |
+
## 放置你的文件(请先重命名,避免中文/空格/加号等字符)
|
17 |
+
|
18 |
+
- **GPT 权重(.ckpt)** → `weights/shantianliang_proplus_e32.ckpt`
|
19 |
+
(把你本地的 `shantianliangPROpius-e32.ckpt` 改名为上面这个)
|
20 |
+
|
21 |
+
- **SoVITS 权重(.pth)** → `weights/shantianliang_proplus_e8_s192.pth`
|
22 |
+
(把你本地的 `shantianliangPRO+_e8_s192.pth` 改名为上面这个)
|
23 |
+
|
24 |
+
- **参考音频(.wav)** → `reference_audio/ref_shantianliang_1.wav`
|
25 |
+
(把你本地的 `山田凉参考音频1.wav` 改名为上面这个)
|
26 |
+
|
27 |
+
> 你现在看到的 `reference_audio/ref_shantianliang_1.wav` 是一个 **占位的静音文件**,用来保证服务能启动。请用你的真实参考音频替换。
|
28 |
+
|
29 |
+
## 启动后的测试
|
30 |
+
|
31 |
+
- **切 SoVITS 权重**
|
32 |
+
```bash
|
33 |
+
curl -G "https://<你的空间>.hf.space/set_sovits_weights" --data-urlencode "weights_path=/app/pretrained_models/shantianliang/shantianliang_proplus_e8_s192.pth"
|
34 |
+
```
|
35 |
+
|
36 |
+
- **切 GPT 权重**
|
37 |
+
```bash
|
38 |
+
curl -G "https://<你的空间>.hf.space/set_gpt_weights" --data-urlencode "weights_path=/app/pretrained_models/shantianliang/shantianliang_proplus_e32.ckpt"
|
39 |
+
```
|
40 |
+
|
41 |
+
- **合成 TTS(POST,推荐)**
|
42 |
+
```bash
|
43 |
+
curl -L "https://<你的空间>.hf.space/tts" -H "Content-Type: application/json" -d '{
|
44 |
+
"text": "今天来测试一下山田凉的声音,欢迎收听。",
|
45 |
+
"text_lang": "zh",
|
46 |
+
"ref_audio_path": "/app/reference_audio/ref_shantianliang_1.wav",
|
47 |
+
"prompt_lang": "zh",
|
48 |
+
"prompt_text": "这是山田凉的参考音频",
|
49 |
+
"media_type": "wav",
|
50 |
+
"streaming_mode": false
|
51 |
+
}' -o out.wav
|
52 |
+
```
|
53 |
+
|
54 |
+
- **流式(边播边收)**
|
55 |
+
```bash
|
56 |
+
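curl -N -L -G "https://<你的空间>.hf.space/tts" --data-urlencode "text=流式测试" --data-urlencode "text_lang=zh" --data-urlencode "ref_audio_path=/app/reference_audio/ref_shantianliang_1.wav" --data-urlencode "prompt_lang=zh" --data-urlencode "prompt_text=参考提示" --data-urlencode "media_type=wav" --data-urlencode "streaming_mode=true" -o stream.wav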
|
57 |
+
```
|
58 |
+
|
59 |
+
> 以上接口与参数来自 `api_v2.py`。记得把 `<你的空间>` 换成实际 Space 名称。
|
60 |
+
|
61 |
+
## AstrBot 插件如何填
|
62 |
+
|
63 |
+
在 AstrBot 的 **astrbot_plugin_GPT_SoVITS** 插件配置里:
|
64 |
+
- **base_url**:`https://<你的空间>.hf.space`
|
65 |
+
- **gpt_weights_path**:`/app/pretrained_models/shantianliang/shantianliang_proplus_e32.ckpt`
|
66 |
+
- **sovits_weights_path**:`/app/pretrained_models/shantianliang/shantianliang_proplus_e8_s192.pth`
|
67 |
+
|
68 |
+
然后就可以用插件的命令(如 `/说 你好`、`/生气地说 ...`)或自动触发来合成语音了。
|
69 |
+
|
70 |
+
## 目录结构(上传前)
|
71 |
+
|
72 |
+
```
|
73 |
+
/
|
74 |
+
├─ Dockerfile
|
75 |
+
├─ download_support_models.py
|
76 |
+
├─ README.md
|
77 |
+
├─ weights/
|
78 |
+
│ ├─ shantianliang_proplus_e32.ckpt # ← 放你的 GPT 权重(改名后)
|
79 |
+
│ └─ shantianliang_proplus_e8_s192.pth # ← 放你的 SoVITS 权重(改名后)
|
80 |
+
└─ reference_audio/
|
81 |
+
└─ ref_shantianliang_1.wav # ← 放你的参考音频(改名后,已提供静音占位)
|
82 |
+
```
|
83 |
+
|
84 |
+
## 常见问题
|
85 |
+
|
86 |
+
- 如果报 **400** 且提示缺少参数,请检查 `/tts` 的必填字段:`text`、`text_lang`、`ref_audio_path`、`prompt_lang`。
|
87 |
+
- `ref_audio_path` 一定要是服务端的**本地路径**(例如 `/app/reference_audio/...`)。
|
88 |
+
- Hugging Face **Docker Space 监听端口**为 `7860`(本仓库已固定)。
|
astrbot_plugin_config_example.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"base_url": "https://<your-space>.hf.space",
|
3 |
+
"gpt_weights_path": "/app/pretrained_models/shantianliang/shantianliang_proplus_e32.ckpt",
|
4 |
+
"sovits_weights_path": "/app/pretrained_models/shantianliang/shantianliang_proplus_e8_s192.pth"
|
5 |
+
}
|
download_support_models.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Pre-download the GPT-SoVITS support models from the Hugging Face Hub.

Fetches the ``sv/*`` and ``chinese*`` entries of the ``lj1995/GPT-SoVITS``
model repository. The download is best-effort: any failure is reported and
the script still exits normally.

The destination defaults to ``GPT_SoVITS/pretrained_models`` next to this
script, which is where the GPT-SoVITS project expects its pretrained support
models. Set the ``SUPPORT_MODELS_DIR`` environment variable to download
somewhere else.
"""
import os

from huggingface_hub import snapshot_download

# Anchor the default on this file's location rather than the current working
# directory, so the result does not depend on where the script is invoked from.
script_dir = os.path.dirname(os.path.abspath(__file__))
target = os.environ.get(
    "SUPPORT_MODELS_DIR",
    os.path.join(script_dir, "GPT_SoVITS", "pretrained_models"),
)
os.makedirs(target, exist_ok=True)

# Download speech encoders and Chinese frontends (kept small; add more as needed)
try:
    snapshot_download(
        repo_id="lj1995/GPT-SoVITS",
        repo_type="model",
        local_dir=target,
        allow_patterns=["sv/*", "chinese*"],
    )
    print("Support models downloaded to", target)
except Exception as e:
    # Deliberately non-fatal: the caller may provide the models another way.
    print("Skipping support model download:", e)
|
reference_audio/ref_shantianliang_1.wav
ADDED
Binary file (48 kB). View file
|
|
weights/README_PLACEHOLDER.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Put your finetuned model files here and rename them as follows:
|
2 |
+
|
3 |
+
- GPT (.ckpt) -> shantianliang_proplus_e32.ckpt
|
4 |
+
- SoVITS (.pth) -> shantianliang_proplus_e8_s192.pth
|
5 |
+
|
6 |
+
These files will be copied into the Docker image at:
|
7 |
+
/app/pretrained_models/shantianliang/
|