Spaces:
Build error
Build error
added some descriptions
Browse files
README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.15.0
|
@@ -10,4 +10,7 @@ pinned: false
|
|
10 |
python_version: 3.7
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: SOVITS | Overwatch 2
|
3 |
+
emoji: 🗣️
|
4 |
+
colorFrom: orange
|
5 |
colorTo: gray
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.15.0
|
|
|
10 |
python_version: 3.7
|
11 |
---
|
12 |
|
13 |
+
# SOVITS OW2 - Voice Conversion Model
|
14 |
+
|
15 |
+
This is a [SOVITS model](https://github.com/Francis-Komizu/Sovits) trained on every Overwatch 2 hero up to Kiriko (except Bastion, please forgive me). The model was trained for 195000 iterations.
|
16 |
+
It's not too great, to be honest: unlike Soft-VC, it doesn't appear to adjust the voice pitch to the target speaker. I added a pitch shift option, but it's pretty slow and doesn't really improve things most of the time, so use it at your own risk.
|
app.py
CHANGED
@@ -28,7 +28,7 @@ _ = net_g.eval()
|
|
28 |
_ = utils.load_checkpoint("logs/ow2/G_195000.pth", net_g, None)
|
29 |
|
30 |
|
31 |
-
def infer(audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_scale_w=0.8):
|
32 |
fname = audio
|
33 |
source, sr = torchaudio.load(fname)
|
34 |
|
@@ -53,14 +53,24 @@ def infer(audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_
|
|
53 |
demo = gradio.Interface(
|
54 |
fn=infer,
|
55 |
inputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
gradio.Audio(label="Input Audio", type="filepath"),
|
57 |
gradio.Dropdown(label="Target Voice", choices=["Ana", "Ashe", "Baptiste", "Brigitte", "Cassidy", "Doomfist", "D.Va", "Echo", "Genji", "Hanzo", "Junker Queen", "Junkrat", "Kiriko", "Lúcio", "Mei", "Mercy", "Moira", "Orisa", "Pharah", "Reaper", "Reinhardt", "Roadhog", "Sigma", "Sojourn", "Soldier_ 76", "Sombra", "Symmetra", "Torbjörn", "Tracer", "Widowmaker", "Winston", "Zarya", "Zenyatta"], type="index", value="Ana"),
|
58 |
-
gradio.Slider(label="Pitch Shift Input (+12 = up one octave)", minimum=-12.0, maximum=12.0, value=0, step=1),
|
59 |
-
gradio.Slider(label="Length Factor", minimum=0.1, maximum=2.0, value=1.0),
|
60 |
gradio.Slider(label="Noise Scale (higher = more expressive and erratic)", minimum=0.0, maximum=2.0, value=.667),
|
61 |
gradio.Slider(label="Noise Scale W (higher = more variation in cadence)", minimum=0.0, maximum=2.0, value=.8)
|
62 |
],
|
63 |
outputs=[gradio.Audio(label="Audio as Target Voice")],
|
64 |
)
|
65 |
#demo.launch(share=True)
|
66 |
-
demo.launch(server_name="0.0.0.0")
|
|
|
28 |
_ = utils.load_checkpoint("logs/ow2/G_195000.pth", net_g, None)
|
29 |
|
30 |
|
31 |
+
def infer(md, audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_scale_w=0.8):
|
32 |
fname = audio
|
33 |
source, sr = torchaudio.load(fname)
|
34 |
|
|
|
53 |
demo = gradio.Interface(
|
54 |
fn=infer,
|
55 |
inputs=[
|
56 |
+
gradio.Markdown(
|
57 |
+
"""
|
58 |
+
# SOVITS | Overwatch 2
|
59 |
+
Upload any voice recording and turn it into a mangled approximation of any* Overwatch 2 Hero!
|
60 |
+
|
61 |
+
SOVITS doesn't really appear to adjust the pitch to the target speaker, so it helps to have your input voice at a similar pitch to the target voice.
|
62 |
+
I added a pitch shift option to preprocess the input voice, but it's slow and sometimes outright broken, use at your own risk.
|
63 |
+
|
64 |
+
( * up to Kiriko and without Bastion. Please forgive. )
|
65 |
+
"""),
|
66 |
gradio.Audio(label="Input Audio", type="filepath"),
|
67 |
gradio.Dropdown(label="Target Voice", choices=["Ana", "Ashe", "Baptiste", "Brigitte", "Cassidy", "Doomfist", "D.Va", "Echo", "Genji", "Hanzo", "Junker Queen", "Junkrat", "Kiriko", "Lúcio", "Mei", "Mercy", "Moira", "Orisa", "Pharah", "Reaper", "Reinhardt", "Roadhog", "Sigma", "Sojourn", "Soldier_ 76", "Sombra", "Symmetra", "Torbjörn", "Tracer", "Widowmaker", "Winston", "Zarya", "Zenyatta"], type="index", value="Ana"),
|
68 |
+
gradio.Slider(label="Pitch Shift Input (+12 = up one octave, ⚠️ broken AF ⚠️)", minimum=-12.0, maximum=12.0, value=0, step=1),
|
69 |
+
gradio.Slider(label="Length Factor (higher = slower speech)", minimum=0.1, maximum=2.0, value=1.0),
|
70 |
gradio.Slider(label="Noise Scale (higher = more expressive and erratic)", minimum=0.0, maximum=2.0, value=.667),
|
71 |
gradio.Slider(label="Noise Scale W (higher = more variation in cadence)", minimum=0.0, maximum=2.0, value=.8)
|
72 |
],
|
73 |
outputs=[gradio.Audio(label="Audio as Target Voice")],
|
74 |
)
|
75 |
#demo.launch(share=True)
|
76 |
+
demo.launch(server_name="0.0.0.0")
|