cjayic committed on
Commit
b131625
·
1 Parent(s): 6d4d33a

added some descriptions

Browse files
Files changed (2) hide show
  1. README.md +7 -4
  2. app.py +14 -4
README.md CHANGED
@@ -1,7 +1,7 @@
1
  ---
2
- title: Sovits Ow2
3
- emoji: 👀
4
- colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
  sdk_version: 3.15.0
@@ -10,4 +10,7 @@ pinned: false
10
  python_version: 3.7
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
  ---
2
+ title: SOVITS | Overwatch 2
3
+ emoji: 🗣️
4
+ colorFrom: orange
5
  colorTo: gray
6
  sdk: gradio
7
  sdk_version: 3.15.0
 
10
  python_version: 3.7
11
  ---
12
 
13
+ # SOVITS OW2 - Voice Conversion Model
14
+
15
+ This is a [SOVITS model](https://github.com/Francis-Komizu/Sovits) trained on every Overwatch 2 hero up to Kiriko (except Bastion, please forgive me). The model was trained for 195000 iterations.
16
+ It's not too great, to be honest: unlike Soft-VC, it doesn't appear to adjust the voice pitch to the target speaker. I added a pitch shift option, but it's pretty slow and doesn't really improve things most of the time, so use it at your own risk.
app.py CHANGED
@@ -28,7 +28,7 @@ _ = net_g.eval()
28
  _ = utils.load_checkpoint("logs/ow2/G_195000.pth", net_g, None)
29
 
30
 
31
- def infer(audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_scale_w=0.8):
32
  fname = audio
33
  source, sr = torchaudio.load(fname)
34
 
@@ -53,14 +53,24 @@ def infer(audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_
53
  demo = gradio.Interface(
54
  fn=infer,
55
  inputs=[
 
 
 
 
 
 
 
 
 
 
56
  gradio.Audio(label="Input Audio", type="filepath"),
57
  gradio.Dropdown(label="Target Voice", choices=["Ana", "Ashe", "Baptiste", "Brigitte", "Cassidy", "Doomfist", "D.Va", "Echo", "Genji", "Hanzo", "Junker Queen", "Junkrat", "Kiriko", "Lúcio", "Mei", "Mercy", "Moira", "Orisa", "Pharah", "Reaper", "Reinhardt", "Roadhog", "Sigma", "Sojourn", "Soldier_ 76", "Sombra", "Symmetra", "Torbjörn", "Tracer", "Widowmaker", "Winston", "Zarya", "Zenyatta"], type="index", value="Ana"),
58
- gradio.Slider(label="Pitch Shift Input (+12 = up one octave)", minimum=-12.0, maximum=12.0, value=0, step=1),
59
- gradio.Slider(label="Length Factor", minimum=0.1, maximum=2.0, value=1.0),
60
  gradio.Slider(label="Noise Scale (higher = more expressive and erratic)", minimum=0.0, maximum=2.0, value=.667),
61
  gradio.Slider(label="Noise Scale W (higher = more variation in cadence)", minimum=0.0, maximum=2.0, value=.8)
62
  ],
63
  outputs=[gradio.Audio(label="Audio as Target Voice")],
64
  )
65
  #demo.launch(share=True)
66
- demo.launch(server_name="0.0.0.0")
 
28
  _ = utils.load_checkpoint("logs/ow2/G_195000.pth", net_g, None)
29
 
30
 
31
+ def infer(md, audio, speaker_id, pitch_shift, length_scale, noise_scale=.667, noise_scale_w=0.8):
32
  fname = audio
33
  source, sr = torchaudio.load(fname)
34
 
 
53
  demo = gradio.Interface(
54
  fn=infer,
55
  inputs=[
56
+ gradio.Markdown(
57
+ """
58
+ # SOVITS | Overwatch 2
59
+ Upload any voice recording and turn it into a mangled approximation of any* Overwatch 2 Hero!
60
+
61
+ SOVITS doesn't really appear to adjust the pitch to the target speaker, so it helps to have your input voice at a similar pitch to the target voice.
62
+ I added a pitch shift option to preprocess the input voice, but it's slow and sometimes outright broken, use at your own risk.
63
+
64
+ ( * up to Kiriko and without Bastion. Please forgive. )
65
+ """),
66
  gradio.Audio(label="Input Audio", type="filepath"),
67
  gradio.Dropdown(label="Target Voice", choices=["Ana", "Ashe", "Baptiste", "Brigitte", "Cassidy", "Doomfist", "D.Va", "Echo", "Genji", "Hanzo", "Junker Queen", "Junkrat", "Kiriko", "Lúcio", "Mei", "Mercy", "Moira", "Orisa", "Pharah", "Reaper", "Reinhardt", "Roadhog", "Sigma", "Sojourn", "Soldier_ 76", "Sombra", "Symmetra", "Torbjörn", "Tracer", "Widowmaker", "Winston", "Zarya", "Zenyatta"], type="index", value="Ana"),
68
+ gradio.Slider(label="Pitch Shift Input (+12 = up one octave, ⚠️ broken AF ⚠️)", minimum=-12.0, maximum=12.0, value=0, step=1),
69
+ gradio.Slider(label="Length Factor (higher = slower speech)", minimum=0.1, maximum=2.0, value=1.0),
70
  gradio.Slider(label="Noise Scale (higher = more expressive and erratic)", minimum=0.0, maximum=2.0, value=.667),
71
  gradio.Slider(label="Noise Scale W (higher = more variation in cadence)", minimum=0.0, maximum=2.0, value=.8)
72
  ],
73
  outputs=[gradio.Audio(label="Audio as Target Voice")],
74
  )
75
  #demo.launch(share=True)
76
+ demo.launch(server_name="0.0.0.0")