adowu committed on
Commit
b416379
·
verified ·
1 Parent(s): 006f2a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -12
app.py CHANGED
@@ -16,25 +16,28 @@ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
16
  @spaces.GPU(enable_queue=True)
17
  def clone(text, audio):
18
  # Generowanie mowy
19
- wav, alignment, text_info, _ = tts.tts(text=text, speaker_wav=audio, language="pl", return_dict=True)
20
 
21
  # Konwersja do numpy array i zapisanie jako plik WAV
22
- wav_np = np.array(wav)
23
  wavfile.write("./output.wav", 24000, (wav_np * 32767).astype(np.int16))
24
 
25
  # Przygotowanie informacji o fonemach
26
  phonemes_data = []
27
  cumulative_duration = 0
28
- for phoneme, duration in zip(text_info['phonemes'], alignment['durations']):
29
- start_time = cumulative_duration
30
- end_time = start_time + duration
31
- phonemes_data.append({
32
- "phoneme": phoneme,
33
- "start": float(start_time),
34
- "end": float(end_time),
35
- "duration": float(duration)
36
- })
37
- cumulative_duration = end_time
 
 
 
38
 
39
  # Zapisywanie informacji o fonemach do pliku JSON
40
  with open("./phonemes_info.json", "w", encoding="utf-8") as f:
 
16
  @spaces.GPU(enable_queue=True)
17
  def clone(text, audio):
18
  # Generowanie mowy
19
+ result = tts.tts(text=text, speaker_wav=audio, language="pl", return_dict=True)
20
 
21
  # Konwersja do numpy array i zapisanie jako plik WAV
22
+ wav_np = np.array(result['wav'])
23
  wavfile.write("./output.wav", 24000, (wav_np * 32767).astype(np.int16))
24
 
25
  # Przygotowanie informacji o fonemach
26
  phonemes_data = []
27
  cumulative_duration = 0
28
+ if 'phonemes' in result and 'durations' in result:
29
+ for phoneme, duration in zip(result['phonemes'], result['durations']):
30
+ start_time = cumulative_duration
31
+ end_time = start_time + duration
32
+ phonemes_data.append({
33
+ "phoneme": phoneme,
34
+ "start": float(start_time),
35
+ "end": float(end_time),
36
+ "duration": float(duration)
37
+ })
38
+ cumulative_duration = end_time
39
+ else:
40
+ phonemes_data.append({"error": "Brak informacji o fonemach"})
41
 
42
  # Zapisywanie informacji o fonemach do pliku JSON
43
  with open("./phonemes_info.json", "w", encoding="utf-8") as f: