Ionut-Bostan committed
Commit 6a6dfa7 · 1 Parent(s): be197a9

modified app.py and changed path for text classifier

Files changed (2)
  1. app.py +1 -1
  2. synthesize.py +11 -17
app.py CHANGED
@@ -20,7 +20,7 @@ def synthesize_speech(input_type, text, own_text, speaker_id, embed_type, emotio
     if embed_type == "bert_embed":
         command = f"python3 synthesize.py --text '{selected_text}' --bert_embed 1 --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
     else:
-        command = f"python3 synthesize.py --text '{selected_text}' --emotion_id {emotion_id} --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
+        command = f"python3 synthesize.py --text '{selected_text}' --emotion_id {emotion_mapping[emotion_id]} --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
 
     output = subprocess.check_output(command, shell=True)
     audio_file = f'output/result/EmoV_DB/{selected_text}.wav'
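Note on the change above: the else branch now looks up `emotion_mapping[emotion_id]`, so the UI can pass a human-readable emotion label while synthesize.py still receives the integer id it expects. The `emotion_mapping` dict itself is defined elsewhere in app.py and is not part of this hunk, so the following is only a hypothetical sketch of the pattern; the actual label names and index values in the repo may differ.

# Hypothetical sketch (not from this diff): map UI labels to the integer ids
# consumed by synthesize.py's --emotion_id flag.
emotion_mapping = {
    "Neutral": 0,
    "Amused": 1,
    "Angry": 2,
    "Disgusted": 3,
    "Sleepy": 4,
}

selected_label = "Angry"                         # value coming from the UI dropdown
emotion_index = emotion_mapping[selected_label]  # -> 2, passed as --emotion_id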
synthesize.py CHANGED
@@ -5,9 +5,6 @@ from string import punctuation
 import torch
 
 
-
-
-
 import yaml
 import numpy as np
 from torch.utils.data import DataLoader
@@ -21,7 +18,7 @@ from text import text_to_sequence
 
 from transformers import RobertaTokenizerFast, AutoModel, AutoModelForSequenceClassification
 
-ro_model = "/content/FastSpeech2_Text_Aware_Emotion_TTS/roberta_pretrained"
+ro_model = "/roberta_pretrained"
 roberta_tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base')
 roberta_model = AutoModelForSequenceClassification.from_pretrained(ro_model)
 
@@ -29,9 +26,6 @@ roberta_model = AutoModelForSequenceClassification.from_pretrained(ro_model)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
-
-
-
 def read_lexicon(lex_path):
     lexicon = {}
     with open(lex_path) as f:
@@ -216,32 +210,32 @@ if __name__ == "__main__":
 
     # Get model
     model = get_model(args, configs, device, train=False)
-
+
     # Load vocoder
     vocoder = get_vocoder(model_config, device)
-
+
     # Preprocess texts
     if args.mode == "batch":
         # Get dataset
         dataset = TextDataset(args.source, preprocess_config)
         batchs = DataLoader(
             dataset,
-            batch_size=8,
+            batch_size=8,
             collate_fn=dataset.collate_fn,
         )
     if args.mode == "single":
 
         if np.array([args.bert_embed]) == 0:
-            emotions = np.array([args.emotion_id])
-            # print(f'FS2 emotions: {emotions}')
+            emotions = np.array([args.emotion_id])
+            # print(f'FS2 emotions: {emotions}')
         else:
-            emotions = get_roberta_emotion_embeddings(roberta_tokenizer, roberta_model, args.text)
-            emotions = torch.argmax(emotions, dim=1).cpu().numpy()
-            # print(f'RoBERTa emotions {emotions}')
+            emotions = get_roberta_emotion_embeddings(
+                roberta_tokenizer, roberta_model, args.text)
+            emotions = torch.argmax(emotions, dim=1).cpu().numpy()
+            # print(f'RoBERTa emotions {emotions}')
         ids = raw_texts = [args.text[:100]]
         speakers = np.array([args.speaker_id])
-
-
+
         if preprocess_config["preprocessing"]["text"]["language"] == "en":
             texts = np.array(
                 [preprocess_english(args.text, preprocess_config)])
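For context on the bert_embed branch above: `get_roberta_emotion_embeddings` is called with the tokenizer, the classifier loaded from `ro_model`, and the input text, and its output is reduced with `torch.argmax(..., dim=1)` to a single emotion id. Its body is not part of this diff, so the sketch below only illustrates the assumed shape of such a helper (run the sequence classifier and return its logits); the real implementation may differ.

# Assumed shape of the helper (illustration only, not from this diff).
import torch

def get_roberta_emotion_embeddings(tokenizer, model, text):
    # Tokenize the sentence and run the RoBERTa sequence classifier.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Raw class logits, shape (1, num_emotions); the caller takes argmax over dim=1.
    return outputs.logits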