Salman11223 committed
Commit 46446cd · verified · 1 Parent(s): 5a70d69

Update app.py

Files changed (1)
  1. app.py +47 -30
app.py CHANGED
@@ -2,7 +2,8 @@ import os
 import requests
 import gradio as gr
 import moviepy.editor as mp
-from TTS.api import TTS
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
 import torch
 import assemblyai as aai
 
@@ -15,27 +16,7 @@ model_files = {
     "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
 }
 
-
-
-# Get the path to the xtts_v2 folder
-tts = os.path.join(os.getcwd(), 'xtts_v2')
-
-# List all files in xtts_v2
-files = os.listdir(tts)
-print("Files in xtts_v2:", files)
-
-# Iterate through the files in xtts_v2
-for file_name in files:
-    file_path = os.path.join(tts, file_name)
-
-    # Check if it's a file or directory
-    if os.path.isfile(file_path):
-        print(f"{file_name} is a file.")
-    elif os.path.isdir(file_path):
-        print(f"{file_name} is a directory.")
-
-
-
+# Download model files
 for filename, url in model_files.items():
     file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
     if not os.path.exists(file_path):
@@ -44,15 +25,35 @@ for filename, url in model_files.items():
         with open(file_path, 'wb') as f:
             f.write(r.content)
 
-# Initialize TTS model without prompts
-# tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True, progress_bar=False)
+# Initialize xtts model
+def initialize_xtts_model():
+    # Get the path to the xtts_v2 folder
+    tts_dir = os.path.join(os.getcwd(), 'xtts_v2')
+
+    # Load the configuration
+    config_path = os.path.join(tts_dir, 'config.json')
+    config = XttsConfig()
+    config.load_json(config_path)
+
+    # Initialize the model from the configuration
+    model = Xtts.init_from_config(config)
+
+    # Load the model checkpoint
+    model.load_checkpoint(config, checkpoint_dir=tts_dir, eval=True)
+
+    # Move the model to GPU (if available)
+    if torch.cuda.is_available():
+        model.cuda()
+
+    return model
 
 # Translation class
-class translation:
+class Translation:
     def __init__(self, video_path, original_language, target_language):
         self.video_path = video_path
         self.original_language = original_language
         self.target_language = target_language
+        self.model = initialize_xtts_model()  # Initialize TTS model
 
     def org_language_parameters(self, original_language):
         language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
@@ -90,8 +91,25 @@ class translation:
         return translation
 
     def generate_audio(self, translated_text):
-        tts.tts_to_file(text=translated_text, speaker_wav='output_audio.wav', file_path="output_synth.wav", language=self.tran_code)
-        return "output_synth.wav"
+        # Generate audio using the xtts model
+        config = XttsConfig()
+        config.load_json(os.path.join(os.getcwd(), 'xtts_v2', 'config.json'))
+
+        # Generate audio
+        synthesized_audio_path = "output_synth.wav"
+        outputs = self.model.synthesize(
+            translated_text,
+            config,
+            speaker_wav='output_audio.wav',
+            gpt_cond_len=3,
+            language=self.tran_code,
+        )
+
+        # Save the output to file
+        with open(synthesized_audio_path, 'wb') as f:
+            f.write(outputs)
+
+        return synthesized_audio_path
 
     def translate_video(self):
         audio_path = self.extract_audio()
@@ -105,10 +123,9 @@ class translation:
         os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
         return 'output_video.mp4'
 
-
 # Gradio Interface
 def app(video_path, original_language, target_language):
-    translator = translation(video_path, original_language, target_language)
+    translator = Translation(video_path, original_language, target_language)
     video_file = translator.translate_video()
     return video_file
 
@@ -122,4 +139,4 @@ interface = gr.Interface(
     outputs=gr.Video(label="Translated Video")
 )
 
-interface.launch()
+interface.launch()
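
One follow-up worth flagging: in current Coqui TTS releases, Xtts.synthesize returns a dict of outputs (the waveform array plus conditioning latents) rather than raw WAV bytes, so the f.write(outputs) call added to generate_audio is unlikely to produce a playable file. Below is a minimal sketch of how the waveform could be saved instead, assuming the usual outputs["wav"] entry and the XTTS v2 default output rate of 24 kHz; save_xtts_wav is an illustrative helper, not part of this commit.

import torch
import torchaudio

def save_xtts_wav(outputs, file_path="output_synth.wav", sample_rate=24000):
    # "wav" holds the mono waveform returned by Xtts.synthesize()
    wav = outputs["wav"]
    if not torch.is_tensor(wav):
        wav = torch.tensor(wav)
    # torchaudio.save expects a (channels, samples) float tensor
    torchaudio.save(file_path, wav.unsqueeze(0).cpu(), sample_rate)
    return file_path

If adopted, this would replace the open(synthesized_audio_path, 'wb') / f.write(outputs) block at the end of generate_audio.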