Salman11223 committed on
Commit
45fe7e2
·
verified ·
1 Parent(s): 5b012e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -38
app.py CHANGED
@@ -16,7 +16,6 @@ model_files = {
16
  "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
17
  }
18
 
19
- # Download model files
20
  for filename, url in model_files.items():
21
  file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
22
  if not os.path.exists(file_path):
@@ -25,27 +24,15 @@ for filename, url in model_files.items():
25
  with open(file_path, 'wb') as f:
26
  f.write(r.content)
27
 
28
- # Initialize xtts model
29
- def initialize_xtts_model():
30
- # Get the path to the xtts_v2 folder
31
- tts_dir = os.path.join(os.getcwd(), 'xtts_v2')
32
-
33
- # Load the configuration
34
- config_path = os.path.join(tts_dir, 'config.json')
35
- config = XttsConfig()
36
- config.load_json(config_path)
37
-
38
- # Initialize the model from the configuration
39
- model = Xtts.init_from_config(config)
40
-
41
- # Load the model checkpoint
42
- model.load_checkpoint(config, checkpoint_dir=tts_dir, eval=True)
43
-
44
- # Move the model to GPU (if available)
45
- if torch.cuda.is_available():
46
- model.cuda()
47
-
48
- return model
49
 
50
  # Translation class
51
  class Translation:
@@ -53,7 +40,6 @@ class Translation:
53
  self.video_path = video_path
54
  self.original_language = original_language
55
  self.target_language = target_language
56
- self.model = initialize_xtts_model() # Initialize TTS model
57
 
58
  def org_language_parameters(self, original_language):
59
  language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
@@ -91,25 +77,24 @@ class Translation:
91
  return translation
92
 
93
  def generate_audio(self, translated_text):
94
- # Generate audio using the xtts model
95
- config = XttsConfig()
96
- config.load_json(os.path.join(os.getcwd(), 'xtts_v2', 'config.json'))
97
-
98
- # Generate audio
99
- synthesized_audio_path = "output_synth.wav"
100
- outputs = self.model.synthesize(
101
  translated_text,
102
  config,
103
- speaker_wav='output_audio.wav',
104
  gpt_cond_len=3,
105
- language=self.tran_code,
106
  )
 
 
 
 
107
 
108
- # Save the output to file
109
- with open(synthesized_audio_path, 'wb') as f:
110
- f.write(outputs)
111
-
112
- return synthesized_audio_path
113
 
114
  def translate_video(self):
115
  audio_path = self.extract_audio()
@@ -123,6 +108,7 @@ class Translation:
123
  os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
124
  return 'output_video.mp4'
125
 
 
126
  # Gradio Interface
127
  def app(video_path, original_language, target_language):
128
  translator = Translation(video_path, original_language, target_language)
@@ -134,7 +120,7 @@ interface = gr.Interface(
134
  inputs=[
135
  gr.Video(label="Video Path"),
136
  gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
137
- gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Targeted Language"),
138
  ],
139
  outputs=gr.Video(label="Translated Video")
140
  )
 
16
  "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
17
  }
18
 
 
19
  for filename, url in model_files.items():
20
  file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
21
  if not os.path.exists(file_path):
 
24
  with open(file_path, 'wb') as f:
25
  f.write(r.content)
26
 
27
+ # Initialize TTS model directly
28
+ config_path = "path/to/xtts/config.json" # Update with the correct path
29
+ checkpoint_dir = "path/to/xtts/" # Update with the correct path
30
+
31
+ config = XttsConfig()
32
+ config.load_json(config_path)
33
+ model = Xtts.init_from_config(config)
34
+ model.load_checkpoint(config, checkpoint_dir=checkpoint_dir, eval=True)
35
+ model.cuda()
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # Translation class
38
  class Translation:
 
40
  self.video_path = video_path
41
  self.original_language = original_language
42
  self.target_language = target_language
 
43
 
44
  def org_language_parameters(self, original_language):
45
  language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
 
77
  return translation
78
 
79
  def generate_audio(self, translated_text):
80
+ # Use TTS model directly to generate audio
81
+ audio_path = "output_synth.wav"
82
+ speaker_wav = 'output_audio.wav' # Assuming speaker wav file is available
83
+ language = self.tran_code
84
+
85
+ outputs = model.synthesize(
 
86
  translated_text,
87
  config,
88
+ speaker_wav=speaker_wav,
89
  gpt_cond_len=3,
90
+ language=language,
91
  )
92
+
93
+ # Save output to file
94
+ with open(audio_path, 'wb') as f:
95
+ f.write(outputs['audio'])
96
 
97
+ return audio_path
 
 
 
 
98
 
99
  def translate_video(self):
100
  audio_path = self.extract_audio()
 
108
  os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
109
  return 'output_video.mp4'
110
 
111
+
112
  # Gradio Interface
113
  def app(video_path, original_language, target_language):
114
  translator = Translation(video_path, original_language, target_language)
 
120
  inputs=[
121
  gr.Video(label="Video Path"),
122
  gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
123
+ gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Target Language"),
124
  ],
125
  outputs=gr.Video(label="Translated Video")
126
  )