Salman11223 committed on
Commit
430b249
·
verified ·
1 Parent(s): 45fe7e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -34
app.py CHANGED
@@ -2,11 +2,10 @@ import os
2
  import requests
3
  import gradio as gr
4
  import moviepy.editor as mp
5
- from TTS.tts.configs.xtts_config import XttsConfig
6
- from TTS.tts.models.xtts import Xtts
7
  import torch
8
  import assemblyai as aai
9
-
10
  # Download necessary models if not already present
11
  model_files = {
12
  "wav2lip.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
@@ -16,6 +15,14 @@ model_files = {
16
  "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
17
  }
18
 
 
 
 
 
 
 
 
 
19
  for filename, url in model_files.items():
20
  file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
21
  if not os.path.exists(file_path):
@@ -24,19 +31,11 @@ for filename, url in model_files.items():
24
  with open(file_path, 'wb') as f:
25
  f.write(r.content)
26
 
27
- # Initialize TTS model directly
28
- config_path = "path/to/xtts/config.json" # Update with the correct path
29
- checkpoint_dir = "path/to/xtts/" # Update with the correct path
30
 
31
- config = XttsConfig()
32
- config.load_json(config_path)
33
- model = Xtts.init_from_config(config)
34
- model.load_checkpoint(config, checkpoint_dir=checkpoint_dir, eval=True)
35
- model.cuda()
36
 
37
  # Translation class
38
- class Translation:
39
- def __init__(self, video_path, original_language, target_language):
40
  self.video_path = video_path
41
  self.original_language = original_language
42
  self.target_language = target_language
@@ -77,24 +76,8 @@ class Translation:
77
  return translation
78
 
79
  def generate_audio(self, translated_text):
80
- # Use TTS model directly to generate audio
81
- audio_path = "output_synth.wav"
82
- speaker_wav = 'output_audio.wav' # Assuming speaker wav file is available
83
- language = self.tran_code
84
-
85
- outputs = model.synthesize(
86
- translated_text,
87
- config,
88
- speaker_wav=speaker_wav,
89
- gpt_cond_len=3,
90
- language=language,
91
- )
92
-
93
- # Save output to file
94
- with open(audio_path, 'wb') as f:
95
- f.write(outputs['audio'])
96
-
97
- return audio_path
98
 
99
  def translate_video(self):
100
  audio_path = self.extract_audio()
@@ -111,7 +94,7 @@ class Translation:
111
 
112
  # Gradio Interface
113
  def app(video_path, original_language, target_language):
114
- translator = Translation(video_path, original_language, target_language)
115
  video_file = translator.translate_video()
116
  return video_file
117
 
@@ -120,9 +103,9 @@ interface = gr.Interface(
120
  inputs=[
121
  gr.Video(label="Video Path"),
122
  gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
123
- gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Target Language"),
124
  ],
125
  outputs=gr.Video(label="Translated Video")
126
  )
127
 
128
- interface.launch()
 
2
  import requests
3
  import gradio as gr
4
  import moviepy.editor as mp
5
+ from TTS.api import TTS
 
6
  import torch
7
  import assemblyai as aai
8
+ os.environ["COQUI_TOS_AGREED"] = "1"
9
  # Download necessary models if not already present
10
  model_files = {
11
  "wav2lip.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
 
15
  "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
16
  }
17
 
18
+
19
+
20
+ device = "cuda"
21
+
22
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
23
+
24
+
25
+
26
  for filename, url in model_files.items():
27
  file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
28
  if not os.path.exists(file_path):
 
31
  with open(file_path, 'wb') as f:
32
  f.write(r.content)
33
 
 
 
 
34
 
 
 
 
 
 
35
 
36
  # Translation class
37
+ class translation:
38
+ def __init__(self, video_path, original_language, target_language):
39
  self.video_path = video_path
40
  self.original_language = original_language
41
  self.target_language = target_language
 
76
  return translation
77
 
78
  def generate_audio(self, translated_text):
79
+ tts.tts_to_file(text=translated_text, speaker_wav='output_audio.wav', file_path="output_synth.wav", language=self.tran_code)
80
+ return "output_synth.wav"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def translate_video(self):
83
  audio_path = self.extract_audio()
 
94
 
95
  # Gradio Interface
96
  def app(video_path, original_language, target_language):
97
+ translator = translation(video_path, original_language, target_language)
98
  video_file = translator.translate_video()
99
  return video_file
100
 
 
103
  inputs=[
104
  gr.Video(label="Video Path"),
105
  gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
106
+ gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Targeted Language"),
107
  ],
108
  outputs=gr.Video(label="Translated Video")
109
  )
110
 
111
+ interface.launch()