Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,6 @@ model_files = {
|
|
16 |
"s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
|
17 |
}
|
18 |
|
19 |
-
# Download model files
|
20 |
for filename, url in model_files.items():
|
21 |
file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
|
22 |
if not os.path.exists(file_path):
|
@@ -25,27 +24,15 @@ for filename, url in model_files.items():
|
|
25 |
with open(file_path, 'wb') as f:
|
26 |
f.write(r.content)
|
27 |
|
28 |
-
# Initialize
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
# Initialize the model from the configuration
|
39 |
-
model = Xtts.init_from_config(config)
|
40 |
-
|
41 |
-
# Load the model checkpoint
|
42 |
-
model.load_checkpoint(config, checkpoint_dir=tts_dir, eval=True)
|
43 |
-
|
44 |
-
# Move the model to GPU (if available)
|
45 |
-
if torch.cuda.is_available():
|
46 |
-
model.cuda()
|
47 |
-
|
48 |
-
return model
|
49 |
|
50 |
# Translation class
|
51 |
class Translation:
|
@@ -53,7 +40,6 @@ class Translation:
|
|
53 |
self.video_path = video_path
|
54 |
self.original_language = original_language
|
55 |
self.target_language = target_language
|
56 |
-
self.model = initialize_xtts_model() # Initialize TTS model
|
57 |
|
58 |
def org_language_parameters(self, original_language):
|
59 |
language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
|
@@ -91,25 +77,24 @@ class Translation:
|
|
91 |
return translation
|
92 |
|
93 |
def generate_audio(self, translated_text):
|
94 |
-
#
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
outputs = self.model.synthesize(
|
101 |
translated_text,
|
102 |
config,
|
103 |
-
speaker_wav=
|
104 |
gpt_cond_len=3,
|
105 |
-
language=
|
106 |
)
|
|
|
|
|
|
|
|
|
107 |
|
108 |
-
|
109 |
-
with open(synthesized_audio_path, 'wb') as f:
|
110 |
-
f.write(outputs)
|
111 |
-
|
112 |
-
return synthesized_audio_path
|
113 |
|
114 |
def translate_video(self):
|
115 |
audio_path = self.extract_audio()
|
@@ -123,6 +108,7 @@ class Translation:
|
|
123 |
os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
|
124 |
return 'output_video.mp4'
|
125 |
|
|
|
126 |
# Gradio Interface
|
127 |
def app(video_path, original_language, target_language):
|
128 |
translator = Translation(video_path, original_language, target_language)
|
@@ -134,7 +120,7 @@ interface = gr.Interface(
|
|
134 |
inputs=[
|
135 |
gr.Video(label="Video Path"),
|
136 |
gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
|
137 |
-
gr.Dropdown(["English", "German", "Italian", "Spanish"], label="
|
138 |
],
|
139 |
outputs=gr.Video(label="Translated Video")
|
140 |
)
|
|
|
16 |
"s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
|
17 |
}
|
18 |
|
|
|
19 |
for filename, url in model_files.items():
|
20 |
file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
|
21 |
if not os.path.exists(file_path):
|
|
|
24 |
with open(file_path, 'wb') as f:
|
25 |
f.write(r.content)
|
26 |
|
27 |
+
# Initialize TTS model directly
|
28 |
+
config_path = "path/to/xtts/config.json" # Update with the correct path
|
29 |
+
checkpoint_dir = "path/to/xtts/" # Update with the correct path
|
30 |
+
|
31 |
+
config = XttsConfig()
|
32 |
+
config.load_json(config_path)
|
33 |
+
model = Xtts.init_from_config(config)
|
34 |
+
model.load_checkpoint(config, checkpoint_dir=checkpoint_dir, eval=True)
|
35 |
+
model.cuda()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
# Translation class
|
38 |
class Translation:
|
|
|
40 |
self.video_path = video_path
|
41 |
self.original_language = original_language
|
42 |
self.target_language = target_language
|
|
|
43 |
|
44 |
def org_language_parameters(self, original_language):
|
45 |
language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
|
|
|
77 |
return translation
|
78 |
|
79 |
def generate_audio(self, translated_text):
|
80 |
+
# Use TTS model directly to generate audio
|
81 |
+
audio_path = "output_synth.wav"
|
82 |
+
speaker_wav = 'output_audio.wav' # Assuming speaker wav file is available
|
83 |
+
language = self.tran_code
|
84 |
+
|
85 |
+
outputs = model.synthesize(
|
|
|
86 |
translated_text,
|
87 |
config,
|
88 |
+
speaker_wav=speaker_wav,
|
89 |
gpt_cond_len=3,
|
90 |
+
language=language,
|
91 |
)
|
92 |
+
|
93 |
+
# Save output to file
|
94 |
+
with open(audio_path, 'wb') as f:
|
95 |
+
f.write(outputs['audio'])
|
96 |
|
97 |
+
return audio_path
|
|
|
|
|
|
|
|
|
98 |
|
99 |
def translate_video(self):
|
100 |
audio_path = self.extract_audio()
|
|
|
108 |
os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
|
109 |
return 'output_video.mp4'
|
110 |
|
111 |
+
|
112 |
# Gradio Interface
|
113 |
def app(video_path, original_language, target_language):
|
114 |
translator = Translation(video_path, original_language, target_language)
|
|
|
120 |
inputs=[
|
121 |
gr.Video(label="Video Path"),
|
122 |
gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
|
123 |
+
gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Target Language"),
|
124 |
],
|
125 |
outputs=gr.Video(label="Translated Video")
|
126 |
)
|