Salman11223 committed
Commit 46446cd · verified · 1 Parent(s): 5a70d69

Update app.py

Files changed (1)
  1. app.py +47 -30
app.py CHANGED
@@ -2,7 +2,8 @@ import os
 import requests
 import gradio as gr
 import moviepy.editor as mp
-from TTS.api import TTS
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
 import torch
 import assemblyai as aai
 
@@ -15,27 +16,7 @@ model_files = {
     "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
 }
 
-
-
-# Get the path to the xtts_v2 folder
-tts = os.path.join(os.getcwd(), 'xtts_v2')
-
-# List all files in xtts_v2
-files = os.listdir(tts)
-print("Files in xtts_v2:", files)
-
-# Iterate through the files in xtts_v2
-for file_name in files:
-    file_path = os.path.join(tts, file_name)
-
-    # Check if it's a file or directory
-    if os.path.isfile(file_path):
-        print(f"{file_name} is a file.")
-    elif os.path.isdir(file_path):
-        print(f"{file_name} is a directory.")
-
-
-
+# Download model files
 for filename, url in model_files.items():
     file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
     if not os.path.exists(file_path):
@@ -44,15 +25,35 @@ for filename, url in model_files.items():
         with open(file_path, 'wb') as f:
             f.write(r.content)
 
-# Initialize TTS model without prompts
-# tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True, progress_bar=False)
+# Initialize xtts model
+def initialize_xtts_model():
+    # Get the path to the xtts_v2 folder
+    tts_dir = os.path.join(os.getcwd(), 'xtts_v2')
+
+    # Load the configuration
+    config_path = os.path.join(tts_dir, 'config.json')
+    config = XttsConfig()
+    config.load_json(config_path)
+
+    # Initialize the model from the configuration
+    model = Xtts.init_from_config(config)
+
+    # Load the model checkpoint
+    model.load_checkpoint(config, checkpoint_dir=tts_dir, eval=True)
+
+    # Move the model to GPU (if available)
+    if torch.cuda.is_available():
+        model.cuda()
+
+    return model
 
 # Translation class
-class translation:
+class Translation:
     def __init__(self, video_path, original_language, target_language):
         self.video_path = video_path
         self.original_language = original_language
         self.target_language = target_language
+        self.model = initialize_xtts_model()  # Initialize TTS model
 
     def org_language_parameters(self, original_language):
         language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
@@ -90,8 +91,25 @@ class translation:
         return translation
 
     def generate_audio(self, translated_text):
-        tts.tts_to_file(text=translated_text, speaker_wav='output_audio.wav', file_path="output_synth.wav", language=self.tran_code)
-        return "output_synth.wav"
+        # Generate audio using the xtts model
+        config = XttsConfig()
+        config.load_json(os.path.join(os.getcwd(), 'xtts_v2', 'config.json'))
+
+        # Generate audio
+        synthesized_audio_path = "output_synth.wav"
+        outputs = self.model.synthesize(
+            translated_text,
+            config,
+            speaker_wav='output_audio.wav',
+            gpt_cond_len=3,
+            language=self.tran_code,
+        )
+
+        # Save the output to file
+        with open(synthesized_audio_path, 'wb') as f:
+            f.write(outputs)
+
+        return synthesized_audio_path
 
     def translate_video(self):
         audio_path = self.extract_audio()
@@ -105,10 +123,9 @@ class translation:
         os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
         return 'output_video.mp4'
 
-
 # Gradio Interface
 def app(video_path, original_language, target_language):
-    translator = translation(video_path, original_language, target_language)
+    translator = Translation(video_path, original_language, target_language)
     video_file = translator.translate_video()
     return video_file
 
@@ -122,4 +139,4 @@ interface = gr.Interface(
     outputs=gr.Video(label="Translated Video")
 )
 
-interface.launch()
+interface.launch()
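
One follow-up worth flagging: in current Coqui TTS releases, Xtts.synthesize returns a dict of outputs (the waveform array plus conditioning latents) rather than raw WAV bytes, so the f.write(outputs) call added to generate_audio is unlikely to produce a playable file. Below is a minimal sketch of how the waveform could be saved instead, assuming the usual outputs["wav"] entry and the XTTS v2 default output rate of 24 kHz; save_xtts_wav is an illustrative helper, not part of this commit.

import torch
import torchaudio

def save_xtts_wav(outputs, file_path="output_synth.wav", sample_rate=24000):
    # "wav" holds the mono waveform returned by Xtts.synthesize()
    wav = outputs["wav"]
    if not torch.is_tensor(wav):
        wav = torch.tensor(wav)
    # torchaudio.save expects a (channels, samples) float tensor
    torchaudio.save(file_path, wav.unsqueeze(0).cpu(), sample_rate)
    return file_path

If adopted, this would replace the open(synthesized_audio_path, 'wb') / f.write(outputs) block at the end of generate_audio.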