Spaces:

Salman11223
/

Project

Sleeping

App Files Files Community

Salman11223 committed on Sep 15, 2024

Commit

45fe7e2

verified ·

1 Parent(s): 5b012e8

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -38

app.py CHANGED Viewed

@@ -16,7 +16,6 @@ model_files = {
     "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
 }
-# Download model files
 for filename, url in model_files.items():
     file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
     if not os.path.exists(file_path):
@@ -25,27 +24,15 @@ for filename, url in model_files.items():
         with open(file_path, 'wb') as f:
             f.write(r.content)
-# Initialize xtts model
-def initialize_xtts_model():
-    # Get the path to the xtts_v2 folder
-    tts_dir = os.path.join(os.getcwd(), 'xtts_v2')
-    # Load the configuration
-    config_path = os.path.join(tts_dir, 'config.json')
-    config = XttsConfig()
-    config.load_json(config_path)
-    # Initialize the model from the configuration
-    model = Xtts.init_from_config(config)
-    # Load the model checkpoint
-    model.load_checkpoint(config, checkpoint_dir=tts_dir, eval=True)
-    # Move the model to GPU (if available)
-    if torch.cuda.is_available():
-        model.cuda()
-    return model
 # Translation class
 class Translation:
@@ -53,7 +40,6 @@ class Translation:
         self.video_path = video_path
         self.original_language = original_language
         self.target_language = target_language
-        self.model = initialize_xtts_model()  # Initialize TTS model
     def org_language_parameters(self, original_language):
         language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
@@ -91,25 +77,24 @@ class Translation:
         return translation
     def generate_audio(self, translated_text):
-        # Generate audio using the xtts model
-        config = XttsConfig()
-        config.load_json(os.path.join(os.getcwd(), 'xtts_v2', 'config.json'))
-        # Generate audio
-        synthesized_audio_path = "output_synth.wav"
-        outputs = self.model.synthesize(
             translated_text,
             config,
-            speaker_wav='output_audio.wav',
             gpt_cond_len=3,
-            language=self.tran_code,
         )
-        # Save the output to file
-        with open(synthesized_audio_path, 'wb') as f:
-            f.write(outputs)
-        return synthesized_audio_path
     def translate_video(self):
         audio_path = self.extract_audio()
@@ -123,6 +108,7 @@ class Translation:
         os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
         return 'output_video.mp4'
 # Gradio Interface
 def app(video_path, original_language, target_language):
     translator = Translation(video_path, original_language, target_language)
@@ -134,7 +120,7 @@ interface = gr.Interface(
     inputs=[
         gr.Video(label="Video Path"),
         gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
-        gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Targeted Language"),
     ],
     outputs=gr.Video(label="Translated Video")
 )

     "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
 }
 for filename, url in model_files.items():
     file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
     if not os.path.exists(file_path):
         with open(file_path, 'wb') as f:
             f.write(r.content)
+# Initialize TTS model directly
+config_path = "path/to/xtts/config.json"  # Update with the correct path
+checkpoint_dir = "path/to/xtts/"  # Update with the correct path
+config = XttsConfig()
+config.load_json(config_path)
+model = Xtts.init_from_config(config)
+model.load_checkpoint(config, checkpoint_dir=checkpoint_dir, eval=True)
+model.cuda()
 # Translation class
 class Translation:
         self.video_path = video_path
         self.original_language = original_language
         self.target_language = target_language
     def org_language_parameters(self, original_language):
         language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
         return translation
     def generate_audio(self, translated_text):
+        # Use TTS model directly to generate audio
+        audio_path = "output_synth.wav"
+        speaker_wav = 'output_audio.wav'  # Assuming speaker wav file is available
+        language = self.tran_code
+        outputs = model.synthesize(
             translated_text,
             config,
+            speaker_wav=speaker_wav,
             gpt_cond_len=3,
+            language=language,
         )
+        # Save output to file
+        with open(audio_path, 'wb') as f:
+            f.write(outputs['audio'])
+        return audio_path
     def translate_video(self):
         audio_path = self.extract_audio()
         os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
         return 'output_video.mp4'
 # Gradio Interface
 def app(video_path, original_language, target_language):
     translator = Translation(video_path, original_language, target_language)
     inputs=[
         gr.Video(label="Video Path"),
         gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
+        gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Target Language"),
     ],
     outputs=gr.Video(label="Translated Video")
 )