Salman11223 committed on
Commit
430b249
·
verified ·
1 Parent(s): 45fe7e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -34
app.py CHANGED
@@ -2,11 +2,10 @@ import os
2
  import requests
3
  import gradio as gr
4
  import moviepy.editor as mp
5
- from TTS.tts.configs.xtts_config import XttsConfig
6
- from TTS.tts.models.xtts import Xtts
7
  import torch
8
  import assemblyai as aai
9
-
10
  # Download necessary models if not already present
11
  model_files = {
12
  "wav2lip.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
@@ -16,6 +15,14 @@ model_files = {
16
  "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
17
  }
18
 
 
 
 
 
 
 
 
 
19
  for filename, url in model_files.items():
20
  file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
21
  if not os.path.exists(file_path):
@@ -24,19 +31,11 @@ for filename, url in model_files.items():
24
  with open(file_path, 'wb') as f:
25
  f.write(r.content)
26
 
27
- # Initialize TTS model directly
28
- config_path = "path/to/xtts/config.json" # Update with the correct path
29
- checkpoint_dir = "path/to/xtts/" # Update with the correct path
30
 
31
- config = XttsConfig()
32
- config.load_json(config_path)
33
- model = Xtts.init_from_config(config)
34
- model.load_checkpoint(config, checkpoint_dir=checkpoint_dir, eval=True)
35
- model.cuda()
36
 
37
  # Translation class
38
- class Translation:
39
- def __init__(self, video_path, original_language, target_language):
40
  self.video_path = video_path
41
  self.original_language = original_language
42
  self.target_language = target_language
@@ -77,24 +76,8 @@ class Translation:
77
  return translation
78
 
79
  def generate_audio(self, translated_text):
80
- # Use TTS model directly to generate audio
81
- audio_path = "output_synth.wav"
82
- speaker_wav = 'output_audio.wav' # Assuming speaker wav file is available
83
- language = self.tran_code
84
-
85
- outputs = model.synthesize(
86
- translated_text,
87
- config,
88
- speaker_wav=speaker_wav,
89
- gpt_cond_len=3,
90
- language=language,
91
- )
92
-
93
- # Save output to file
94
- with open(audio_path, 'wb') as f:
95
- f.write(outputs['audio'])
96
-
97
- return audio_path
98
 
99
  def translate_video(self):
100
  audio_path = self.extract_audio()
@@ -111,7 +94,7 @@ class Translation:
111
 
112
  # Gradio Interface
113
  def app(video_path, original_language, target_language):
114
- translator = Translation(video_path, original_language, target_language)
115
  video_file = translator.translate_video()
116
  return video_file
117
 
@@ -120,9 +103,9 @@ interface = gr.Interface(
120
  inputs=[
121
  gr.Video(label="Video Path"),
122
  gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
123
- gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Target Language"),
124
  ],
125
  outputs=gr.Video(label="Translated Video")
126
  )
127
 
128
- interface.launch()
 
2
  import requests
3
  import gradio as gr
4
  import moviepy.editor as mp
5
+ from TTS.api import TTS
 
6
  import torch
7
  import assemblyai as aai
8
+ os.environ["COQUI_TOS_AGREED"] = "1"
9
  # Download necessary models if not already present
10
  model_files = {
11
  "wav2lip.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
 
15
  "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
16
  }
17
 
18
+
19
+
20
+ device = "cuda"
21
+
22
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
23
+
24
+
25
+
26
  for filename, url in model_files.items():
27
  file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
28
  if not os.path.exists(file_path):
 
31
  with open(file_path, 'wb') as f:
32
  f.write(r.content)
33
 
 
 
 
34
 
 
 
 
 
 
35
 
36
  # Translation class
37
+ class translation:
38
+ def __init__(self, video_path, original_language, target_language):
39
  self.video_path = video_path
40
  self.original_language = original_language
41
  self.target_language = target_language
 
76
  return translation
77
 
78
  def generate_audio(self, translated_text):
79
+ tts.tts_to_file(text=translated_text, speaker_wav='output_audio.wav', file_path="output_synth.wav", language=self.tran_code)
80
+ return "output_synth.wav"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def translate_video(self):
83
  audio_path = self.extract_audio()
 
94
 
95
  # Gradio Interface
96
  def app(video_path, original_language, target_language):
97
+ translator = translation(video_path, original_language, target_language)
98
  video_file = translator.translate_video()
99
  return video_file
100
 
 
103
  inputs=[
104
  gr.Video(label="Video Path"),
105
  gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
106
+ gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Targeted Language"),
107
  ],
108
  outputs=gr.Video(label="Translated Video")
109
  )
110
 
111
+ interface.launch()