Update app.py
app.py
CHANGED
@@ -2,48 +2,42 @@ import os
 import requests
 import gradio as gr
 import moviepy.editor as mp
-from TTS.api import TTS
 import torch
 import assemblyai as aai

-#
-try:
-    r = requests.get(url)
-    with open(file_path, 'wb') as f:
-        f.write(r.content)
+# Import specific model components
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+
+# Define paths for model and configuration files
+model_path = "./xtts_v2"
+config_path = os.path.join(model_path, "config.json")
+checkpoint_path = model_path
+
+# Initialize and load the XTTS model
+config = XttsConfig()
+config.load_json(config_path)
+model = Xtts.init_from_config(config)
+model.load_checkpoint(config, checkpoint_dir=checkpoint_path, eval=True)
+model.cuda()  # Move model to GPU if available
+
+def synthesize_text(text, speaker_wav, language):
+    try:
+        outputs = model.synthesize(
+            text,
+            config,
+            speaker_wav=speaker_wav,
+            gpt_cond_len=3,
+            language=language
+        )
+        return outputs
+    except Exception as e:
+        print(f"Error during synthesis: {e}")
+        raise

 # Translation class
-class translation:
-    def __init__(self, video_path, original_language, target_language):
+class Translation:
+    def __init__(self, video_path, original_language, target_language):
         self.video_path = video_path
         self.original_language = original_language
         self.target_language = target_language
@@ -83,13 +77,15 @@ class translation:
         translation = response.json()[0]["translations"][0]["text"]
         return translation

-    # def generate_audio(self, translated_text):
-    #     tts.tts_to_file(text=translated_text, speaker_wav='output_audio.wav', file_path="output_synth.wav", language=self.tran_code)
-    #     return "output_synth.wav"
-
     def generate_audio(self, translated_text):
         try:
+            synthesized_audio = synthesize_text(
+                translated_text,
+                speaker_wav='output_audio.wav',
+                language=self.tran_code
+            )
+            with open("output_synth.wav", "wb") as f:
+                f.write(synthesized_audio)
             return "output_synth.wav"
         except Exception as e:
             print(f"Error generating audio: {e}")
@@ -110,7 +106,7 @@ class translation:

 # Gradio Interface
 def app(video_path, original_language, target_language):
-    translator = translation(video_path, original_language, target_language)
+    translator = Translation(video_path, original_language, target_language)
     video_file = translator.translate_video()
     return video_file

@@ -124,4 +120,4 @@ interface = gr.Interface(
     outputs=gr.Video(label="Translated Video")
 )

-interface.launch()
+interface.launch(share=True)  # Optional: Set share=True to create a public link