Spaces:

Saitama070
/

Trump-LipSync

Running

File size: 4,014 Bytes

fc646be
dda36f3
 
fc646be
 
11fd342
e32ee6c
dda36f3
11fd342
8cab254
 
 
dda36f3
 
 
 
 
 
 
 
 
 
 
fc646be
dda36f3
 
 
 
 
 
 
 
 
fc646be
dda36f3
 
11fd342
dda36f3
11fd342
 
 
 
 
 
 
 
dda36f3
 
 
 
fc646be
dda36f3
fc646be
dda36f3
 
8cab254
dda36f3
 
 
 
 
 
 
 
 
 
fc646be
 
dda36f3
 
 
 
 
 
 
 
fc646be
dda36f3
 
 
 
 
 
 
 
 
 
 
 
 
8cab254
dda36f3
 
 
 
 
 
 
 
fc646be
dda36f3
 
 
 
fc646be

import os
import zipfile
import subprocess
import gradio as gr
from tortoise.api import TextToSpeech
import torchaudio  # Added import for torchaudio

# Locations of the bundled archive and of the assets expected inside it.
ZIP_FILE = "Trump-LipSync.zip"
EXTRACTED_FOLDER = "Trump-LipSync"

# Every asset path repeats the folder name: once for the extraction target and
# once for a nested directory of the same name (presumably the archive's own
# top-level folder -- verify against the zip contents).
_ASSET_ROOT = os.path.join(EXTRACTED_FOLDER, EXTRACTED_FOLDER)
TRUMP_VIDEO_PATH = os.path.join(_ASSET_ROOT, "videos", "trump.mp4")
CHECKPOINT_PATH = os.path.join(
    _ASSET_ROOT, "Wav2Lip", "checkpoints", "wav2lip_gan.pth"
)

# Function to extract the zip file
def extract_zip():
    """Unpack ZIP_FILE into EXTRACTED_FOLDER unless that folder already exists.

    Returns:
        A human-readable status string. A missing archive, an already
        existing target folder and any failure during extraction are all
        reported through this message instead of being raised.
    """
    try:
        if not os.path.exists(ZIP_FILE):
            # Nothing to unpack.
            status = f"Error: {ZIP_FILE} not found."
        elif os.path.exists(EXTRACTED_FOLDER):
            # A previous run already produced the folder; leave it untouched.
            status = f"Folder {EXTRACTED_FOLDER} already exists."
        else:
            with zipfile.ZipFile(ZIP_FILE, 'r') as bundle:
                bundle.extractall(EXTRACTED_FOLDER)
            status = f"Zip file extracted successfully to {EXTRACTED_FOLDER}!"
    except Exception as exc:
        status = f"Error extracting zip file: {exc}"
    return status

# Function to generate speech using Tortoise-TTS
def generate_speech(text, output_wav, voice="trump"):
    """Synthesize *text* with Tortoise-TTS and save it as a WAV file.

    Args:
        text: The sentence(s) to speak.
        output_wav: Path of the WAV file to write.
        voice: Name of the Tortoise voice whose reference clips condition the
            output. It must be resolvable by ``tortoise.utils.audio.load_voice``.

    Returns:
        True when the file was written; False when any step failed. Errors
        are printed, never raised.
    """
    # Tortoise produces 24 kHz audio. Writing any other rate into the WAV
    # header makes players (and Wav2Lip's resampler) treat the clip as slower
    # and lower-pitched than it really is.
    output_sample_rate = 24000

    try:
        # Imported inside the try so a missing helper is reported like any
        # other synthesis failure.
        from tortoise.utils.audio import load_voice

        # NOTE(review): the model is rebuilt on every call; consider caching
        # the TextToSpeech instance if request latency matters.
        tts = TextToSpeech()

        # tts() expects reference-clip tensors and/or conditioning latents,
        # not a voice name, so resolve the name first.
        voice_samples, conditioning_latents = load_voice(voice)
        speech = tts.tts(
            text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
        )

        # torchaudio.save wants a 2-D (channels, samples) tensor: drop all
        # singleton dimensions, then restore a single channel axis.
        speech = speech.squeeze()
        if speech.dim() == 1:
            speech = speech.unsqueeze(0)

        torchaudio.save(output_wav, speech.cpu(), output_sample_rate)
        return True
    except Exception as e:
        print(f"Error generating speech: {str(e)}")
        return False

# Function to run Wav2Lip for lip-syncing
def run_wav2lip(video_path, audio_path, output_video):
    """Run the bundled Wav2Lip inference script as a child process.

    Args:
        video_path: Video passed to the script as ``--face``.
        audio_path: Audio track passed as ``--audio``.
        output_video: Destination passed as ``--outfile``.

    Returns:
        True when the script exits with status 0; False when it fails or
        cannot be started. The error is printed, not raised.
    """
    try:
        inference_script = os.path.join(
            EXTRACTED_FOLDER, EXTRACTED_FOLDER, "Wav2Lip", "inference.py"
        )
        cli_options = {
            "--checkpoint_path": CHECKPOINT_PATH,
            "--face": video_path,
            "--audio": audio_path,
            "--outfile": output_video,
        }

        argv = ["python", inference_script]
        for flag, value in cli_options.items():
            argv += [flag, value]

        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(argv, check=True)
    except Exception as err:
        print(f"Error running Wav2Lip: {err}")
        return False
    return True

def process_lipsync(text):
    """Turn *text* into a lip-synced video (Gradio callback).

    The pipeline validates the input and the bundled assets, synthesizes
    speech to ``generated_speech.wav`` and then runs Wav2Lip to produce
    ``lip_synced_output.mp4``.

    Args:
        text: The text typed by the user.

    Returns:
        A ``(status_message, video_path)`` tuple. ``video_path`` is None
        whenever a step failed; nothing is raised to the caller.
    """
    try:
        # Reject blank input up front instead of paying for a TTS run that
        # has nothing to say.
        if not text or not text.strip():
            return "Error: Please enter some text.", None

        # Each required asset paired with the message shown when it is missing.
        prerequisites = (
            (EXTRACTED_FOLDER, "Error: Zip file not extracted. Please check the logs."),
            (TRUMP_VIDEO_PATH, "Error: Trump's video not found."),
            (CHECKPOINT_PATH, "Error: Wav2Lip checkpoint not found."),
        )
        for required_path, message in prerequisites:
            if not os.path.exists(required_path):
                return message, None

        # Define output file paths
        output_wav = "generated_speech.wav"
        output_video = "lip_synced_output.mp4"

        # Generate speech using Tortoise-TTS
        if not generate_speech(text, output_wav):
            return "Error: Failed to generate speech.", None

        # run_wav2lip takes exactly (video, audio, output). The previous call
        # passed an extra EXTRACTED_FOLDER argument, which raised TypeError
        # on every request.
        if not run_wav2lip(TRUMP_VIDEO_PATH, output_wav, output_video):
            return "Error: Failed to run Wav2Lip.", None

        # Return success message and the output video
        return "Lip-synced video generated!", output_video
    except Exception as e:
        return f"Error processing lip-sync: {str(e)}", None

# Gradio interface
def main():
    """Unpack the bundled assets, then build and launch the Gradio app."""
    # extract_zip reports its outcome as a status string; log it and carry on.
    print(extract_zip())

    # One text input; a status line and the rendered video as outputs.
    text_input = gr.Textbox(label="Enter text for speech synthesis")
    status_output = gr.Textbox(label="Status")
    video_output = gr.Video(label="Generated Lip-Synced Video")

    app = gr.Interface(
        fn=process_lipsync,
        inputs=[text_input],
        outputs=[status_output, video_output],
        title="TTS & Lip Sync Generator (Trump Only)",
        description="Enter text to generate a lip-synced video using Trump's voice and video.",
    )
    app.launch()


if __name__ == "__main__":
    main()