coqui2

Sleeping

App Files Files Community

coqui2 / app.py

Adoetz

Rename 2. gradio.py to app.py

9d5c5f9 verified 2 months ago

raw

history blame

5.47 kB

	from TTS.api import TTS
	import numpy as np
	import torch
	import os
	import gradio as gr
	from scipy.io.wavfile import write as write_wav

	# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Load the multilingual XTTS v2 model and move it onto the selected device
	# (the GPU when one is available).
	tts = TTS(
	    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
	    progress_bar=True,
	).to(device)

	# Function to list .wav files in the /clone/ folder
	def list_wav_files():
	    """Return the names of the ``.wav`` files in the ``clone`` folder.

	    The folder is looked up relative to the current working directory.
	    Names are returned in sorted order: ``os.listdir`` makes no ordering
	    guarantee, while callers in this file address entries by list index,
	    so a stable order keeps an index pointing at the same file.

	    Returns:
	        list[str]: Bare file names (no directory part) ending in ``.wav``.
	        An empty list when the folder is missing or contains no such
	        file; a diagnostic is printed in both of those cases.
	    """
	    clone_folder = "clone"
	    # isdir (rather than exists) also rejects a regular file named "clone",
	    # which os.listdir would otherwise fail on.
	    if not os.path.isdir(clone_folder):
	        print(f"Error: Folder '{clone_folder}' not found.")
	        return []

	    wav_files = sorted(f for f in os.listdir(clone_folder) if f.endswith(".wav"))
	    if not wav_files:
	        print(f"No .wav files found in '{clone_folder}'.")

	    return wav_files

	# Function to generate TTS audio and save it as a .wav file
	def _select_reference_audio(wav_file_choice, uploaded_file, recorded_audio):
	    """Pick the reference clip used for voice cloning.

	    Returns:
	        tuple: ``(path, None)`` on success, ``(None, error_message)`` otherwise.
	    """
	    # Precedence: a fresh recording, then an upload, then a "clone" folder clip.
	    if recorded_audio:
	        return recorded_audio, None
	    if uploaded_file:
	        return uploaded_file, None
	    if not wav_file_choice:
	        return None, "Error: No reference audio provided for voice cloning."

	    wav_files = list_wav_files()
	    if not wav_files:
	        return None, "Error: No .wav files found for voice cloning."

	    # Choices are labelled "<index>: <file name>".
	    try:
	        index_text, _, label_name = wav_file_choice.partition(":")
	        wav_file_index = int(index_text.strip())
	    except (ValueError, AttributeError):
	        return None, "Error: Invalid .wav file choice."
	    if wav_file_index < 0:
	        return None, "Error: Invalid .wav file index."

	    # Prefer the file name carried in the label: the folder is listed again
	    # here, so a bare index may no longer refer to the file the user picked.
	    label_name = label_name.strip()
	    if label_name in wav_files:
	        return os.path.join("clone", label_name), None

	    if wav_file_index >= len(wav_files):
	        return None, "Error: Invalid .wav file index."
	    return os.path.join("clone", wav_files[wav_file_index]), None


	def generate_tts_audio(
	    text,
	    voice_choice,
	    speaker_name=None,
	    wav_file_choice=None,
	    uploaded_file=None,
	    recorded_audio=None,
	    language="en",
	):
	    """Synthesize ``text`` with the module-level ``tts`` model.

	    The result is also written to ``output.wav`` in the working directory.

	    Args:
	        text: Text to speak; must be a non-blank string.
	        voice_choice: ``"existing_speaker"`` or ``"voice_cloning"``.
	        speaker_name: Speaker to use when ``voice_choice`` is
	            ``"existing_speaker"`` (required in that mode).
	        wav_file_choice: ``"<index>: <file name>"`` label naming a clip in the
	            ``clone`` folder (voice cloning only).
	        uploaded_file: Path of an uploaded reference clip (voice cloning only).
	        recorded_audio: Path of a recorded reference clip (voice cloning only).
	        language: Language code passed to the model; defaults to ``"en"``.

	    Returns:
	        tuple: ``(status_message, audio)`` where ``audio`` is
	        ``(sample_rate, float32 numpy array)`` on success and ``None`` when
	        the inputs are rejected (the message then starts with ``"Error:"``).
	    """
	    # Reject blank text up front instead of handing it to the model.
	    if not isinstance(text, str) or not text.strip():
	        return "Error: Text is required to generate speech.", None

	    # Determine the reference audio file
	    if voice_choice == "existing_speaker":
	        if not speaker_name:
	            return "Error: Speaker name is required for existing speaker.", None
	        reference_audio = None
	    elif voice_choice == "voice_cloning":
	        reference_audio, error = _select_reference_audio(
	            wav_file_choice, uploaded_file, recorded_audio
	        )
	        if error:
	            return error, None
	    else:
	        return "Error: Invalid voice choice.", None

	    # Generate TTS audio
	    if reference_audio:
	        # Use reference voice (voice cloning)
	        audio = tts.tts(text=text, speaker_wav=reference_audio, language=language)
	    else:
	        # Use existing speaker
	        audio = tts.tts(text=text, speaker=speaker_name, language=language)

	    # Convert audio to a NumPy array
	    audio_np = np.array(audio, dtype=np.float32)
	    sample_rate = tts.synthesizer.output_sample_rate

	    # Save the audio as a .wav file
	    output_file = "output.wav"
	    write_wav(output_file, sample_rate, audio_np)

	    return "Audio generated successfully!", (sample_rate, audio_np)

	# Gradio interface
	def create_gradio_interface():
	    """Build the Gradio Blocks UI for the TTS demo and return it unlaunched.

	    The dropdown of clone-folder clips is populated once, from
	    ``list_wav_files()``, at the time this function is called.

	    Returns:
	        The ``gr.Blocks`` app; the caller is responsible for ``launch()``.
	    """
	    wav_files = list_wav_files()
	    # Labels have the form "<index>: <file name>"; generate_tts_audio parses
	    # the leading index back out of the selected label.
	    wav_file_choices = [f"{i}: {file}" for i, file in enumerate(wav_files)]

	    # NOTE(review): the row grouping below is reconstructed from the order of
	    # the statements — confirm it matches the intended layout.
	    with gr.Blocks() as demo:
	        gr.Markdown("# TTS Streaming System")
	        with gr.Row():
	            text_input = gr.Textbox(label="Enter text to generate speech", lines=3)
	        with gr.Row():
	            voice_choice = gr.Radio(choices=["existing_speaker", "voice_cloning"], label="Select voice type")
	        with gr.Row():
	            # All voice-specific inputs start hidden until a voice type is picked.
	            speaker_name = gr.Textbox(label="Enter the speaker name (e.g., 'Ana Florence')", visible=False)
	            wav_file_choice = gr.Dropdown(choices=wav_file_choices, label="Select a .wav file for cloning", visible=False)
	            uploaded_file = gr.Audio(label="Upload your own .wav file for cloning", type="filepath", visible=False)
	            recorded_audio = gr.Microphone(label="Record your voice for cloning", type="filepath", visible=False)
	        with gr.Row():
	            submit_button = gr.Button("Generate Speech")
	        with gr.Row():
	            output_text = gr.Textbox(label="Output", interactive=False)
	            output_audio = gr.Audio(label="Generated Audio", type="numpy", visible=True)

	        def update_components(choice):
	            """Show only the inputs relevant to the selected voice type."""
	            show_speaker = choice == "existing_speaker"
	            show_cloning = choice == "voice_cloning"
	            # Order matches the `outputs` list of the change event below.
	            return (
	                gr.update(visible=show_speaker),
	                gr.update(visible=show_cloning),
	                gr.update(visible=show_cloning),
	                gr.update(visible=show_cloning),
	            )

	        voice_choice.change(
	            update_components,
	            inputs=voice_choice,
	            outputs=[speaker_name, wav_file_choice, uploaded_file, recorded_audio],
	        )

	        # Input order matches the positional parameters of generate_tts_audio.
	        submit_button.click(
	            generate_tts_audio,
	            inputs=[text_input, voice_choice, speaker_name, wav_file_choice, uploaded_file, recorded_audio],
	            outputs=[output_text, output_audio],
	        )

	    return demo

	# Launch the Gradio interface when this file is run as a script.
	if __name__ == "__main__":
	    demo = create_gradio_interface()
	    # share=True asks Gradio to create a public link as well.
	    demo.launch(share=True)