Spaces:

shukdevdatta123
/

VocalForge-AI

Paused

App Files Files Community

VocalForge-AI / app.py

shukdevdatta123

Update app.py

9414725 verified 3 months ago

raw

history blame contribute delete

1.99 kB

	import gradio as gr
	import torch
	import tempfile
	import soundfile as sf
	from tortoise.api import TextToSpeech
	from tortoise.utils.audio import load_audio

	# 1) Initialize the Tortoise TTS engine once, at import time, so that every
	#    call to generate_speech reuses this single module-level instance.
	#    Per the original author's note, construction downloads and caches the
	#    models automatically.
	tts = TextToSpeech()

	# 2) Define a helper to generate speech from a reference clip + text
	def generate_speech(reference_audio_path, text):
	    """Synthesize ``text`` in the voice of a reference recording.

	    Args:
	        reference_audio_path: Filesystem path of the reference voice clip.
	            It is read with ``load_audio`` using a target rate of 22050 Hz.
	        text: The string to synthesize.

	    Returns:
	        Path to a temporary ``.wav`` file, written at 24 kHz, that holds the
	        generated speech. The file is created with ``delete=False``, so it
	        is not removed automatically.

	    Raises:
	        gr.Error: If no reference clip was supplied or ``text`` is blank.
	    """
	    # The UI passes a falsy value for inputs the user left empty; fail early
	    # with a readable message instead of an opaque error raised deeper in
	    # the audio loader or the TTS engine.
	    if not reference_audio_path:
	        raise gr.Error("Please upload a reference voice clip first.")
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    # The sampling rate is a required positional argument of load_audio.
	    ref_waveform = load_audio(reference_audio_path, 22050)

	    # 'fast' preset (alternatives: ultra_fast, standard, high_quality).
	    output_tensor = tts.tts_with_preset(
	        text,
	        voice_samples=[ref_waveform],
	        preset="fast",
	    )

	    # Drop singleton dimensions and move to host memory for soundfile.
	    wav_np = output_tensor.squeeze().cpu().numpy()

	    # Reserve a unique path, then close the handle before writing: leaving it
	    # open leaks a file descriptor per request and prevents soundfile from
	    # reopening the path on platforms that lock open files.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        out_path = tmp.name
	    sf.write(out_path, wav_np, samplerate=24000)
	    return out_path

	# 3) Build the Gradio interface
	with gr.Blocks(title="Tortoise Voice Cloning TTS") as app:
	    # Page heading followed by short usage instructions.
	    gr.Markdown("## 🗣️ Voice Cloning with Tortoise TTS")
	    gr.Markdown(
	        "Upload a ~10 sec WAV clip (22 050 Hz), enter English text, "
	        "and hear it spoken back in your voice!"
	    )

	    # Both inputs share one row: the reference clip and the text to speak.
	    with gr.Row():
	        voice_sample = gr.Audio(
	            type="filepath",
	            label="🎙️ Upload Reference Voice (22 050 Hz WAV)",
	        )
	        text_input = gr.Textbox(
	            label="💬 Text to Synthesize",
	            placeholder="e.g., Hello, world!",
	        )

	    generate_btn = gr.Button("🔊 Generate Speech")
	    output_audio = gr.Audio(
	        label="📢 Cloned Speech Output (24 kHz)",
	        interactive=False,
	    )

	    # Clicking the button runs generate_speech on (clip path, text) and
	    # sends the returned file path to the output player.
	    generate_btn.click(
	        generate_speech,
	        [voice_sample, text_input],
	        output_audio,
	    )

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()