"""Kokoro-TTS demo app: Streamlit UI for generating speech with the Kokoro pipeline."""
import os
import streamlit as st
import time
from kokoro import KPipeline
import soundfile as sf
import io
# --- Page header ---
st.title("Text-to-Speech with Kokoro Pipeline")
st.markdown("Enter your text and configure options to generate audio segments.")

# --- Input controls ---
# Text the user wants synthesized into speech.
text = st.text_area(
    "Enter text",
    value="The sky above the port was the color of television, tuned to a dead channel.",
    height=150,
)

# Maps the human-readable label shown in the UI to the Kokoro voice id.
# Register additional voices here, e.g.:
# "British English (b_voice)": "b_voice",
# "Japanese (j_voice)": "j_voice",
voice_options = {
    "American English (af_heart)": "af_heart",
}
voice_choice = st.selectbox("Select Voice", options=list(voice_options))
voice = voice_options[voice_choice]

# Playback-rate multiplier; 1.0 is the voice's natural speed.
speed = st.slider("Speech Speed", min_value=0.5, max_value=2.0, value=1.0)
if st.button("Generate Audio"):
    if not text.strip():
        # Guard: nothing to synthesize.
        st.error("Please enter some text!")
    else:
        try:
            # Initialize the Kokoro pipeline.
            # lang_code must match the selected voice family
            # ('a' = American English, consistent with the af_* voice above).
            with st.spinner("Initializing TTS pipeline..."):
                pipeline = KPipeline(lang_code='a')

            # The pipeline call returns a *lazy* generator — synthesis happens
            # while iterating, so the consuming loop must stay inside the
            # spinner or the "Generating audio..." indicator would vanish
            # before any real work ran.
            with st.spinner("Generating audio..."):
                generator = pipeline(
                    text,
                    voice=voice,
                    speed=speed,
                    split_pattern=r'\n+'  # split input into one segment per newline run
                )
                # Render each generated segment as it is produced.
                for segment_index, (gs, ps, audio) in enumerate(generator):
                    st.markdown(f"**Segment {segment_index}**")
                    st.write("**Graphemes/Text:**", gs)
                    st.write("**Phonemes:**", ps)
                    # Serialize the audio array (Kokoro outputs 24 kHz samples —
                    # TODO confirm against the installed kokoro version) into an
                    # in-memory WAV so Streamlit can play it without a temp file.
                    audio_buffer = io.BytesIO()
                    sf.write(audio_buffer, audio, 24000, format='WAV')
                    audio_buffer.seek(0)
                    st.audio(audio_buffer, format="audio/wav")

            st.success("Audio generation complete!")
        except Exception as e:
            # Top-level UI boundary: show a friendly message plus the
            # full traceback for debugging.
            st.error("An error occurred during audio generation.")
            st.exception(e)