Spaces:
Running
Running
import os | |
import streamlit as st | |
import time | |
from kokoro import KPipeline | |
import soundfile as sf | |
import io | |
# Page header shown at the top of the app.
st.title("Text-to-Speech with Kokoro Pipeline")
st.markdown("Enter your text and configure options to generate audio segments.")

# Text to be synthesized; pre-filled with a sample sentence.
text = st.text_area(
    label="Enter text",
    value="The sky above the port was the color of television, tuned to a dead channel.",
    height=150,
)

# Maps the label shown in the dropdown to the voice identifier used for synthesis.
# Add more voice options as needed.
voice_options = {
    "American English (af_heart)": "af_heart",
    # You can add more voices here, for example:
    # "British English (b_voice)": "b_voice",
    # "Japanese (j_voice)": "j_voice",
}
voice_choice = st.selectbox(label="Select Voice", options=list(voice_options))
voice = voice_options[voice_choice]

# Speech speed setting, adjustable from 0.5 to 2.0 and starting at 1.0.
speed = st.slider(label="Speech Speed", min_value=0.5, max_value=2.0, value=1.0)
# Sample rate (Hz) handed to soundfile when encoding each segment as WAV.
_SAMPLE_RATE = 24000


@st.cache_resource(show_spinner=False)
def _load_pipeline(lang_code):
    """Return a Kokoro pipeline for ``lang_code``.

    Wrapped in ``st.cache_resource`` so the pipeline is constructed once per
    language code and reused on later script reruns, instead of being rebuilt
    every time the "Generate Audio" button is clicked.
    """
    return KPipeline(lang_code=lang_code)


if st.button("Generate Audio"):
    if not text.strip():
        st.error("Please enter some text!")
    else:
        try:
            # Ensure that lang_code matches your chosen voice.
            with st.spinner("Initializing TTS pipeline..."):
                pipeline = _load_pipeline('a')

            # Calling the pipeline only hands back a lazy generator; the
            # synthesis itself runs while it is iterated. The loop therefore
            # lives inside the spinner so the spinner stays visible for the
            # duration of the actual work.
            with st.spinner("Generating audio..."):
                segments = pipeline(
                    text,
                    voice=voice,
                    speed=speed,
                    split_pattern=r'\n+',
                )
                for segment_index, (gs, ps, audio) in enumerate(segments):
                    st.markdown(f"**Segment {segment_index}**")
                    st.write("**Graphemes/Text:**", gs)
                    st.write("**Phonemes:**", ps)

                    # Encode the segment as WAV in memory so Streamlit can
                    # play it without touching the filesystem.
                    audio_buffer = io.BytesIO()
                    sf.write(audio_buffer, audio, _SAMPLE_RATE, format='WAV')
                    audio_buffer.seek(0)
                    st.audio(audio_buffer, format="audio/wav")

            st.success("Audio generation complete!")
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user together
            # with the traceback rather than crashing the app.
            st.error("An error occurred during audio generation.")
            st.exception(e)