# elve / app.py
# Niansuh's picture
# Update app.py
# 8662041 verified
# raw
# history blame
# 4.39 kB
import time
import requests
from io import BytesIO
from pathlib import Path
from typing import List
import re
import tempfile
from flask import Flask, request, render_template, send_file
# Flask application instance that the route decorators below register on.
app = Flask(__name__)
class SentenceTokenizer:
    """Split free text into sentences at terminal punctuation.

    A boundary is a run of whitespace that either follows ``.``, ``!`` or
    ``?`` and precedes an ASCII capital letter, or follows ``。``, ``!`` or
    ``?`` regardless of what comes next.
    """

    def __init__(self):
        boundary_pattern = r'(?<=[.!?])\s+(?=[A-Z])|(?<=[。!?])\s+'
        self.SENTENCE_END = re.compile(boundary_pattern, re.VERBOSE)

    def tokenize(self, text: str) -> List[str]:
        """Return the stripped, non-empty sentences contained in *text*.

        Falsy or whitespace-only input produces an empty list.
        """
        trimmed = text.strip() if text else ""
        if not trimmed:
            return []
        fragments = self.SENTENCE_END.split(trimmed)
        # Strip every fragment, then drop the ones that end up empty.
        return list(filter(None, map(str.strip, fragments)))
def split_sentences(text: str) -> List[str]:
    """Break *text* into sentences using a freshly built ``SentenceTokenizer``."""
    return SentenceTokenizer().tokenize(text)
class ElevenlabsTTS:
    """Text-to-speech provider using the Elevenlabs HTTP API.

    Text is synthesized one sentence at a time and the returned audio
    payloads are concatenated into a single ``.mp3`` file inside
    ``cache_dir``.
    """

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0"})
        # Generated audio files live in the system temporary directory.
        self.cache_dir = Path(tempfile.gettempdir())
        # Display name -> voice id used in the request URL.
        self.all_voices = {
            "Brian": "nPczCjzI2devNBz1zQrb", "Alice": "Xb7hH8MSUJpSbSDYk0k2",
            "Bill": "pqHfZKP75CvOlQylNhV4", "Callum": "N2lVS1w4EtoT3dr4eOWO",
            "Charlie": "IKne3meq5aSn9XLyUdCD", "Charlotte": "XB0fDUnXU5powFXDhCwa",
            "Chris": "iP95p4xoKVk53GoZ742B", "Daniel": "onwK4e9ZLuTAKqWW03F9",
            "Eric": "cjVigY5qzO86Huf0OWal", "George": "JBFqnCBsd6RMkjVDRZzb",
            "Jessica": "cgSgspJ2msm6clMCkdW9", "Laura": "FGY2WhTYpPnrIDTdsKH5",
            "Liam": "TX3LPaxmHKxFdv7VOQHJ", "Lily": "pFZP5JQG7iQjIQuC4Bku",
            "Matilda": "XrExE9yKIg1WjnnlVkGX", "Sarah": "EXAVITQu4vr4xnSDxMaL",
            "Will": "bIHbv24MWmeRgasZH58o", "Neal": "Zp1aWhL05Pi5BkhizFC3"
        }
        # Query parameters sent with every synthesis request.
        self.params = {'allow_unauthenticated': '1'}
        # Sentence spoken when building a voice preview.
        self.preview_text = "Hello, this is a sample of my voice."

    def tts(self, text: str, voice: str = "Brian") -> str:
        """Synthesize *text* with *voice* and return the path of the audio file.

        Args:
            text: Text to speak; it is split into sentences and each sentence
                is sent as its own request.
            voice: A key of ``all_voices``.

        Returns:
            POSIX-style path of a newly created ``.mp3`` file in ``cache_dir``.

        Raises:
            ValueError: If *voice* is unknown or *text* is empty/blank.
            requests.HTTPError: If the API answers with an error status.
        """
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        # Reject blank input up front instead of writing a 0-byte audio file.
        if not text or not text.strip():
            raise ValueError("Text must not be empty")

        url = f'https://api.elevenlabs.io/v1/text-to-speech/{self.all_voices[voice]}'
        audio_chunks = []
        for sentence in split_sentences(text):
            response = self.session.post(
                url,
                params=self.params,
                json={'text': sentence, 'model_id': 'eleven_multilingual_v2'},
                timeout=20,
            )
            response.raise_for_status()
            audio_chunks.append(response.content)

        # The file is only created once every request has succeeded, and the
        # random component in its name keeps two requests for the same voice
        # within the same second from overwriting each other.
        with tempfile.NamedTemporaryFile(
            mode='wb',
            prefix=f"tts_{voice}_{int(time.time())}_",
            suffix=".mp3",
            dir=self.cache_dir,
            delete=False,
        ) as f:
            f.write(b"".join(audio_chunks))
            filename = Path(f.name)
        return filename.as_posix()

    def generate_preview(self, voice: str) -> str:
        """Return the path of the cached preview clip for *voice*.

        The clip is synthesized from ``preview_text`` on first use and stored
        as ``preview_<voice>.mp3`` in ``cache_dir`` so later calls reuse it.

        Raises:
            ValueError: If *voice* is unknown.
            requests.HTTPError: If synthesis is needed and the API fails.
        """
        # Validate before touching the filesystem so arbitrary names are never
        # used to probe for files in the cache directory.
        if voice not in self.all_voices:
            raise ValueError(f"Voice '{voice}' not available")
        preview_file = self.cache_dir / f"preview_{voice}.mp3"
        if not preview_file.exists():
            generated = Path(self.tts(self.preview_text, voice))
            # Move the fresh clip to the cache name; without this the
            # existence check above would never succeed.
            generated.replace(preview_file)
        return preview_file.as_posix()
# Web interface: a single module-level provider instance shared by the route
# handlers below.
tts_provider = ElevenlabsTTS()
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page and handle "generate" form submissions.

    A POST containing a ``generate`` field synthesizes the submitted ``text``
    with the selected ``voice`` (default ``Brian``) and returns the audio as a
    download; on failure the page is re-rendered with an ``error`` message.
    Every other request renders the page with the voice names and a mapping
    of voice name to preview file path.
    """
    voices = tts_provider.all_voices.keys()

    if request.method == 'POST' and 'generate' in request.form:
        text = request.form.get('text', '')
        voice = request.form.get('voice', 'Brian')
        # A missing or blank field would otherwise reach the provider and
        # come back as an empty audio file.
        if not text.strip():
            return render_template('index.html', error="Please enter some text to convert.", voices=voices)
        try:
            audio_file = tts_provider.tts(text, voice)
            return send_file(audio_file, mimetype='audio/mpeg', as_attachment=True, download_name=f"{voice}_output.mp3")
        except Exception as e:
            # Route boundary: log the full traceback, show the message to the user.
            app.logger.exception("Speech generation failed for voice %r", voice)
            return render_template('index.html', error=str(e), voices=voices)

    # Previews are looked up on every page render. A failure for one voice
    # must not take the whole page down, so that voice is simply left out.
    previews = {}
    for voice in voices:
        try:
            previews[voice] = tts_provider.generate_preview(voice)
        except (requests.RequestException, OSError, ValueError) as e:
            app.logger.warning("Preview generation failed for voice %r: %s", voice, e)
    return render_template('index.html', voices=voices, previews=previews)
@app.route('/preview/<voice>')
def preview(voice):
    """Serve the preview audio clip for *voice*.

    Returns the clip with mimetype ``audio/mpeg``, a 404 when *voice* is not
    one of the provider's voices, or a 500 with the error text when the clip
    cannot be produced.
    """
    # Unknown voices are a client error. The message deliberately does not
    # echo the URL-supplied name, since a plain string response is served as
    # HTML and the name would be reflected unescaped.
    if voice not in tts_provider.all_voices:
        return "Voice not available", 404
    try:
        audio_file = tts_provider.generate_preview(voice)
        return send_file(audio_file, mimetype='audio/mpeg')
    except Exception as e:
        # Route boundary: keep the traceback in the log, return the message.
        app.logger.exception("Preview generation failed for voice %r", voice)
        return str(e), 500
if __name__ == "__main__":
    # Run Flask's built-in server on port 5000, bound to all interfaces.
    app.run(port=5000, host='0.0.0.0')