Spaces:
Running
on
L4
Running
on
L4
File size: 1,883 Bytes
a121edc c00be4b a121edc c00be4b a121edc c00be4b a121edc 5152717 a121edc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import os
import requests
from pathlib import Path
from typing import List
import nls
class CosyVoiceSynthesizer:
    """Text-to-speech client that streams a transcript through the ``nls`` SDK.

    The application key is read from the ``ALIYUN_APP_KEY`` environment
    variable when the instance is created; the access token is downloaded
    anew on every :meth:`call`.
    """

    # Plain-text endpoint the access token is downloaded from.
    TOKEN_URL = "https://alice-open.oss-cn-zhangjiakou.aliyuncs.com/nls_token.txt"
    # WebSocket gateway handed to the SDK.
    GATEWAY_URL = 'wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1'
    # Seconds to wait for the token endpoint before giving up.
    TOKEN_TIMEOUT = 30

    def __init__(self) -> None:
        # ``None`` when the variable is unset; it is passed to the SDK as-is.
        self.app_key = os.environ.get('ALIYUN_APP_KEY')

    def call(self, save_file, transcript, voice="longyuan", sample_rate=16000):
        """Synthesize ``transcript`` and write the resulting audio to ``save_file``.

        Args:
            save_file: Path of the output file; opened in binary write mode.
            transcript: Text sent to the synthesizer.
            voice: Voice name forwarded to ``startStreamInputTts``.
            sample_rate: Sample rate forwarded to ``startStreamInputTts``.

        Returns:
            The concatenation of every chunk delivered to the ``on_data``
            callback, i.e. the same bytes that were written to ``save_file``.
            (Earlier versions collected these bytes but returned ``None``.)

        Raises:
            requests.RequestException: The token could not be downloaded
                (network failure, timeout, or non-success HTTP status).
            RuntimeError: The SDK reported an error through ``on_error``.
        """
        # Fetch the token before touching the filesystem so a failed request
        # neither leaks a file handle nor leaves an empty output file behind.
        response = requests.get(self.TOKEN_URL, timeout=self.TOKEN_TIMEOUT)
        # Without this an HTTP error page would be used as the token.
        response.raise_for_status()
        token = response.text.strip()

        chunks = []  # joined once at the end instead of quadratic ``bytes +=``
        errors = []

        # The context manager guarantees the file is closed even when the SDK
        # raises or never invokes ``on_close``.
        with open(save_file, "wb") as writer:

            def write_data(data, *args):
                chunks.append(data)
                # A chunk may arrive after ``on_close`` already closed the file.
                if not writer.closed:
                    writer.write(data)

            def raise_error(error, *args):
                # NOTE(review): the SDK may invoke callbacks on a worker
                # thread, in which case this exception would not reach the
                # caller - confirm. The error is recorded so it can be
                # re-raised below either way.
                errors.append(error)
                raise RuntimeError(
                    f'Synthesizing speech failed with error: {error}')

            def close_file(*args):
                # ``close`` is idempotent, so the ``with`` exit is still safe.
                writer.close()

            sdk = nls.NlsStreamInputTtsSynthesizer(
                url=self.GATEWAY_URL,
                token=token,
                appkey=self.app_key,
                on_data=write_data,
                on_error=raise_error,
                on_close=close_file,
            )
            sdk.startStreamInputTts(
                voice=voice, sample_rate=sample_rate, aformat='wav')
            sdk.sendStreamInputTts(transcript)
            sdk.stopStreamInputTts()

        if errors:
            raise RuntimeError(
                f'Synthesizing speech failed with error: {errors[0]}')
        return b"".join(chunks)
class CosyVoiceAgent:
    """Generate one speech file per page of text with ``CosyVoiceSynthesizer``."""

    def __init__(self, config) -> None:
        # ``config["call_cfg"]`` is expanded into keyword arguments for every
        # synthesizer call (see ``call``).
        self.config = config

    def call(self, pages: List, device: str, save_path: str):
        """Synthesize every entry of ``pages`` into ``save_path``.

        Page ``i`` (zero-based) is written to ``<save_path>/p{i + 1}.wav``.

        Args:
            pages: Transcripts, one per output file.
            device: Accepted for interface compatibility; not used here.
            save_path: Directory receiving the audio files. It is created
                (including parents) when it does not exist yet.

        Returns:
            ``{"modality": "speech"}``.
        """
        save_path = Path(save_path)
        # Opening the first output file would fail with FileNotFoundError if
        # the target directory were missing.
        save_path.mkdir(parents=True, exist_ok=True)

        generation_agent = CosyVoiceSynthesizer()
        for idx, page in enumerate(pages):
            generation_agent.call(
                save_file=save_path / f"p{idx + 1}.wav",
                transcript=page,
                **self.config["call_cfg"]
            )
        return {
            "modality": "speech"
        }
|