Spaces:

stepfun-ai
/

Step-Audio

Running

File size: 1,497 Bytes

b007bca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ccdff04
b007bca
 
 
 
 
 
 
 
 
 
 
 
 
 
ccdff04
b007bca
ccdff04
b007bca

from openai import OpenAI, APIStatusError
import os

# Model identifiers and the API endpoint. Each value is read from the
# environment variable named in the lookup and falls back to the literal
# default when that variable is not set.
AUDIO_CHAT_MODEL = os.environ.get("STEP_AUDIO_CHAT", "step-1o-audio")
TTS_MODEL = os.environ.get("STEP_AUDIO_TTS", "step-tts-mini")
ASR_MODEL = os.environ.get("STEP_AUDIO_ASR", "step-asr")
STEP_BASE_URL = os.environ.get("STEP_BASE_URL", "https://api.stepfun.com/v1")

# Module-level client shared by the helper functions in this file. Only the
# base URL is passed explicitly; no credentials are supplied here.
client = OpenAI(base_url=STEP_BASE_URL)


def call_audiochat(messages):
    """Send a chat completion request to the audio chat model.

    Args:
        messages: Passed unchanged as the ``messages`` argument of
            ``client.chat.completions.create``.

    Returns:
        The ``message.content`` of the first choice in the completion.

    Raises:
        RuntimeError: If the request fails with ``APIStatusError``. The
            original error is printed and attached as ``__cause__``.
        Exception: Any other exception propagates unchanged.
    """
    # Keep the try body limited to the call that can raise APIStatusError.
    try:
        completion = client.chat.completions.create(
            model=AUDIO_CHAT_MODEL,
            messages=messages,
            presence_penalty=1,
        )
    except APIStatusError as e:
        print(e)
        # Chain explicitly so the traceback shows the API error as the cause.
        raise RuntimeError(e) from e
    return completion.choices[0].message.content


def call_tts(text, audio_path, voice="jingdiannvsheng"):
    """Synthesize ``text`` to speech and write the audio to ``audio_path``.

    Args:
        text: Passed as the ``input`` of the speech request.
        audio_path: Destination file path for the returned audio.
        voice: Voice identifier forwarded to the speech endpoint.

    Returns:
        ``True`` once the audio has been written. Failures surface as
        exceptions raised by the client or by the file write.
    """
    # ``stream_to_file`` on the plain ``create()`` response is deprecated in
    # the OpenAI SDK (it buffers instead of streaming). The streaming-response
    # context manager streams to disk and closes the HTTP response on exit.
    # NOTE(review): requires an SDK version that provides
    # ``with_streaming_response`` -- confirm against the pinned ``openai``.
    with client.audio.speech.with_streaming_response.create(
        model=TTS_MODEL, voice=voice, input=text
    ) as response:
        response.stream_to_file(audio_path)
    return True


def call_asr(audio_path):
    """Transcribe the audio file at ``audio_path`` and return the text.

    The file is opened in binary mode and sent to
    ``client.audio.transcriptions.create`` with the configured ASR model and
    a JSON response format; the ``text`` attribute of the response is
    returned.
    """
    request_options = {"model": ASR_MODEL, "response_format": "json"}
    with open(audio_path, "rb") as fh:
        transcription = client.audio.transcriptions.create(file=fh, **request_options)
    return transcription.text


if __name__ == "__main__":
    # Manual smoke test: one chat request, then synthesize a short phrase to
    # a file and transcribe that same file back.
    reply = call_audiochat([{"role": "user", "content": "介绍下你自己"}])
    print("call audiochat: ", reply)

    tts_output = "test.mp3"
    call_tts("hello, 阶跃", tts_output)

    transcript = call_asr(tts_output)
    print("call asr:", transcript)