Spaces:

fexeak
/

auido-generation-3B-v2

Sleeping

fexeak

fix app.py

0fd89a7 4 months ago

1.23 kB

	from boson_multimodal.serve.serve_engine import HiggsAudioServeEngine, HiggsAudioResponse
	from boson_multimodal.data_types import ChatMLSample, Message, AudioContent

	import torch
	import torchaudio
	import time
	import click

	MODEL_PATH = "bosonai/higgs-audio-v2-generation-3B-base"
	AUDIO_TOKENIZER_PATH = "bosonai/higgs-audio-v2-tokenizer"

	system_prompt = (
	"Generate audio following instruction.\n\n<\|scene_desc_start\|>\nAudio is recorded from a quiet room.\n<\|scene_desc_end\|>"
	)

	messages = [
	Message(
	role="system",
	content=system_prompt,
	),
	Message(
	role="user",
	content="The sun rises in the east and sets in the west. This simple fact has been observed by humans for thousands of years.",
	),
	]
	device = "cuda" if torch.cuda.is_available() else "cpu"

	serve_engine = HiggsAudioServeEngine(MODEL_PATH, AUDIO_TOKENIZER_PATH, device=device)

	output: HiggsAudioResponse = serve_engine.generate(
	chat_ml_sample=ChatMLSample(messages=messages),
	max_new_tokens=1024,
	temperature=0.3,
	top_p=0.95,
	top_k=50,
	stop_strings=["<\|end_of_text\|>", "<\|eot_id\|>"],
	)
	torchaudio.save(f"output.wav", torch.from_numpy(output.audio)[None, :], output.sampling_rate)