Spaces:

Marvis-AI
/

Marvis-TTS-250m

Running on Zero

Marvis-TTS-250m / app.py

Rename model

a10db13 verified 26 days ago

1.61 kB

	import spaces
	import torch
	import gradio as gr
	import numpy as np
	from transformers import AutoProcessor, CsmForConditionalGeneration

	# Hub repository that holds both the processor and the model weights.
	model_id = "Marvis-AI/marvis-tts-250m-v0.1-transformers"

	# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Loaded once at import time so every request reuses the same objects.
	processor = AutoProcessor.from_pretrained(model_id)
	model = CsmForConditionalGeneration.from_pretrained(
	    model_id,
	    device_map=device,
	)

	@spaces.GPU
	def tts(text: str) -> tuple[int, np.ndarray]:
	    """Synthesize speech for ``text`` and return it in Gradio's audio format.

	    Args:
	        text: Prompt to speak. The demo UI suggests prefixing it with a
	            speaker tag such as ``[0]`` or ``[1]``.

	    Returns:
	        A ``(sample_rate, samples)`` tuple: the fixed rate 24 000 Hz and the
	        first generated waveform as a NumPy array.

	    Raises:
	        gr.Error: If ``text`` is empty or contains only whitespace.
	    """
	    # Fail fast with a readable UI message instead of sending an empty
	    # prompt to the model.
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    inputs = processor(
	        text,
	        add_special_tokens=True,
	        return_tensors="pt",
	    ).to(device)
	    # Drop token_type_ids when the processor emits them; presumably
	    # generate() does not accept that key -- confirm against the model docs.
	    inputs.pop("token_type_ids", None)

	    # generate audio
	    audio = model.generate(**inputs, output_audio=True)

	    # Cast to float32 before leaving torch: NumPy cannot represent bfloat16,
	    # so .numpy() would raise if the model runs in that dtype.
	    audio_np = audio[0].float().cpu().numpy()
	    return (24_000, audio_np)

	# Two-column demo page: prompt and button on the left, audio player on the
	# right.
	with gr.Blocks(title="Marvis TTS Demo") as demo:
	    gr.Markdown(
	        value="## 🎙️ Marvis TTS Demo\nTry out Marvis TTS with different speakers using `[0]`, `[1]`, etc. before your text!"
	    )

	    with gr.Row():
	        # Input side: editable prompt plus the trigger button.
	        with gr.Column():
	            text_input = gr.Textbox(
	                value="[0] Marvis TTS is a new text-to-speech model that provides fast streaming on edge devices.",
	                label="Text Input",
	                placeholder="Enter text here... (prefix with [0], [1], etc. to choose speaker)",
	                lines=3,
	            )
	            generate_btn = gr.Button(value="Generate Speech")

	        # Output side: player for the synthesized waveform.
	        with gr.Column():
	            audio_output = gr.Audio(label="Generated Audio")

	    # Clicking the button sends the textbox content to tts() and plays
	    # whatever it returns.
	    generate_btn.click(tts, inputs=text_input, outputs=audio_output)

	# Start the web server only when executed as a script.
	if __name__ == "__main__":
	    demo.launch()