Spaces:
Sleeping
Sleeping
| """A simple web interactive chat demo based on gradio.""" | |
| import os | |
| import time | |
| import gradio as gr | |
| import numpy as np | |
| import spaces | |
| import torch | |
| from inference import OmniInference | |
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the inference client from the local checkpoint directory and warm it
# up once at import time, before the UI is created.
omni_client = OmniInference('./checkpoint', device)
omni_client.warm_up()

# Output audio settings. OUT_RATE is passed to the audio output as the first
# element of each yielded tuple and OUT_CHANNELS fixes the column count of
# each sample array. OUT_CHUNK is not referenced in the visible part of this
# file -- NOTE(review): confirm whether it is still needed.
OUT_CHUNK = 4096
OUT_RATE = 24000
OUT_CHANNELS = 1
def process_audio(audio):
    """Stream the model's audio reply for one input recording.

    Args:
        audio: Path of the input audio file, as delivered by the
            ``gr.Audio(type="filepath")`` input, or ``None`` when there is
            no audio to process.

    Yields:
        ``(OUT_RATE, samples)`` tuples, where ``samples`` is an ``int16``
        array of shape ``(n, OUT_CHANNELS)`` built from one chunk produced
        by ``omni_client.run_AT_batch_stream``.
    """
    print(f"filepath: {audio}")
    if audio is None:
        # Nothing to process: end the generator without yielding anything.
        return

    started = time.time()
    stream = omni_client.run_AT_batch_stream(audio)
    for index, chunk in enumerate(stream):
        if index == 0:
            # Log how long it took until the first chunk became available.
            print(f"first chunk time cost: {time.time() - started:.3f}")
        # Each chunk is read as a flat int16 buffer (presumably raw PCM
        # bytes -- confirm against OmniInference) and shaped into columns
        # of OUT_CHANNELS.
        samples = np.frombuffer(chunk, dtype=np.int16).reshape(-1, OUT_CHANNELS)
        # astype returns a fresh array rather than the read-only view that
        # frombuffer produces.
        yield OUT_RATE, samples.astype(np.int16)
# Input component: the audio is handed to process_audio as a file path.
_microphone = gr.Audio(type="filepath", label="Microphone")
# Output component: streamed audio that starts playing automatically.
_response = gr.Audio(label="Response", streaming=True, autoplay=True)

# live=True re-runs process_audio whenever the input changes instead of
# waiting for a submit button.
demo = gr.Interface(
    fn=process_audio,
    inputs=_microphone,
    outputs=[_response],
    title="Chat Mini-Omni Demo",
    live=True,
)

# Enable the request queue before starting the server.
demo.queue()
demo.launch()