{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert a JSON audio response to a WAV file\n",
    "\n",
    "Loads the sampling rate from the generation config and the audio samples from `response.json`, then writes them to `response.wav` as 16-bit PCM."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "(1, 478720)\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "import numpy as np\n",
    "import yaml\n",
    "from scipy.io.wavfile import write\n",
    "\n",
    "# Input locations, gathered in one place so they are easy to adjust.\n",
    "# NOTE(review): CONF_PATH is an absolute, machine-specific path -- consider making it configurable.\n",
    "CONF_PATH = '/projects/mixart/doodle-musicgen/.conf/generation_conf.yaml'\n",
    "RESPONSE_PATH = 'response.json'\n",
    "\n",
    "with open(CONF_PATH) as f:\n",
    "    conf = yaml.safe_load(f)\n",
    "with open(RESPONSE_PATH, 'r') as f:\n",
    "    res = json.load(f)\n",
    "\n",
    "# Show the raw JSON type first, then the array shape after conversion.\n",
    "audio = res['audio']\n",
    "print(type(audio))\n",
    "audio = np.array(audio)\n",
    "print(audio.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "OUTPUT_PATH = 'response.wav'\n",
    "INT16_MAX = np.iinfo(np.int16).max  # 32767, the largest 16-bit PCM sample value\n",
    "\n",
    "rate = conf['sampling_rate']\n",
    "\n",
    "# Samples are assumed to be floats nominally in [-1.0, 1.0] -- TODO confirm against the producer of response.json.\n",
    "# Clip before scaling: a float -> int16 cast does not saturate, so any sample outside\n",
    "# that range would overflow and corrupt the written waveform.\n",
    "# flatten() collapses all axes into one mono stream, matching the original behaviour.\n",
    "samples = np.clip(audio.flatten(), -1.0, 1.0)\n",
    "scaled = (samples * INT16_MAX).astype(np.int16)\n",
    "write(OUTPUT_PATH, rate, scaled)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ezout-vision",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}