Spaces:
Runtime error
Runtime error
% Shen et al. (2018): TTS synthesis by conditioning WaveNet on mel spectrogram predictions (ICASSP).
% Acronyms and proper nouns in the title are braced so sentence-casing styles keep their capitals.
@inproceedings{shen2018natural,
  author       = {Shen, Jonathan and Pang, Ruoming and Weiss, Ron J and Schuster, Mike and Jaitly, Navdeep and Yang, Zongheng and Chen, Zhifeng and Zhang, Yu and Wang, Yuxuan and Skerry-Ryan, RJ and others},
  title        = {Natural {TTS} synthesis by conditioning {WaveNet} on mel spectrogram predictions},
  booktitle    = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {4779--4783},
  year         = {2018},
  organization = {IEEE}
}
% FastPitch: parallel text-to-speech with pitch prediction (ICASSP 2021).
% The model name is braced so sentence-casing styles do not flatten it to "Fastpitch".
@inproceedings{lancucki2021fastpitch,
  author       = {{\L}a{\'n}cucki, Adrian},
  title        = {{FastPitch}: Parallel text-to-speech with pitch prediction},
  booktitle    = {2021 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {6588--6592},
  year         = {2021},
  organization = {IEEE}
}
% Mixer-TTS: non-autoregressive text-to-speech conditioned on language model embeddings (ICASSP 2022).
@inproceedings{tatanov2022mixer,
  author       = {Tatanov, Oktai and Beliaev, Stanislav and Ginsburg, Boris},
  title        = {{Mixer-TTS}: non-autoregressive, fast and compact text-to-speech model conditioned on language model embeddings},
  booktitle    = {2022 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {7482--7486},
  year         = {2022},
  organization = {IEEE}
}
% RAD-TTS: parallel flow-based TTS with alignment learning (ICML workshop, 2021).
% The fourth author's surname carries the same LaTeX accents used for this author in other entries of this file.
@inproceedings{shih2021rad,
  author    = {Shih, Kevin J and Valle, Rafael and Badlani, Rohan and {\L}a{\'n}cucki, Adrian and Ping, Wei and Catanzaro, Bryan},
  title     = {{RAD-TTS}: Parallel flow-based {TTS} with robust alignment learning and diverse synthesis},
  booktitle = {ICML Workshop on Invertible Neural Networks, Normalizing Flows, and Explicit Likelihood Models},
  year      = {2021}
}
% HiFi-GAN: GAN-based speech synthesis (Advances in Neural Information Processing Systems 33, 2020).
@article{kong2020hifi,
  author  = {Kong, Jungil and Kim, Jaehyeon and Bae, Jaekyoung},
  title   = {{HiFi-GAN}: Generative adversarial networks for efficient and high fidelity speech synthesis},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {33},
  pages   = {17022--17033},
  year    = {2020}
}
% WaveGlow: flow-based generative network for speech synthesis (ICASSP 2019).
% The model name is braced so sentence-casing styles do not flatten it to "Waveglow".
@inproceedings{prenger2019waveglow,
  author       = {Prenger, Ryan and Valle, Rafael and Catanzaro, Bryan},
  title        = {{WaveGlow}: A flow-based generative network for speech synthesis},
  booktitle    = {2019 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {3617--3621},
  year         = {2019},
  organization = {IEEE}
}
% UnivNet: neural vocoder with multi-resolution spectrogram discriminators (Interspeech 2021).
% Only the model name is brace-protected; the rest of the title is left for the bibliography style to case.
@inproceedings{jang21_interspeech,
  author    = {Jang, Won and Lim, Dan and Yoon, Jaesam and Kim, Bongwan and Kim, Juntae},
  title     = {{UnivNet}: A Neural Vocoder with Multi-Resolution Spectrogram Discriminators for High-Fidelity Waveform Generation},
  booktitle = {Proc. Interspeech 2021},
  pages     = {2207--2211},
  year      = {2021},
  doi       = {10.21437/Interspeech.2021-1016}
}
% Badlani et al. (2022): "One TTS alignment to rule them all" (ICASSP 2022).
@inproceedings{badlani2022one,
  author       = {Badlani, Rohan and {\L}a{\'n}cucki, Adrian and Shih, Kevin J and Valle, Rafael and Ping, Wei and Catanzaro, Bryan},
  title        = {One {TTS} alignment to rule them all},
  booktitle    = {2022 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  pages        = {6092--6096},
  year         = {2022},
  organization = {IEEE}
}
% ByT5: token-free byte-to-byte pre-trained models (arXiv 2105.13626, 2021).
% The model name is braced so sentence-casing styles do not render it as "Byt5".
@article{xue2021byt5,
  author  = {Xue, Linting and Barua, Aditya and Constant, Noah and Al-Rfou, Rami and Narang, Sharan and Kale, Mihir and Roberts, Adam and Raffel, Colin},
  title   = {{ByT5}: Towards a token-free future with pre-trained byte-to-byte models},
  journal = {arXiv preprint arXiv:2105.13626},
  year    = {2021}
}
% T5G2P: text-to-text transfer transformer for grapheme-to-phoneme conversion (2021).
% NOTE(review): typed as a conference paper because the original venue field named the
% International Speech Communication Association rather than a journal; the Interspeech 2021
% booktitle is assumed from that - confirm against the published record and add pages/doi.
@inproceedings{vrezavckova2021t5g2p,
  author       = {{\v{R}}ez{\'a}{\v{c}}kov{\'a}, Mark{\'e}ta and {\v{S}}vec, Jan and Tihelka, Daniel},
  title        = {{T5G2P}: Using text-to-text transfer transformer for grapheme-to-phoneme conversion},
  booktitle    = {Proc. Interspeech 2021},
  year         = {2021},
  organization = {International Speech Communication Association}
}
% ByT5 model for massively multilingual grapheme-to-phoneme conversion (arXiv 2204.03067, 2022).
% The model name is braced so sentence-casing styles do not render it as "Byt5".
@article{zhu2022byt5,
  author  = {Zhu, Jian and Zhang, Cong and Jurgens, David},
  title   = {{ByT5} model for massively multilingual grapheme-to-phoneme conversion},
  journal = {arXiv preprint arXiv:2204.03067},
  year    = {2022}
}
% Conformer: convolution-augmented transformer for speech recognition (arXiv 2005.08100, 2020).
% NOTE(review): the key starts with a doubled "g"; it is kept as-is because existing citations may depend on it.
@article{ggulati2020conformer,
  author  = {Gulati, Anmol and Qin, James and Chiu, Chung-Cheng and Parmar, Niki and Zhang, Yu and Yu, Jiahui and Han, Wei and Wang, Shibo and Zhang, Zhengdong and Wu, Yonghui and others},
  title   = {Conformer: Convolution-augmented transformer for speech recognition},
  journal = {arXiv preprint arXiv:2005.08100},
  year    = {2020}
}
% Gorman et al. (2018): homograph disambiguation with supervised machine learning (LREC 2018).
@inproceedings{gorman2018improving,
  author    = {Gorman, Kyle and Mazovetskiy, Gleb and Nikolaev, Vitaly},
  title     = {Improving homograph disambiguation with supervised machine learning},
  booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)},
  year      = {2018}
}
% Kim et al. (2021): conditional variational autoencoder with adversarial learning for end-to-end text-to-speech.
@inproceedings{kim2021conditional,
  author       = {Kim, Jaehyeon and Kong, Jungil and Son, Juhee},
  title        = {Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech},
  booktitle    = {International Conference on Machine Learning},
  pages        = {5530--5540},
  year         = {2021},
  organization = {PMLR}
}
% SoundStream: end-to-end neural audio codec (IEEE/ACM TASLP, vol. 30, 2022).
% The page range uses a double hyphen so it is typeset as an en dash.
@article{zeghidour2022soundstream,
  author  = {Zeghidour, Neil and Luebs, Alejandro and Omran, Ahmed and Skoglund, Jan and Tagliasacchi, Marco},
  title   = {{SoundStream}: An End-to-End Neural Audio Codec},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume  = {30},
  pages   = {495--507},
  year    = {2022},
  doi     = {10.1109/TASLP.2021.3129994}
}
% Defossez et al. (2022): high fidelity neural audio compression (arXiv 2210.13438).
@article{defossez2022encodec,
  author  = {D{\'e}fossez, Alexandre and Copet, Jade and Synnaeve, Gabriel and Adi, Yossi},
  title   = {High fidelity neural audio compression},
  journal = {arXiv preprint arXiv:2210.13438},
  year    = {2022}
}
% Mentzer et al. (2023): finite scalar quantization (arXiv 2309.15505).
@article{mentzer2023finite,
  author  = {Mentzer, Fabian and Minnen, David and Agustsson, Eirikur and Tschannen, Michael},
  title   = {Finite scalar quantization: {VQ-VAE} made simple},
  journal = {arXiv preprint arXiv:2309.15505},
  year    = {2023}
}