% Conference paper from INTERSPEECH 2022 (Fong, Lyth, Henter, Tang, King).
% Event details are stored in eventtitle/eventdate, and the funding and
% copyright statements in the non-printing funding/copyright fields, so that
% no export boilerplate is printed through a note field.
@inproceedings{cda025eee29340f3afb1decd816d9847,
  author     = {Fong, Jason and Lyth, Daniel and Henter, Gustav Eje and Tang, Hao and King, Simon},
  title      = {{Speech Audio Corrector} -- using speech from non-target speakers for one-off correction of mispronunciations in grapheme-input text-to-speech},
  booktitle  = {Proceedings of the Annual Conference of the International Speech Communication Association, {INTERSPEECH}},
  editor     = {Ko, Hanseok and Hansen, John H. L.},
  publisher  = {International Speech Communication Association},
  year       = {2022},
  month      = sep,
  day        = {18},
  pages      = {1213--1217},
  doi        = {10.21437/Interspeech.2022-10138},
  eventtitle = {23rd Annual Conference of the International Speech Communication Association, {INTERSPEECH} 2022},
  eventdate  = {2022-09-18/2022-09-22},
  language   = {english},
  keywords   = {pronunciation control, speech synthesis},
  abstract   = {Correct pronunciation is essential for text-to-speech (TTS) systems in production. Most production systems rely on pronouncing dictionaries to perform grapheme-to-phoneme conversion. Unlike end-to-end TTS, this enables pronunciation correction by manually altering the phoneme sequence, but the necessary dictionaries are labour-intensive to create and only exist in a few high-resourced languages. This work demonstrates that accurate TTS pronunciation control can be achieved without a dictionary. Moreover, we show that such control can be performed without requiring any model retraining or fine-tuning, merely by supplying a single correctly-pronounced reading of a word in a different voice and accent at synthesis time. Experimental results show that our proposed system successfully enables one-off correction of mispronunciations in grapheme-based TTS with maintained synthesis quality. This opens the door to production-level TTS in languages and applications where pronunciation dictionaries are unavailable.},
  funding    = {This work was partially supported by the Wallenberg AI, Autonomous Systems and Software Program (WASP) funded by the Knut and Alice Wallenberg Foundation.},
  copyright  = {Copyright {\textcopyright} 2022 ISCA.},
}