In this paper, we evaluate elderly-speaker acoustic models for large vocabulary continuous speech recognition (LVCSR). The models are trained on a database of 301 elderly speakers aged 60 to 90, each of whom utters 200 sentences. The elderly-speaker PTM (Phonetic Tied Mixture) acoustic model attains a word recognition rate of 88.9%, which is better than the 86.0% attained by the usual adult (average age 28.6) PTM acoustic model. To achieve higher recognition rates, we use two types of speaker adaptation: supervised MLLR and an unsupervised adaptation method based on sufficient HMM statistics. In our experimental results, the elderly acoustic model is a better adaptation baseline HMM for elderly speakers than the usual adult model.
% Conference paper: Baba et al., Eurospeech 2001, pp. 1657--1660.
@inproceedings{baba01_eurospeech,
  author    = {Akira Baba and Shinichi Yoshizawa and Miichi Yamada and Akinobu Lee and Kiyohiro Shikano},
  title     = {Elderly acoustic model for large vocabulary continuous speech recognition},
  booktitle = {7th European Conference on Speech Communication and Technology (Eurospeech 2001)},
  year      = {2001},
  pages     = {1657--1660},
  doi       = {10.21437/Eurospeech.2001-206},
  issn      = {1018-4074},
}
Cite as: Baba, A., Yoshizawa, S., Yamada, M., Lee, A., Shikano, K. (2001) Elderly acoustic model for large vocabulary continuous speech recognition. Proc. 7th European Conference on Speech Communication and Technology (Eurospeech 2001), 1657-1660, doi: 10.21437/Eurospeech.2001-206