% Separately authored chapter in an edited volume of the LNCS series:
% typed as @incollection so that booktitle and editor are rendered alongside author.
@incollection{4ab3a1035db84cd0a9903c2c0546e919,
  author    = {Schneider, Anne H. and Hellrich, Johannes and Luz, Saturnino},
  title     = {Word, Syllable and Phoneme Based Metrics Do Not Correlate with Human Performance in {ASR}-Mediated Tasks},
  editor    = {Przepi{\'o}rkowski, Adam and Ogrodniczuk, Maciej},
  booktitle = {Advances in Natural Language Processing},
  series    = {Lecture Notes in Computer Science},
  volume    = {8686},
  publisher = {Springer},
  year      = {2014},
  pages     = {392--399},
  doi       = {10.1007/978-3-319-10888-9_39},
  isbn      = {978-3-319-10887-2},
  language  = {english},
  abstract  = {Automatic evaluation metrics should correlate with human judgement. We collected sixteen ASR mediated dialogues using a map task scenario. The material was assessed extrinsically (i.e. in context) through measures like time to task completion and intrinsically (i.e. out of context) using the word error rate and several variants thereof, which are based on smaller units. Extrinsic and intrinsic results did not correlate, neither for word error rate nor for metrics based on characters, syllables or phonemes.},
}