% Paper in the CLEF 2014 proceedings (Springer, Lecture Notes in Computer Science).
% The citation key looks machine-generated; it is kept unchanged so existing \cite commands still resolve.
% `address` holds the publisher's city, not the conference location (the conference details are in `note`).
@inproceedings{f41ae560c81845a88c23a37311845e7f,
  author    = {Walker, Andrew and Starkey, Andrew and Pan, Jeff Z. and Siddharthan, Advaith},
  title     = {Making Test Corpora for Question Answering More Representative},
  booktitle = {Information Access Evaluation. Multilinguality, Multimodality, and Interaction},
  editor    = {Kanoulas, Evangelos and Lupu, Mihai and Clough, Paul and Sanderson, Mark and Hall, Mark and Hanbury, Allan and Toms, Elaine},
  series    = {Lecture Notes in Computer Science},
  volume    = {8685},
  publisher = {Springer},
  address   = {Cham},
  year      = {2014},
  month     = sep,
  day       = {18},
  pages     = {1--6},
  doi       = {10.1007/978-3-319-11382-1_1},
  isbn      = {978-3-319-11381-4},
  language  = {English},
  note      = {2014 Cross Language Evaluation Forum Conference, CLEF 2014 ; Conference date: 15-09-2014 Through 18-09-2014},
  abstract  = {Despite two high profile series of challenges devoted to question answering technologies there remains no formal study into the representativeness that question corpora bear to real end-user inputs. We examine the corpora used presently and historically in the TREC and QALD challenges in juxtaposition with two more from natural sources and identify a degree of disjointedness between the two. We analyse these differences in depth before discussing a candidate approach to question corpora generation and provide a juxtaposition on its own representativeness. We conclude that these artificial corpora have good overall coverage of grammatical structures but the distribution is skewed, meaning performance measures may be inaccurate.},
}