% Grundkiewicz and Junczys-Dowmunt (2014): WikEd Error Corpus paper,
% PolTAL 2014 proceedings, Lecture Notes in Computer Science vol. 8686.
% The citation key is the exporter-generated identifier; it is kept as-is
% so that existing citations continue to resolve.
@inproceedings{edf74aee9d7e4c04870539c545641d47,
  author    = "Grundkiewicz, Roman and Junczys-Dowmunt, Marcin",
  title     = "The {WikEd} Error Corpus: A Corpus of Corrective {Wikipedia} Edits and its Application to Grammatical Error Correction",
  booktitle = "Advances in Natural Language Processing",
  editor    = "Przepi{\'o}rkowski, Adam and Ogrodniczuk, Maciej",
  series    = "Lecture Notes in Computer Science",
  volume    = "8686",
  pages     = "478--490",
  publisher = "Springer",
  address   = "Cham",
  year      = "2014",
  doi       = "10.1007/978-3-319-10888-9_47",
  isbn      = "978-3-319-10888-9",
  language  = "English",
  keywords  = "error corpus, wikipedia revision histories, grammatical error correction",
  abstract  = "This paper introduces the freely available WikEd Error Corpus. We describe the data mining process from Wikipedia revision histories, corpus content and format. The corpus consists of more than 12 million sentences with a total of 14 million edits of various types. As one possible application, we show that WikEd can be successfully adapted to improve a strong baseline in an ESL grammatical error correction task by 2.63\%. Used together with an ESL error corpus, a composed system gains 1.64\% when compared to the ESL-trained system.",
  note      = "9th International Conference on Natural Language Processing (PolTAL 2014); Conference date: 17-09-2014 Through 19-09-2014",
}