% Chapter by a single author in an edited volume (series volume 8), hence
% incollection rather than inbook. The citation key is unchanged.
% NOTE(review): address holds a country; BibTeX expects the publisher's city.
% Confirm the city before changing it.
@incollection{9b8f7fca8a074c3e8fc0e54d20c8303d,
  author    = {Junczys-Dowmunt, Marcin},
  title     = {Influence of accurate compound noun splitting on bilingual vocabulary extraction},
  booktitle = {Text Resources and Lexical Knowledge},
  editor    = {Storrer, Angelika and Geyken, Alexander and Siebert, Alexander and W{\"u}rzner, Kay-Michael},
  series    = {Text, Translation, Computational Processing (TTCP)},
  volume    = {8},
  publisher = {De Gruyter Mouton},
  address   = {Germany},
  year      = {2008},
  month     = oct,
  day       = {17},
  pages     = {91--104},
  doi       = {10.1515/9783110211818.2.91},
  language  = {English},
  abstract  = {The influence of compound noun splitting on a German-Polish bilingual vocabulary extraction task is investigated. To accomplish this, several unsupervised methods for increasingly accurate compound noun splitting are introduced. Bilingual evidence from a parallel German-Polish corpus and co-occurrence counts from the web are used to disambiguate compound noun analyses directly. These collected splits serve as training data for a probabilistic model that abstracts away from the errors made by the direct methods and reaches an f-measure of 95.10\%. Furthermore, these methods are evaluated in terms of word alignment quality and extraction accuracy where linguistically accurate methods are found to outperform the corpus-based methods proposed in the literature. A comparison of alignment quality achieved with the best splitting method and the baseline implies that the effort to build supervised splitting methods might result in minimal or no performance gains.},
}