% LREC 2016 conference paper: evaluation of the Moses and KantanMT SMT systems
% inside a cross-language IR pipeline over the OHSUMED test collection.
% The exported hash key is kept as-is so existing citations keep resolving.
% Case-sensitive words in title/booktitle are brace-protected against style recasing.
@inproceedings{5dda9581104244c1be6236c1637e73a3,
  author    = {Katris, Nikolaos and Sutcliffe, Richard and Kalamboukis, Theodore},
  title     = {Using a cross-language information retrieval system based on {OHSUMED} to evaluate the {Moses} and {KantanMT} statistical machine translation systems},
  booktitle = {Proceedings of the 10th International Conference on Language Resources and Evaluation, {LREC} 2016},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Mazo, Helene and Moreno, Asuncion and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Odijk, Jan and Piperidis, Stelios and Maegaard, Bente and Mariani, Joseph},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2016},
  pages     = {368--372},
  language  = {English},
  keywords  = {Apache Solr, Cross-language information retrieval, ECDC, EMEA, KantanMT, Moses, OHSUMED, QTLP, Statistical machine translation},
  abstract  = {The objective of this paper was to evaluate the performance of two statistical machine translation (SMT) systems within a cross-language information retrieval (CLIR) architecture and examine if there is a correlation between translation quality and CLIR performance. The SMT systems were KantanMT, a cloud-based machine translation (MT) platform, and Moses, an open-source MT application. First we trained both systems using the same language resources: the EMEA corpus for the translation model and language model and the QTLP corpus for tuning. Then we translated the 63 queries of the OHSUMED test collection from Greek into English using both MT systems. Next, we ran the queries on the document collection using Apache Solr to get a list of the top ten matches. The results were compared to the OHSUMED gold standard. KantanMT achieved higher average precision and F-measure than Moses, while both systems produced the same recall score. We also calculated the BLEU score for each system using the ECDC corpus. Moses achieved a higher BLEU score than KantanMT. Finally, we also tested the IR performance of the original English queries. This work overall showed that CLIR performance can be better even when BLEU score is worse.},
  note      = {10th International Conference on Language Resources and Evaluation, LREC 2016 ; Conference date: 23-05-2016 Through 28-05-2016},
}