% Conference paper from WorldCIST 2023, published in the series
% "Lecture Notes in Networks and Systems" (year of publication: 2024).
% Conventions used in this entry:
%   - names are stored as "Last, First" so parsing never depends on word case;
%   - acronyms that must keep their case are brace-protected;
%   - percent signs in the abstract are escaped so styles that print it compile;
%   - the publisher carries an extra brace pair because its name contains a
%     literal " and ", which list-aware back ends would otherwise split.
% The citation key is the exporter-generated identifier and is kept as-is so
% existing citations keep resolving.
@inproceedings{8ce2cdc0e34849db89c0413ef6dade9e,
  author    = {{El Miqdadi}, Ikram and Kharroubi, Jamal and Nikolov, Nikola S.},
  title     = {Detection of Racism on Multilingual Social Media: An {NLP} Approach},
  booktitle = {Information Systems and Technologies - {WorldCIST} 2023},
  editor    = {Rocha, Alvaro and Adeli, Hojjat and Dzemyda, Gintautas and Moreira, Fernando and Colla, Valentina},
  series    = {Lecture Notes in Networks and Systems},
  publisher = {{Springer Science and Business Media Deutschland GmbH}},
  year      = {2024},
  pages     = {436--445},
  doi       = {10.1007/978-3-031-45642-8_43},
  isbn      = {9783031456411},
  language  = {English},
  keywords  = {detection of racism, machine learning, NLP},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2024.; 11th World Conference on Information Systems and Technologies, WorldCIST 2023 ; Conference date: 04-04-2023 Through 06-04-2023},
  abstract  = {This paper presents a comparison between various text vectorization and machine learning algorithms for solving the problem of detection of racism on multi-lingual social media. We train classification models on Facebook comments and tweets in three different languages: English, French and Arabic. Our findings suggest that for the English-language comments, the combination of KNN with TF-IDF works best with an accuracy of 78.34\%, while for French, the use of the SVM classifier with BOW provides an accuracy of 82.56\%. For Arabic we obtain an accuracy of 91.13\% when KNN is coupled with BOW. Overall, our results suggest that the combination of SVM and TF-IDF is the best choice for detection of racism on social media that contains content in English, French and Arabic at the same time. As part of this work, we also present a new annotated dataset of social media comments in three languages.},
}