% Conference paper in the ICDTA 2025 proceedings (Lecture Notes in Networks and Systems).
% The citation key is kept unchanged so existing citations still resolve.
% Names use the "Last, First" form; acronyms are brace-protected against style recasing;
% the DOI is stored bare (no LaTeX escapes) for URL/DOI-aware styles.
@inproceedings{352284e2c2b54a05944d589085f61954,
  author    = {Aziz, Oussama and Tadist, Khawla and Alaoui, Souad and Nikolov, Nikola S.},
  title     = {Preprocessing and Feature Selection Techniques for Enhancing {AI} Model Performance on Intrusion Detection System Datasets},
  booktitle = {Digital Technologies and Applications - Proceedings of {ICDTA} 2025, Volume 1},
  editor    = {Motahhir, Saad and Bossoufi, Badre and Guerrero, Josep M.},
  series    = {Lecture Notes in Networks and Systems},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2026},
  pages     = {113--125},
  doi       = {10.1007/978-3-032-07718-9_10},
  isbn      = {9783032077172},
  language  = {English},
  keywords  = {CFS, CIC-IDS datasets, Feature selection, Naive Bayes, Preprocessing, Random forest, t-SNE},
  abstract  = {Effective preprocessing and feature selection are pivotal for optimizing AI model performance in cybersecurity. This work focuses on the application of advanced preprocessing techniques to the CSECICIDS2017 and CICIDS2018 datasets, emphasizing the use of Naive Bayes and Random Forest algorithms for feature selection alongside Correlation-based Feature Selection (CFS). These methods identify the most relevant features, ensuring the refinement of data for subsequent analysis. Additionally, t-SNE (t-distributed Stochastic Neighbor Embedding) is employed for visualizing high-dimensional data, providing insights into feature distribution and model performance. These methodologies aim to streamline the preprocessing pipeline, improve feature relevance, and facilitate better understanding of data patterns, ultimately advancing the utility of machine learning models in cybersecurity.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2026.; 5th International Conference on Digital Technologies and Applications, ICDTA 2025 ; Conference date: 17-04-2025 Through 18-04-2025},
}