% Conference paper (FICC 2023, Springer "Lecture Notes in Networks and Systems")
% presenting a dataset of Hadoop MapReduce job traces collected in a cloud
% environment with limited network bandwidth.
% NOTE(review): no series volume number is recorded here; confirm against the
% publisher record and add a volume field if one applies.
@inproceedings{8c5c50c3e64040c784fe5ff47c4be6ba,
  author    = {Bergui, Mohammed and Nikolov, Nikola S. and Najah, Said},
  title     = {{Hadoop} Dataset for Job Estimation in the Cloud with Limited Bandwidth},
  booktitle = {Advances in Information and Communication - Proceedings of the 2023 Future of Information and Communication Conference {FICC}},
  editor    = {Arai, Kohei},
  series    = {Lecture Notes in Networks and Systems},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  year      = {2023},
  pages     = {341--348},
  doi       = {10.1007/978-3-031-28073-3_24},
  isbn      = {9783031280726},
  language  = {English},
  keywords  = {Bandwidth, Cloud computing, Estimating the runtime, Hadoop, MapReduce},
  abstract  = {Hadoop MapReduce is a well-known open source framework for processing a large amount of data in a cluster of machines; it has been adopted by many organizations and deployed on-premise and on the cloud. MapReduce job execution time estimation and prediction are crucial for efficient scheduling, resource management, better energy consumption, and cost saving. In this paper, we present our new dataset of MapReduce job traces in a cloud environment with limited network bandwidth; we describe the process of generating and collecting the dataset in this paper. We believe that this dataset will help researchers develop new scheduling approaches and improve Hadoop MapReduce job performance.},
  note      = {Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 8th Future of Information and Computing Conference, FICC 2023 ; Conference date: 02-03-2023 Through 03-03-2023},
}