% Conference paper: "MaCmS: Magahi Code-mixed Dataset for Sentiment Analysis"
% (LREC-COLING 2024 main conference proceedings).
% - The citation key is the identifier from the original export; it is kept
%   unchanged so existing \cite commands continue to resolve.
% - Names are stored in "Last, First" form so BibTeX parses them unambiguously.
% - Braces in the title protect the acronym and the language name from
%   sentence-casing by bibliography styles.
% - month is taken from the conference dates recorded in the note field.
% - No series field: the exported value was a verbatim copy of booktitle.
@inproceedings{d107fb40bb694893abe6e8b8fd035812,
  author    = {Rani, Priya and Negi, Gaurav and Fransen, Theodorus and McCrae, John P.},
  title     = {{MaCmS}: {Magahi} Code-mixed Dataset for Sentiment Analysis},
  booktitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, {LREC-COLING} 2024 - Main Conference Proceedings},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2024},
  month     = may,
  pages     = {10880--10889},
  language  = {English},
  keywords  = {Code-mixing, Less-resourced language, Magahi, Sentiment Analysis},
  abstract  = {The present paper introduces new sentiment data, MaCmS, for Magahi-Hindi-English (MHE) code-mixed languages, where Magahi is a less-resourced minority language. This dataset is the first Magahi-Hindi-English code-mixed dataset for sentiment analysis tasks. Further, we provide a linguistic analysis of the dataset to understand the structure of code-mixing and a statistical study to understand the language preferences of speakers with different sentiment categories. With these analyses, we also train baseline models to evaluate the dataset's quality.},
  note      = {Publisher Copyright: {\textcopyright} 2024 ELRA Language Resource Association: CC BY-NC 4.0.; Joint 30th International Conference on Computational Linguistics and 14th International Conference on Language Resources and Evaluation, LREC-COLING 2024 ; Conference date: 20-05-2024 Through 25-05-2024},
}