% Conference paper in the ICCS 2022 proceedings (Lecture Notes in Computer
% Science), pages 503--516. Names are stored in "von Last, First" form so that
% multi-word given names and the "de" particle parse unambiguously.
% copyright, eventtitle, eventdate, keywords, abstract and language are
% informational; classic BibTeX styles ignore fields they do not know.
@inproceedings{bf40211b60d04f139450173c54d1f032,
  author     = {Peng, Zedong and Lin, Xuanyi and Santhoshkumar, Sreelekhaa Nagamalli and Niu, Nan and Kanewala, Upulee},
  title      = {Learning {I/O} Variables from Scientific Software's User Manuals},
  editor     = {Groen, Derek and de Mulatier, Cl{\'e}lia and Krzhizhanovskaya, Valeria V. and Sloot, Peter M. A. and Paszynski, Maciej and Dongarra, Jack J.},
  booktitle  = {Computational Science -- {ICCS} 2022, 22nd International Conference, Proceedings},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer Science and Business Media Deutschland GmbH},
  year       = {2022},
  pages      = {503--516},
  doi        = {10.1007/978-3-031-08760-8_42},
  isbn       = {9783031087592},
  language   = {English},
  eventtitle = {22nd Annual International Conference on Computational Science, {ICCS} 2022},
  eventdate  = {2022-06-21/2022-06-23},
  copyright  = {{\textcopyright} 2022, The Author(s), under exclusive license to Springer Nature Switzerland AG.},
  keywords   = {Classification, Machine learning, Scientific software, Software documentation, User manual},
  abstract   = {Scientific software often involves many input and output variables. Identifying these variables is important for such software engineering tasks as metamorphic testing. To reduce the manual work, we report in this paper our investigation of machine learning algorithms in classifying variables from software's user manuals. We identify thirteen natural-language features, and use them to develop a multi-layer solution where the first layer distinguishes variables from non-variables and the second layer classifies the variables into input and output types. Our experimental results on three scientific software systems show that random forest and feedforward neural network can be used to best implement the first layer and second layer respectively.},
}