DOI: 10.29327/1588952.28-12
Recent advances in artificial intelligence (AI), particularly in large language models (LLMs), offer new possibilities for automating requirements generation from elicitation interviews. This study compares the performance of ChatGPT-4 and DeepSeek-V3 in generating software requirements from transcribed stakeholder interviews. In two case studies, the LLMs were tasked with identifying functional and non-functional requirements. The results indicate that ChatGPT-4 extracted more precise requirements, particularly non-functional ones, while DeepSeek-V3 showed advantages in efficiency. However, both models exhibited limitations in handling ambiguity and in correctly categorizing requirements. This study highlights the potential of LLMs in Requirements Engineering while emphasizing the need for improved prompting and dialogue techniques and for human supervision. Future research should explore hybrid AI-human approaches and domain-specific fine-tuning to improve the accuracy of requirement extraction.
Keywords: Requirements engineering; Large Language Models; Requirement generation.
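The abstract does not describe the extraction setup in detail, so the following is only a minimal sketch of the kind of pipeline the study evaluates: an interview transcript is sent to a chat-completion model with a prompt asking for functional and non-functional requirements. It assumes an OpenAI-compatible Python client; the model name, prompt wording, and JSON output schema are illustrative assumptions, not the authors' protocol.

```python
# Illustrative sketch only: prompts an OpenAI-compatible chat model to extract
# functional and non-functional requirements from an interview transcript.
# Model name, prompt wording, and output schema are assumptions, not the
# procedure used in the paper.
import json
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

PROMPT_TEMPLATE = (
    "You are a requirements engineer. From the interview transcript below, "
    "extract the software requirements. Return a JSON object with two keys: "
    '"functional" and "non_functional", each a list of short requirement '
    "statements.\n\nTranscript:\n{transcript}"
)

def extract_requirements(transcript: str, model: str = "gpt-4o") -> dict:
    """Ask the model to identify and categorize requirements in a transcript."""
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user",
                   "content": PROMPT_TEMPLATE.format(transcript=transcript)}],
        response_format={"type": "json_object"},  # request machine-readable output
        temperature=0,  # keep extraction as deterministic as the API allows
    )
    return json.loads(response.choices[0].message.content)

if __name__ == "__main__":
    sample = (
        "Stakeholder: Clerks must be able to register a new client in under a "
        "minute, and the system has to stay responsive with 200 users online."
    )
    print(json.dumps(extract_requirements(sample), indent=2))
```

In practice, the returned lists would still need human review, since, as the study notes, both models struggle with ambiguity and with placing requirements in the correct category.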
@inproceedings{wer202511,
author = {Almeida, C. and Copque, I. and Oliveira, A. and Arouca, M. and Barbosa, A. and Freire, S. and Mendonça, M. and Leite, J. C.},
title = {From Elicitation Interviews to Software Requirements: Evaluating LLM Performance in Requirement Generation},
booktitle = {Anais do Workshop em Engenharia de Requisitos - Proceedings of the 28th Workshop on Requirements Engineering (WER2025)},
year = {2025},
issn = {2675-0066},
isbn = {978-65-01-52831-1},
doi = {10.29327/1588952.28-12}
}