% Zheng, Li, Gao, Hua, Qi (2021): "Towards Balanced Defect Prediction with
% Better Information Propagation", Proceedings of the AAAI Conference on
% Artificial Intelligence, vol. 35, no. 1, pp. 759--767.
% Notes for maintainers:
%   - month uses the predefined BibTeX macro (unquoted) so styles can format it.
%   - pages uses "--" so the range is typeset as an en dash.
%   - abstractNote is a non-standard field (ignored by standard styles); its
%     text is kept verbatim from the exported record.
@article{Zheng_Li_Gao_Hua_Qi_2021,
  author       = {Zheng, Xianda and Li, Yuan-Fang and Gao, Huan and Hua, Yuncheng and Qi, Guilin},
  title        = {Towards Balanced Defect Prediction with Better Information Propagation},
  journal      = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume       = {35},
  number       = {1},
  pages        = {759--767},
  year         = {2021},
  month        = may,
  doi          = {10.1609/aaai.v35i1.16157},
  url          = {https://ojs.aaai.org/index.php/AAAI/article/view/16157},
  abstractNote = {Defect prediction, the task of predicting the presence of defects in source code artifacts, has broad application in software development. Defect prediction faces two major challenges, label scarcity, where only a small percentage of code artifacts are labeled, and data imbalance, where the majority of labeled artifacts are non-defective. Moreover, current defect prediction methods ignore the impact of information propagation among code artifacts and this negligence leads to performance degradation. In this paper, we propose DPCAG, a novel model to address the above three issues. We treat code artifacts as nodes in a graph, and learn to propagate influence among neighboring nodes iteratively in an EM framework. DPCAG dynamically adjusts the contributions of each node and selects high-confidence nodes for data augmentation. Experimental results on real-world benchmark datasets show that DPCAG improves performance compare to the state-of-the-art models. In particular, DPCAG achieves substantial performance superiority when measured by Matthews Correlation Coefficient (MCC), a metric that is widely acknowledged to be the most suitable for imbalanced data.},
}