@article{Ma_Guo_Niu_Lin_Tang_Ma_Ren_Wang_2020,
  author   = {Ma, Xiaolong and Guo, Fu-Ming and Niu, Wei and Lin, Xue and Tang, Jian and Ma, Kaisheng and Ren, Bin and Wang, Yanzhi},
  title    = {{PCONV}: The Missing but Desirable Sparsity in {DNN} Weight Pruning for Real-Time Execution on Mobile Devices},
  journal  = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume   = {34},
  number   = {4},
  pages    = {5117--5124},
  year     = {2020},
  month    = apr,
  doi      = {10.1609/aaai.v34i04.5954},
  url      = {https://ojs.aaai.org/index.php/AAAI/article/view/5954},
  abstract = {Model compression techniques on Deep Neural Network (DNN) have been widely acknowledged as an effective way to achieve acceleration on a variety of platforms, and DNN weight pruning is a straightforward and effective method. There are currently two mainstreams of pruning methods representing two extremes of pruning regularity: \emph{non-structured}, fine-grained pruning can achieve high sparsity and accuracy, but is not hardware friendly; \emph{structured}, coarse-grained pruning exploits hardware-efficient structures in pruning, but suffers from accuracy drop when the pruning rate is high. In this paper, we introduce \emph{PCONV}, comprising a new sparsity dimension -- fine-grained pruning patterns inside the coarse-grained structures. \emph{PCONV} comprises two types of sparsities, Sparse Convolution Patterns (SCP) which is generated from intra-convolution kernel pruning and connectivity sparsity generated from inter-convolution kernel pruning. Essentially, SCP enhances accuracy due to its special vision properties, and connectivity sparsity increases pruning rate while maintaining balanced workload on filter computation. To deploy \emph{PCONV}, we develop a novel compiler-assisted DNN inference framework and execute \emph{PCONV} models in real-time without accuracy compromise, which cannot be achieved in prior work. Our experimental results show that, \emph{PCONV} outperforms three state-of-art end-to-end DNN frameworks, TensorFlow-Lite, TVM, and Alibaba Mobile Neural Network with speedup up to 39.2$\times$, 11.4$\times$, and 6.3$\times$, respectively, with no accuracy loss. Mobile devices can achieve real-time inference on large-scale DNNs.},
}