@article{Wang_2019,
  author   = {Wang, Shusen},
  title    = {A Sharper Generalization Bound for Divide-and-Conquer Ridge Regression},
  journal  = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume   = {33},
  number   = {01},
  pages    = {5305--5312},
  year     = {2019},
  month    = jul,
  doi      = {10.1609/aaai.v33i01.33015305},
  url      = {https://ojs.aaai.org/index.php/AAAI/article/view/4467},
  abstract = {We study the distributed machine learning problem where the {$n$} feature-response pairs are partitioned among {$m$} machines uniformly at random. The goal is to approximately solve an empirical risk minimization (ERM) problem with the minimum amount of communication. The divide-and-conquer (DC) method, which was proposed several years ago, lets every worker machine independently solve the same ERM problem using its local feature-response pairs and the driver machine combine the solutions. This approach is in one-shot and thereby extremely communication-efficient. Although the DC method has been studied by many prior works, reasonable generalization bound has not been established before this work. For the ridge regression problem, we show that the prediction error of the DC method on unseen test samples is at most {$\epsilon$} times larger than the optimal. There have been constant-factor bounds in the prior works, their sample complexities have a quadratic dependence on {$d$}, which does not match the setting of most real-world problems. In contrast, our bounds are much stronger. First, our {$1 + \epsilon$} error bound is much better than their constant-factor bounds. Second, our sample complexity is merely linear with {$d$}.},
}