@article{Yang_Zheng_Zhang_Zheng_Li_Pan_2021,
  author       = {Yang, Long and Zheng, Gang and Zhang, Yu and Zheng, Qian and Li, Pengfei and Pan, Gang},
  title        = {On Convergence of Gradient Expected {Sarsa}($\lambda$)},
  journal      = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume       = {35},
  number       = {12},
  pages        = {10621--10629},
  year         = {2021},
  month        = may,
  doi          = {10.1609/aaai.v35i12.17270},
  url          = {https://ojs.aaai.org/index.php/AAAI/article/view/17270},
  abstractNote = {We study the convergence of Expected Sarsa($\lambda$) with function approximation. We show that with off-line estimate (multi-step bootstrapping) to Expected Sarsa($\lambda$) is unstable for off-policy learning. Furthermore, based on convex-concave saddle-point framework, we propose a convergent Gradient Expected Sarsa($\lambda$) (GES($\lambda$)) algorithm. The theoretical analysis shows that the proposed GES($\lambda$) converges to the optimal solution at a linear convergence rate under true gradient setting. Furthermore, we develop a Lyapunov function technique to investigate how the step-size influences finite-time performance of GES($\lambda$). Additionally, such a technique of Lyapunov function can be potentially generalized to other gradient temporal difference algorithms. Finally, our experiments verify the effectiveness of our GES($\lambda$). For the details of proof, please refer to https://arxiv.org/pdf/2012.07199.pdf.},
}