@article{Srinivasan_Talvitie_Bowling_2015,
  title={Improving Exploration in UCT Using Local Manifolds},
  volume={29},
  url={https://ojs.aaai.org/index.php/AAAI/article/view/9660},
  DOI={10.1609/aaai.v29i1.9660},
  abstractNote={Monte-Carlo planning has been proven successful in many sequential decision-making settings, but it suffers from poor exploration when the rewards are sparse. In this paper, we improve exploration in UCT by generalizing across similar states using a given distance metric. We show that this algorithm, like UCT, converges asymptotically to the optimal action. When the state space does not have a natural distance metric, we show how we can learn a local manifold from the transition graph of states in the near future to obtain a distance metric. On domains inspired by video games, empirical evidence shows that our algorithm is more sample efficient than UCT, particularly when rewards are sparse.},
  number={1},
  journal={Proceedings of the AAAI Conference on Artificial Intelligence},
  author={Srinivasan, Sriram and Talvitie, Erik and Bowling, Michael},
  year={2015},
  month={Mar.}
}