@COMMENT This file was generated by bib2html.pl version 0.94
@COMMENT written by Patrick Riley
@COMMENT This file came from Freek Stulp's publication pages at
@COMMENT http://www-clmc.usc.edu/~stulp/publications
@COMMENT Conference paper (ICML 2012) by Stulp and Sigaud introducing PI2-CMA. The bib2html_* fields are consumed by bib2html.pl and are ignored by standard BibTeX styles.
@InProceedings{stulp12path,
  author           = {Stulp, Freek and Sigaud, Olivier},
  title            = {Path Integral Policy Improvement with Covariance Matrix Adaptation},
  booktitle        = {Proceedings of the 29th International Conference on Machine Learning ({ICML})},
  year             = {2012},
  abstract         = {There has been a recent focus in reinforcement learning on addressing continuous state and action problems by optimizing parameterized policies. PI2 is a recent example of this approach. It combines a derivation from first principles of stochastic optimal control with tools from statistical estimation theory. In this paper, we consider PI2 as a member of the wider family of methods which share the concept of probability-weighted averaging to iteratively update parameters to optimize a cost function. At the conceptual level, we compare PI2 to other members of the same family, being Cross-Entropy Methods and CMAES. The comparison suggests the derivation of a novel algorithm which we call PI2-CMA for ``Path Integral Policy Improvement with Covariance Matrix Adaptation''. PI2-CMA's main advantage is that it determines the magnitude of the exploration noise automatically.},
  bib2html_pubtype = {Refereed Conference Paper},
  bib2html_rescat  = {Reinforcement Learning of Robot Skills}
}