@COMMENT This file was generated by bib2html.pl version 0.94
@COMMENT written by Patrick Riley
@COMMENT This file came from Freek Stulp's publication pages at
@COMMENT http://www-clmc.usc.edu/~stulp/publications
@inproceedings{stulp10reinforcement,
  title     = {Reinforcement Learning of Full-body Humanoid Motor Skills},
  author    = {Stulp, Freek and Buchli, Jonas and Theodorou, Evangelos and Schaal, Stefan},
  booktitle = {10th {IEEE-RAS} International Conference on Humanoid Robots},
  year      = {2010},
  note      = {Best paper finalist},
  pages     = {405--410},
  abstract  = {Applying reinforcement learning to humanoid robots is challenging because humanoids have a large number of degrees of freedom and state and action spaces are continuous. Thus, most reinforcement learning algorithms would become computationally infeasible and require a prohibitive amount of trials to explore such high-dimensional spaces. In this paper, we present a probabilistic reinforcement learning approach, which is derived from the framework of stochastic optimal control and path integrals. The algorithm, called Policy Improvement with Path Integrals (PI$^2$), has a surprisingly simple form, has no open tuning parameters besides the exploration noise, is model-free, and performs numerically robustly in high dimensional learning problems. We demonstrate how PI$^2$ is able to learn full-body motor skills on a 34-DOF humanoid robot. To demonstrate the generality of our approach, we also apply PI$^2$ in the context of variable impedance control, where both planned trajectories and gain schedules for each joint are optimized simultaneously.},
  bib2html_pubtype = {Refereed Conference Paper, Awards},
  bib2html_rescat  = {Reinforcement Learning of Robot Skills}
}