Abstract
In multi-task reinforcement learning there are two main challenges: at training time, the ability to learn different policies with a single model; at test time, inferring which of those policies to apply without an external signal. In the case of continual reinforcement learning, a third challenge arises: learning tasks sequentially without forgetting the previous ones. In this paper, we tackle these challenges by proposing DisCoRL, an approach combining state representation learning and policy distillation. We experiment on a sequence of three simulated 2D navigation tasks with a three-wheel omni-directional robot. Moreover, we tested our approach’s robustness by transferring the final policy into a real-life setting. The policy can solve all tasks and automatically infer which one to run.
Bibtex
@article{Traore19DisCoRL,
  author        = {Traor{\'{e}}, Ren{\'{e}} and
                   Caselles{-}Dupr{\'{e}}, Hugo and
                   Lesort, Timoth{\'{e}}e and
                   Sun, Te and
                   Cai, Guanghang and
                   D{\'{\i}}az Rodr{\'{\i}}guez, Natalia and
                   Filliat, David},
  title         = {{DisCoRL}: Continual Reinforcement Learning via Policy Distillation},
  journal       = {CoRR},
  volume        = {abs/1907.05855},
  year          = {2019},
  url           = {http://arxiv.org/abs/1907.05855},
  archiveprefix = {arXiv},
  eprint        = {1907.05855},
  timestamp     = {Wed, 17 Jul 2019 10:27:36 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1907-05855},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}