@inproceedings{f3ac1a085c77439eb0a7278205f598a7,
  author    = {Choi, Samuel P. M. and Yeung, Dit Yan and Zhang, Nevin L.},
  title     = {An Environment Model for Nonstationary Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems 12 - Proceedings of the 1999 Conference, NIPS 1999},
  series    = {Advances in Neural Information Processing Systems},
  pages     = {307--313},
  year      = {2000},
  publisher = {MIT Press},
  isbn      = {0262194503},
  language  = {English},
  note      = {13th Annual Neural Information Processing Systems Conference, NIPS 1999 ; Conference date: 29-11-1999 Through 04-12-1999},
  abstract  = {Reinforcement learning in nonstationary environments is generally regarded as an important and yet difficult problem. This paper partially addresses the problem by formalizing a subclass of nonstationary environments. The environment model, called hidden-mode Markov decision process (HM-MDP), assumes that environmental changes are always confined to a small number of hidden modes. A mode basically indexes a Markov decision process (MDP) and evolves with time according to a Markov chain. While HM-MDP is a special case of partially observable Markov decision processes (POMDP), modeling an HM-MDP environment via the more general POMDP model unnecessarily increases the problem complexity. A variant of the Baum-Welch algorithm is developed for model learning requiring less data and time.},
}