14 #ifndef MLPACK_METHODS_RL_ASYNC_LEARNING_HPP 15 #define MLPACK_METHODS_RL_ASYNC_LEARNING_HPP 52 typename EnvironmentType,
72 UpdaterType updater = UpdaterType(),
73 EnvironmentType environment = EnvironmentType());
/**
 * Start asynchronous training (declaration only; the body is not visible
 * in this listing).
 *
 * @tparam Measure Type of the measure argument.
 * @param measure Passed by non-const reference; how it is used is not shown
 *     by this declaration -- TODO confirm against the implementation file.
 */
88 template <
typename Measure>
89 void Train(Measure& measure);
/**
 * Mutable accessor: returns a non-const reference to the learningNetwork
 * member, so the caller can modify the network through it.
 */
97 NetworkType&
Network() {
return learningNetwork; }
/**
 * Read-only accessor: returns a const reference to the learningNetwork
 * member. Usable on const instances.
 */
99 const NetworkType&
Network()
const {
return learningNetwork; }
/**
 * Read-only accessor: returns a const reference to the policy member.
 * Usable on const instances.
 */
104 const PolicyType&
Policy()
const {
return policy; }
/**
 * Read-only accessor: returns a const reference to the updater member.
 * Usable on const instances.
 */
109 const UpdaterType&
Updater()
const {
return updater; }
/**
 * Read-only accessor: returns a const reference to the environment member.
 * Usable on const instances.
 */
114 const EnvironmentType&
Environment()
const {
return environment; }
121 NetworkType learningNetwork;
130 EnvironmentType environment;
142 typename EnvironmentType,
143 typename NetworkType,
144 typename UpdaterType,
158 typename EnvironmentType,
159 typename NetworkType,
160 typename UpdaterType,
174 typename EnvironmentType,
175 typename NetworkType,
176 typename UpdaterType,
190 typename EnvironmentType,
191 typename NetworkType,
192 typename UpdaterType,
196 NetworkType, UpdaterType, PolicyType>, EnvironmentType, NetworkType,
197 UpdaterType, PolicyType>;
208 typename EnvironmentType,
209 typename NetworkType,
210 typename UpdaterType,
214 NetworkType, UpdaterType, PolicyType>, EnvironmentType, NetworkType,
215 UpdaterType, PolicyType>;
226 typename EnvironmentType,
227 typename NetworkType,
228 typename UpdaterType,
232 NetworkType, UpdaterType, PolicyType>, EnvironmentType, NetworkType,
233 UpdaterType, PolicyType>;
239 #include "async_learning_impl.hpp" const NetworkType & Network() const
Get learning network.
Linear algebra utility functions, generally performed on matrices or vectors.
EnvironmentType & Environment()
Modify the environment.
PolicyType & Policy()
Modify behavior policy.
The core includes that mlpack expects; standard C++ includes and Armadillo.
const PolicyType & Policy() const
Get behavior policy.
UpdaterType & Updater()
Modify optimizer.
Forward declaration of OneStepQLearningWorker.
const EnvironmentType & Environment() const
Get the environment.
AsyncLearning(TrainingConfig config, NetworkType network, PolicyType policy, UpdaterType updater=UpdaterType(), EnvironmentType environment=EnvironmentType())
Construct an instance of the given async learning algorithm.
Forward declaration of NStepQLearningWorker.
TrainingConfig & Config()
Modify training config.
Wrapper of various asynchronous learning algorithms, e.g. one-step Q-learning, n-step Q-learning, and one-step SARSA.
const UpdaterType & Updater() const
Get optimizer.
Forward declaration of OneStepSarsaWorker.
void Train(Measure &measure)
Start asynchronous training.
NetworkType & Network()
Modify learning network.
const TrainingConfig & Config() const
Get training config.