13 #ifndef MLPACK_METHODS_RL_TRAINING_CONFIG_HPP 14 #define MLPACK_METHODS_RL_TRAINING_CONFIG_HPP 25 targetNetworkSyncInterval(100),
31 doubleQLearning(false),
32 noisyQLearning(false),
42 size_t updateInterval,
43 size_t targetNetworkSyncInterval,
45 size_t explorationSteps,
56 numWorkers(numWorkers),
57 updateInterval(updateInterval),
58 targetNetworkSyncInterval(targetNetworkSyncInterval),
60 explorationSteps(explorationSteps),
63 gradientLimit(gradientLimit),
64 doubleQLearning(doubleQLearning),
65 noisyQLearning(noisyQLearning),
66 isCategorical(isCategorical),
85 {
return targetNetworkSyncInterval; }
138 double VMin()
const {
return vMin; }
140 double&
VMin() {
return vMin; }
143 double VMax()
const {
return vMax; }
145 double&
VMax() {
return vMax; }
148 double Rho()
const {
return rho; }
150 double&
Rho() {
return rho; }
165 size_t updateInterval;
171 size_t targetNetworkSyncInterval;
184 size_t explorationSteps;
202 double gradientLimit;
208 bool doubleQLearning;
double & Discount()
Modify the discount rate for future reward.
size_t ExplorationSteps() const
Get the exploration steps.
size_t & AtomSize()
Modify the number of atoms.
Linear algebra utility functions, generally performed on matrices or vectors.
double VMin() const
Get the minimum value for support.
bool NoisyQLearning() const
Get the indicator of noisy q-learning.
size_t NumWorkers() const
Get the number of workers.
double VMax() const
Get the maximum value for support.
size_t StepLimit() const
Get the maximum steps of each episode.
size_t TargetNetworkSyncInterval() const
Get the interval for syncing target network.
size_t & StepLimit()
Modify the maximum steps of each episode.
double & Rho()
Modify the rho value for SAC.
size_t & TargetNetworkSyncInterval()
Modify the interval for syncing target network.
double & VMin()
Modify the minimum value for support.
TrainingConfig(size_t numWorkers, size_t updateInterval, size_t targetNetworkSyncInterval, size_t stepLimit, size_t explorationSteps, double stepSize, double discount, double gradientLimit, bool doubleQLearning, bool noisyQLearning, bool isCategorical, size_t atomSize, double vMin, double vMax, double rho)
size_t UpdateInterval() const
Get the update interval.
double Discount() const
Get the discount rate for future reward.
size_t & NumWorkers()
Modify the number of workers.
bool DoubleQLearning() const
Get the indicator of double q-learning.
size_t & ExplorationSteps()
Modify the exploration steps.
double & VMax()
Modify the maximum value for support.
bool & DoubleQLearning()
Modify the indicator of double q-learning.
double GradientLimit() const
Get the limit of update gradient.
size_t & UpdateInterval()
Modify the update interval.
double Rho() const
Get the rho value for SAC.
double & GradientLimit()
Modify the limit of update gradient.
double StepSize() const
Get the step size of the optimizer.
bool IsCategorical() const
Get the indicator of categorical q-learning.
double & StepSize()
Modify the step size of the optimizer.
bool & NoisyQLearning()
Modify the indicator of noisy q-learning.
size_t AtomSize() const
Get the number of atoms.
bool & IsCategorical()
Modify the indicator of categorical q-learning.