training_config.hpp
/**
 * @file training_config.hpp
 *
 * Definition of the TrainingConfig class, which holds the hyperparameters
 * used to train mlpack's reinforcement learning agents.
 */
#ifndef MLPACK_METHODS_RL_TRAINING_CONFIG_HPP
#define MLPACK_METHODS_RL_TRAINING_CONFIG_HPP

namespace mlpack {
namespace rl {

class TrainingConfig
{
 public:
  //! Default constructor: initialize every hyperparameter to its default.
  TrainingConfig() :
      numWorkers(1),
      updateInterval(1),
      targetNetworkSyncInterval(100),
      stepLimit(200),
      explorationSteps(1),
      stepSize(0.01),
      discount(0.99),
      gradientLimit(40),
      doubleQLearning(false),
      noisyQLearning(false),
      isCategorical(false),
      atomSize(51),
      vMin(0),
      vMax(200),
      rho(0.005)
  { /* Nothing to do here. */ }

  //! Construct a TrainingConfig object from the given hyperparameters.
  TrainingConfig(
      size_t numWorkers,
      size_t updateInterval,
      size_t targetNetworkSyncInterval,
      size_t stepLimit,
      size_t explorationSteps,
      double stepSize,
      double discount,
      double gradientLimit,
      bool doubleQLearning,
      bool noisyQLearning,
      bool isCategorical,
      size_t atomSize,
      double vMin,
      double vMax,
      double rho) :
      numWorkers(numWorkers),
      updateInterval(updateInterval),
      targetNetworkSyncInterval(targetNetworkSyncInterval),
      stepLimit(stepLimit),
      explorationSteps(explorationSteps),
      stepSize(stepSize),
      discount(discount),
      gradientLimit(gradientLimit),
      doubleQLearning(doubleQLearning),
      noisyQLearning(noisyQLearning),
      isCategorical(isCategorical),
      atomSize(atomSize),
      vMin(vMin),
      vMax(vMax),
      rho(rho)
  { /* Nothing to do here. */ }

  //! Get the number of workers.
  size_t NumWorkers() const { return numWorkers; }
  //! Modify the number of workers.
  size_t& NumWorkers() { return numWorkers; }

  //! Get the update interval.
  size_t UpdateInterval() const { return updateInterval; }
  //! Modify the update interval.
  size_t& UpdateInterval() { return updateInterval; }

  //! Get the interval for syncing the target network.
  size_t TargetNetworkSyncInterval() const
  { return targetNetworkSyncInterval; }
  //! Modify the interval for syncing the target network.
  size_t& TargetNetworkSyncInterval() { return targetNetworkSyncInterval; }

  //! Get the maximum number of steps in each episode.
  size_t StepLimit() const { return stepLimit; }
  //! Modify the maximum number of steps in each episode.
  size_t& StepLimit() { return stepLimit; }

  //! Get the number of exploration steps.
  size_t ExplorationSteps() const { return explorationSteps; }
  //! Modify the number of exploration steps.
  size_t& ExplorationSteps() { return explorationSteps; }

  //! Get the step size of the optimizer.
  double StepSize() const { return stepSize; }
  //! Modify the step size of the optimizer.
  double& StepSize() { return stepSize; }

  //! Get the discount rate for future reward.
  double Discount() const { return discount; }
  //! Modify the discount rate for future reward.
  double& Discount() { return discount; }

  //! Get the limit on the update gradient.
  double GradientLimit() const { return gradientLimit; }
  //! Modify the limit on the update gradient.
  double& GradientLimit() { return gradientLimit; }

  //! Get the indicator of double Q-learning.
  bool DoubleQLearning() const { return doubleQLearning; }
  //! Modify the indicator of double Q-learning.
  bool& DoubleQLearning() { return doubleQLearning; }

  //! Get the indicator of noisy Q-learning.
  bool NoisyQLearning() const { return noisyQLearning; }
  //! Modify the indicator of noisy Q-learning.
  bool& NoisyQLearning() { return noisyQLearning; }

  //! Get the indicator of categorical Q-learning.
  bool IsCategorical() const { return isCategorical; }
  //! Modify the indicator of categorical Q-learning.
  bool& IsCategorical() { return isCategorical; }

  //! Get the number of atoms.
  size_t AtomSize() const { return atomSize; }
  //! Modify the number of atoms.
  size_t& AtomSize() { return atomSize; }

  //! Get the minimum value of the support.
  double VMin() const { return vMin; }
  //! Modify the minimum value of the support.
  double& VMin() { return vMin; }

  //! Get the maximum value of the support.
  double VMax() const { return vMax; }
  //! Modify the maximum value of the support.
  double& VMax() { return vMax; }

  //! Get the rho value for SAC.
  double Rho() const { return rho; }
  //! Modify the rho value for SAC.
  double& Rho() { return rho; }

 private:
  //! The number of workers during training.
  size_t numWorkers;

  //! The update interval.
  size_t updateInterval;

  //! The interval for syncing the target network.
  size_t targetNetworkSyncInterval;

  //! The maximum number of steps in each episode.
  size_t stepLimit;

  //! The number of exploration steps.
  size_t explorationSteps;

  //! The step size of the optimizer.
  double stepSize;

  //! The discount rate for future reward.
  double discount;

  //! The limit on the update gradient.
  double gradientLimit;

  //! Whether to use double Q-learning.
  bool doubleQLearning;

  //! Whether to use noisy Q-learning.
  bool noisyQLearning;

  //! Whether to use categorical Q-learning.
  bool isCategorical;

  //! The number of atoms (categorical Q-learning).
  size_t atomSize;

  //! The minimum value of the support (categorical Q-learning).
  double vMin;

  //! The maximum value of the support (categorical Q-learning).
  double vMax;

  //! The rho value for SAC.
  double rho;
};

} // namespace rl
} // namespace mlpack

#endif
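
As a rough usage sketch (not part of this header, and assuming mlpack's usual include layout under mlpack/methods/reinforcement_learning/), the hyperparameters are typically tuned through the modifier accessors above before the object is handed to an agent such as QLearning:

#include <mlpack/core.hpp>
#include <mlpack/methods/reinforcement_learning/training_config.hpp>

int main()
{
  // Start from the defaults provided by the default constructor above.
  mlpack::rl::TrainingConfig config;

  // Adjust individual hyperparameters through the modifier accessors.
  config.StepSize() = 0.001;                // Optimizer step size.
  config.Discount() = 0.99;                 // Discount rate for future reward.
  config.TargetNetworkSyncInterval() = 200; // Interval for syncing the target network.
  config.ExplorationSteps() = 100;          // Exploration steps before learning starts.
  config.StepLimit() = 500;                 // Maximum steps per episode.
  config.DoubleQLearning() = true;          // Enable double Q-learning.

  // The configured object is then passed to an agent (for example QLearning)
  // together with a network, a policy, and a replay buffer.
  return 0;
}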