reward_clipping.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
13 #define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
14 
15 #include <mlpack/prereqs.hpp>
17 
18 namespace mlpack {
19 namespace rl {
20 
29 template <typename EnvironmentType>
31 {
32  public:
34  using State = typename EnvironmentType::State;
35 
37  using Action = typename EnvironmentType::Action;
38 
47  RewardClipping(EnvironmentType& environment,
48  const double minReward = -1.0,
49  const double maxReward = 1.0) :
50  environment(environment),
51  minReward(minReward),
52  maxReward(maxReward)
53  {
54  // Nothing to do here
55  }
56 
63  {
64  return environment.InitialSample();
65  }
66 
74  bool IsTerminal(const State& state) const
75  {
76  return environment.IsTerminal(state);
77  }
78 
88  double Sample(const State& state,
89  const Action& action,
90  State& nextState)
91  {
92  // Get original unclipped reward from base environment.
93  double unclippedReward = environment.Sample(state, action, nextState);
94  // Clip rewards according to the min and max limit and return.
95  return math::ClampRange(unclippedReward, minReward, maxReward);
96  }
97 
106  double Sample(const State& state, const Action& action)
107  {
108  State nextState;
109  return Sample(state, action, nextState);
110  }
111 
113  EnvironmentType& Environment() const { return environment; }
115  EnvironmentType& Environment() { return environment; }
116 
118  double MinReward() const { return minReward; }
120  double& MinReward() { return minReward; }
121 
123  double MaxReward() const { return maxReward; }
125  double& MaxReward() { return maxReward; }
126 
127  private:
129  EnvironmentType environment;
130 
132  double minReward;
133 
135  double maxReward;
136 };
137 
138 } // namespace rl
139 } // namespace mlpack
140 
141 #endif
double MaxReward() const
Get the maximum reward value.
Linear algebra utility functions, generally performed on matrices or vectors.
EnvironmentType & Environment()
Modify the environment.
RewardClipping(EnvironmentType &environment, const double minReward=-1.0, const double maxReward=1.0)
Constructor for creating a RewardClipping instance.
The core includes that mlpack expects; standard C++ includes and Armadillo.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Environment.
State InitialSample()
The InitialSample method is called on the wrapped environment to obtain the starting state.
Miscellaneous math clamping routines.
typename EnvironmentType::State State
Convenient typedef for state.
typename EnvironmentType::Action Action
Convenient typedef for action.
double & MaxReward()
Modify the maximum reward value.
double & MinReward()
Modify the minimum reward value.
double MinReward() const
Get the minimum reward value.
Interface for clipping the reward to some value between the specified maximum and minimum value (each sampled reward is clamped into [minReward, maxReward]).
double Sample(const State &state, const Action &action)
Dynamics of Environment.
bool IsTerminal(const State &state) const
Checks whether given state is a terminal state.
EnvironmentType & Environment() const
Get the environment.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53