#ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
#define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP

template<typename EnvironmentType>
class RewardClipping
{
 public:
  //! Convenient typedefs for the wrapped environment's state and action.
  using State = typename EnvironmentType::State;
  using Action = typename EnvironmentType::Action;

  //! Constructor for creating a RewardClipping instance.
  RewardClipping(EnvironmentType& environment,
                 const double minReward = -1.0,
                 const double maxReward = 1.0) :
      environment(environment), minReward(minReward), maxReward(maxReward)
  { /* Nothing else to do. */ }

  //! Initialize the starting state by delegating to the wrapped environment.
  State InitialSample() { return environment.InitialSample(); }

  //! Checks whether the given state is a terminal state.
  bool IsTerminal(const State& state) const
  { return environment.IsTerminal(state); }

  //! Dynamics of the environment: sample a reward, then clip it.
  double Sample(const State& state, const Action& action, State& nextState)
  {
    // Get the original, unclipped reward from the wrapped environment, then
    // clip it to [minReward, maxReward] (ClampRange is documented below).
    double unclippedReward = environment.Sample(state, action, nextState);
    return ClampRange(unclippedReward, minReward, maxReward);
  }

  //! Overload used when the caller does not need the next state back.
  double Sample(const State& state, const Action& action)
  {
    State nextState;
    return Sample(state, action, nextState);
  }

 private:
  //! The wrapped environment and the clipping bounds.
  EnvironmentType environment;
  double minReward;
  double maxReward;
};

#endif
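A minimal usage sketch of the wrapper follows. It assumes mlpack's Pendulum environment, the mlpack 3.x header layout, and the mlpack::rl namespace; any environment type exposing State, Action, InitialSample(), Sample(), and IsTerminal() can be wrapped the same way.

#include <mlpack/methods/reinforcement_learning/environment/pendulum.hpp>
#include <mlpack/methods/reinforcement_learning/environment/reward_clipping.hpp>

using namespace mlpack::rl;

int main()
{
  // The environment whose rewards should be clipped.
  Pendulum task;

  // Clip every reward produced by the task to [-1, 1].
  RewardClipping<Pendulum> clippedTask(task, -1.0, 1.0);

  Pendulum::State state = clippedTask.InitialSample();
  Pendulum::Action action;   // Default-constructed (zero) action, for illustration.
  Pendulum::State nextState;

  // The returned reward is guaranteed to lie in [-1.0, 1.0].
  const double reward = clippedTask.Sample(state, action, nextState);
  (void) reward;
}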
RewardClipping<EnvironmentType>
    Interface for clipping the reward returned by a wrapped environment to a value between the specified minimum and maximum.

Member summary:

typename EnvironmentType::State State
    Convenient typedef for state.
typename EnvironmentType::Action Action
    Convenient typedef for action.
RewardClipping(EnvironmentType& environment, const double minReward = -1.0, const double maxReward = 1.0)
    Constructor for creating a RewardClipping instance.
State InitialSample()
    The InitialSample method is called to initialize the starting state; it delegates to the wrapped environment.
bool IsTerminal(const State& state) const
    Checks whether the given state is a terminal state, by asking the wrapped environment.
double Sample(const State& state, const Action& action, State& nextState)
    Dynamics of the environment: samples a reward and next state from the wrapped environment and clips the reward to [minReward, maxReward].
double Sample(const State& state, const Action& action)
    Dynamics of the environment when the next state is not needed.
EnvironmentType& Environment() const
    Get the environment.
EnvironmentType& Environment()
    Modify the environment.
double MinReward() const
    Get the minimum reward value.
double& MinReward()
    Modify the minimum reward value.
double MaxReward() const
    Get the maximum reward value.
double& MaxReward()
    Modify the maximum reward value.

Related utility:

double ClampRange(double value, const double rangeMin, const double rangeMax)
    Clamp a number between a particular range; this is the routine Sample() uses to clip the reward.

Includes: the header relies on mlpack's core includes (standard C++ includes and Armadillo), the linear algebra utility functions for matrices and vectors, and the miscellaneous math clamping routines that provide ClampRange.
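The reward bounds can also be adjusted after construction through the modifiable accessors. A short sketch, reusing the clippedTask object from the example above; the numeric values are illustrative only:

// Widen the clipping range at runtime.
clippedTask.MinReward() = -5.0;
clippedTask.MaxReward() = 5.0;

// ClampRange simply saturates its input at the given bounds, e.g.:
//   ClampRange( 7.3, -5.0, 5.0) returns  5.0
//   ClampRange(-9.1, -5.0, 5.0) returns -5.0
//   ClampRange( 0.4, -5.0, 5.0) returns  0.4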