13 #ifndef MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP 14 #define MLPACK_METHODS_RL_POLICY_GREEDY_POLICY_HPP 30 template <
typename EnvironmentType>
49 const size_t annealInterval,
50 const double minEpsilon,
51 const double decayRate = 1.0) :
52 epsilon(initialEpsilon),
53 minEpsilon(minEpsilon),
54 delta(((initialEpsilon - minEpsilon) * decayRate) / annealInterval)
66 bool deterministic =
false,
67 const bool isNoisy =
false)
73 if (!deterministic && exploration < epsilon && isNoisy ==
false)
75 action.action =
static_cast<decltype(action.action)
> 81 action.action =
static_cast<decltype(action.action)
>(
82 arma::as_scalar(arma::find(actionValue == actionValue.max(), 1)));
93 epsilon = std::max(minEpsilon, epsilon);
// Read-only accessor: returns a const reference to the stored `epsilon`
// member, the threshold the sampling code compares its exploration draw
// against. It does not modify the policy.
99 const double&
Epsilon()
const {
return epsilon; }
typename EnvironmentType::Action ActionType
Convenient typedef for the environment's action type.
Linear algebra utility functions, generally performed on matrices or vectors.
Implementation of the epsilon-greedy policy.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void Anneal()
Exploration probability will anneal at each step.
ActionType Sample(const arma::colvec &actionValue, bool deterministic=false, const bool isNoisy=false)
Sample an action based on given action values.
const double & Epsilon() const
double Random()
Generates a uniform random number between 0 and 1.
int RandInt(const int hiExclusive)
Generates a uniform random integer.
GreedyPolicy(const double initialEpsilon, const size_t annealInterval, const double minEpsilon, const double decayRate=1.0)
Constructor for the epsilon-greedy policy class.