14 #ifndef MLPACK_METHODS_RL_POLICY_AGGREGATED_POLICY_HPP 15 #define MLPACK_METHODS_RL_POLICY_AGGREGATED_POLICY_HPP 26 template <
typename PolicyType>
40 const arma::colvec& distribution) :
41 policies(
std::move(policies)),
42 sampler({distribution})
55 return policies.front().Sample(actionValue,
true);
56 size_t selected = arma::as_scalar(sampler.
Random());
57 return policies[selected].Sample(actionValue,
false);
65 for (PolicyType& policy : policies)
71 std::vector<PolicyType> policies;
Linear algebra utility functions, generally performed on matrices or vectors.
A discrete distribution where the only observations are discrete observations.
The core includes that mlpack expects; standard C++ includes and Armadillo.
arma::vec Random() const
Return a randomly generated observation (one-dimensional vector; one observation) according to the probability distribution defined by this object.
void Anneal()
Exploration probability will anneal at each step.
ActionType Sample(const arma::colvec &actionValue, bool deterministic=false)
Sample an action based on given action values.
typename PolicyType::ActionType ActionType
Convenient typedef for action.
AggregatedPolicy(std::vector< PolicyType > policies, const arma::colvec &distribution)