mlpack-git/doxygen/sac_8hpp_source.html

 #ifndef MLPACK_METHODS_RL_SAC_HPP
 #define MLPACK_METHODS_RL_SAC_HPP

 #include <mlpack/prereqs.hpp>
 #include <ensmallen.hpp>

 #include "replay/random_replay.hpp"
 #include <mlpack/methods/ann/activation_functions/tanh_function.hpp>
 #include <mlpack/methods/ann/loss_functions/mean_squared_error.hpp>
 #include <mlpack/methods/ann/visitor/parameters_visitor.hpp>
 #include "training_config.hpp"

 namespace mlpack {
 namespace rl {

 /**
  * Implementation of Soft Actor-Critic (SAC), a model-free off-policy
  * actor-critic based deep reinforcement learning algorithm.
  *
  * The agent keeps two learning Q networks, two target Q networks and one
  * policy network.  The first learning Q network, the policy network, the
  * replay method and the training configuration are held by reference; the
  * remaining networks are members of this class.
  *
  * @tparam EnvironmentType The environment of the task; must provide the
  *     nested types `State` and `Action`.
  * @tparam QNetworkType The type of the Q (critic) networks.
  * @tparam PolicyNetworkType The type of the policy (actor) network.
  * @tparam UpdaterType The optimizer type; must be default constructible to
  *     use the default constructor arguments, and, when
  *     ENS_VERSION_MAJOR >= 2, must expose a member template
  *     `Policy<arma::mat, arma::mat>`.
  * @tparam ReplayType The experience replay method; defaults to
  *     RandomReplay<EnvironmentType>.
  */
 template <typename EnvironmentType,
           typename QNetworkType,
           typename PolicyNetworkType,
           typename UpdaterType,
           typename ReplayType = RandomReplay<EnvironmentType>>
 class SAC
 {
  public:
   //! Convenient typedef for state.
   using StateType = typename EnvironmentType::State;

   //! Convenient typedef for action.
   using ActionType = typename EnvironmentType::Action;

   /**
    * Create the SAC object with given settings.
    *
    * The first four arguments are taken by non-const reference and the class
    * stores reference members of the same names, so presumably they must
    * outlive the agent -- confirm against sac_impl.hpp.
    *
    * @param config Hyper-parameters for training.
    * @param learningQ1Network The first learning Q network.
    * @param policyNetwork The policy network.
    * @param replayMethod The experience replay method.
    * @param qNetworkUpdater Optimizer used for the Q networks.
    * @param policyNetworkUpdater Optimizer used for the policy network.
    * @param environment The reinforcement learning task.
    */
   SAC(
       TrainingConfig& config,
       QNetworkType& learningQ1Network,
       PolicyNetworkType& policyNetwork,
       ReplayType& replayMethod,
       UpdaterType qNetworkUpdater = UpdaterType(),
       UpdaterType policyNetworkUpdater = UpdaterType(),
       EnvironmentType environment = EnvironmentType());

   /**
    * Clean memory.
    *
    * NOTE(review): the class holds raw update-policy pointers when
    * ENS_VERSION_MAJOR >= 2; presumably they are released here, which would
    * make copying an agent unsafe -- confirm against sac_impl.hpp.
    */
   ~SAC();

   /**
    * Softly update the learning Q network parameters to the target Q network
    * parameters.
    *
    * @param rho Coefficient of the soft update (presumably the interpolation
    *     weight between learning and target parameters -- TODO confirm).
    */
   void SoftUpdate(double rho);

   //! Update the Q and policy networks.
   void Update();

   //! Select an action, given an agent.
   void SelectAction();

   /**
    * Execute an episode.
    *
    * @return A double summarising the episode (presumably its total reward
    *     -- TODO confirm against sac_impl.hpp).
    */
   double Episode();

   //! Modify total steps from beginning.
   size_t& TotalSteps()
   {
     return totalSteps;
   }
   //! Get total steps from beginning.
   const size_t& TotalSteps() const
   {
     return totalSteps;
   }

   //! Modify the state of the agent.
   StateType& State()
   {
     return state;
   }
   //! Get the state of the agent.
   const StateType& State() const
   {
     return state;
   }

   //! Get the action of the agent (read-only; no mutable overload exists).
   const ActionType& Action() const
   {
     return action;
   }

   //! Modify the training mode / test mode indicator.
   bool& Deterministic()
   {
     return deterministic;
   }
   //! Get the indicator of training mode / test mode.
   const bool& Deterministic() const
   {
     return deterministic;
   }

  private:
   // NOTE: the declaration order below fixes the member initialization
   // order, so it must not be rearranged independently of the constructor.

   //! Training hyper-parameters (held by reference).
   TrainingConfig& config;

   //! First learning Q network (held by reference).
   QNetworkType& learningQ1Network;
   //! Second learning Q network (held by value).
   QNetworkType learningQ2Network;

   //! Target counterpart of the first Q network (held by value).
   QNetworkType targetQ1Network;
   //! Target counterpart of the second Q network (held by value).
   QNetworkType targetQ2Network;

   //! Policy network (held by reference).
   PolicyNetworkType& policyNetwork;

   //! Experience replay method (held by reference).
   ReplayType& replayMethod;

   //! Optimizer for the Q networks.
   UpdaterType qNetworkUpdater;
   #if ENS_VERSION_MAJOR >= 2
   //! Update policy instantiated from the Q network optimizer (raw pointer).
   typename UpdaterType::template Policy<arma::mat, arma::mat>*
       qNetworkUpdatePolicy;
   #endif

   //! Optimizer for the policy network.
   UpdaterType policyNetworkUpdater;
   #if ENS_VERSION_MAJOR >= 2
   //! Update policy instantiated from the policy optimizer (raw pointer).
   typename UpdaterType::template Policy<arma::mat, arma::mat>*
       policyNetworkUpdatePolicy;
   #endif

   //! The reinforcement learning task.
   EnvironmentType environment;

   //! Total steps from the beginning of the task.
   size_t totalSteps;

   //! Current state of the agent.
   StateType state;

   //! Current action of the agent.
   ActionType action;

   //! Training mode / test mode indicator.
   bool deterministic;

   //! Mean squared error loss object.
   mlpack::ann::MeanSquaredError<> lossFunction;
 };

 } // namespace rl
 } // namespace mlpack

 // Include implementation
 #include "sac_impl.hpp"
 #endif
mlpack::rl::SAC::~SAC
~SAC()
Clean memory.

mlpack::rl::SAC::ActionType
typename EnvironmentType::Action ActionType
Convenient typedef for action.
Definition: sac.hpp:71

mlpack::rl::SAC::SelectAction
void SelectAction()
Select an action, given an agent.

parameters_visitor.hpp

mlpack
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_cli11.hpp:21

mlpack::rl::SAC
Implementation of Soft Actor-Critic, a model-free off-policy actor-critic based deep reinforcement learning algorithm.
Definition: sac.hpp:64

random_replay.hpp

prereqs.hpp
The core includes that mlpack expects; standard C++ includes and Armadillo.

mlpack::rl::SAC::Episode
double Episode()
Execute an episode.

mlpack::rl::SAC::SAC
SAC(TrainingConfig &config, QNetworkType &learningQ1Network, PolicyNetworkType &policyNetwork, ReplayType &replayMethod, UpdaterType qNetworkUpdater=UpdaterType(), UpdaterType policyNetworkUpdater=UpdaterType(), EnvironmentType environment=EnvironmentType())
Create the SAC object with given settings.

mlpack::rl::SAC::Update
void Update()
Update the Q and policy networks.

mlpack::rl::SAC::State
const StateType & State() const
Get the state of the agent.
Definition: sac.hpp:134

mlpack::rl::TrainingConfig
Definition: training_config.hpp:19

mlpack::rl::SAC::TotalSteps
size_t & TotalSteps()
Modify total steps from beginning.
Definition: sac.hpp:127

mean_squared_error.hpp

mlpack::rl::SAC::SoftUpdate
void SoftUpdate(double rho)
Softly update the learning Q network parameters to the target Q network parameters.

mlpack::rl::SAC::Deterministic
bool & Deterministic()
Modify the training mode / test mode indicator.
Definition: sac.hpp:140

mlpack::rl::SAC::TotalSteps
const size_t & TotalSteps() const
Get total steps from beginning.
Definition: sac.hpp:129

training_config.hpp

mlpack::ann::MeanSquaredError
The mean squared error performance function measures the network's performance according to the mean of squared errors.
Definition: mean_squared_error.hpp:34

mlpack::rl::SAC::Action
const ActionType & Action() const
Get the action of the agent.
Definition: sac.hpp:137

mlpack::rl::SAC::State
StateType & State()
Modify the state of the agent.
Definition: sac.hpp:132

tanh_function.hpp

mlpack::rl::SAC::Deterministic
const bool & Deterministic() const
Get the indicator of training mode / test mode.
Definition: sac.hpp:142

mlpack::rl::SAC::StateType
typename EnvironmentType::State StateType
Convenient typedef for state.
Definition: sac.hpp:68