random_replay.hpp
/**
 * @file random_replay.hpp
 *
 * Implementation of random experience replay.
 *
 * mlpack is free software; you may redistribute it and/or modify it under the
 * terms of the 3-clause BSD license.  You should have received a copy of the
 * 3-clause BSD license along with mlpack.  If not, see
 * http://www.mlpack.org/license.txt for more information.
 */
#ifndef MLPACK_METHODS_RL_REPLAY_RANDOM_REPLAY_HPP
#define MLPACK_METHODS_RL_REPLAY_RANDOM_REPLAY_HPP

#include <mlpack/prereqs.hpp>
#include <cassert>
#include <deque>

namespace mlpack {
namespace rl {
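
/**
 * Implementation of random experience replay.
 *
 * At each time step, the interaction between the agent and the environment is
 * saved into a memory buffer. At training time, a batch of previous
 * experiences is drawn uniformly at random from that buffer, which breaks the
 * temporal correlation between consecutive samples (see, e.g., Mnih et al.,
 * "Playing Atari with Deep Reinforcement Learning", 2013). The memory acts as
 * a fixed-capacity circular buffer: once full, the oldest transitions are
 * overwritten first.
 *
 * @tparam EnvironmentType Desired task.
 */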
template <typename EnvironmentType>
class RandomReplay
{
 public:
  //! Convenient typedef for action.
  using ActionType = typename EnvironmentType::Action;

  //! Convenient typedef for state.
  using StateType = typename EnvironmentType::State;

  //! A single transition: (state, action, reward, nextState, isEnd).
  struct Transition
  {
    //! The starting state.
    StateType state;
    //! The action taken from that state.
    ActionType action;
    //! The reward received.
    double reward;
    //! The resulting state.
    StateType nextState;
    //! Whether nextState is a terminal state.
    bool isEnd;
  };
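
  //! Default constructor.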
  RandomReplay() :
      batchSize(0),
      capacity(0),
      position(0),
      full(false),
      nSteps(0)
  { /* Nothing to do here. */ }
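
  /**
   * Construct an instance of random experience replay class.
   *
   * @param batchSize Number of examples returned at each sample.
   * @param capacity Total memory size in terms of number of examples.
   * @param nSteps Number of steps to look into the future.
   * @param dimension The dimension of an encoded state.
   */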
  RandomReplay(const size_t batchSize,
               const size_t capacity,
               const size_t nSteps = 1,
               const size_t dimension = StateType::dimension) :
      batchSize(batchSize),
      capacity(capacity),
      position(0),
      full(false),
      nSteps(nSteps),
      states(dimension, capacity),
      actions(capacity),
      rewards(capacity),
      nextStates(dimension, capacity),
      isTerminal(capacity)
  { /* Nothing to do here. */ }
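
  /**
   * Store the given experience.
   *
   * @param state Given state.
   * @param action Given action.
   * @param reward Given reward.
   * @param nextState Given next state.
   * @param isEnd Whether next state is terminal state.
   * @param discount The discount parameter.
   */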
  void Store(StateType state,
             ActionType action,
             double reward,
             StateType nextState,
             bool isEnd,
             const double& discount)
  {
    nStepBuffer.push_back({state, action, reward, nextState, isEnd});

    // The n-step transition is not ready yet.
    if (nStepBuffer.size() < nSteps)
      return;

    // Keep the queue size fixed to nSteps.
    if (nStepBuffer.size() > nSteps)
      nStepBuffer.pop_front();

    // Before moving ahead, confirm that the fixed-size buffer works.
    assert(nStepBuffer.size() == nSteps);

    // Make an n-step transition.
    GetNStepInfo(reward, nextState, isEnd, discount);

    state = nStepBuffer.front().state;
    action = nStepBuffer.front().action;

    states.col(position) = state.Encode();
    actions[position] = action;
    rewards(position) = reward;
    nextStates.col(position) = nextState.Encode();
    isTerminal(position) = isEnd;
    position++;
    if (position == capacity)
    {
      full = true;
      position = 0;
    }
  }
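
  /**
   * Get the reward, next state, and terminal flag for the n-step transition
   * currently held in the buffer. Walking the buffer backwards, it
   * accumulates the truncated n-step return
   * reward = r_t + discount * r_{t+1} + ... + discount^{n-1} * r_{t+n-1},
   * cutting the accumulation off at the first terminal transition.
   *
   * @param reward Gets the n-step discounted reward.
   * @param nextState Gets the next state of the n-step transition.
   * @param isEnd Gets whether the n-step transition ends the episode.
   * @param discount The discount parameter.
   */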
  void GetNStepInfo(double& reward,
                    StateType& nextState,
                    bool& isEnd,
                    const double& discount)
  {
    reward = nStepBuffer.back().reward;
    nextState = nStepBuffer.back().nextState;
    isEnd = nStepBuffer.back().isEnd;

    // Start from the second-to-last transition in the buffer.
    for (int i = nStepBuffer.size() - 2; i >= 0; i--)
    {
      bool iE = nStepBuffer[i].isEnd;
      reward = nStepBuffer[i].reward + discount * reward * (1 - iE);
      if (iE)
      {
        nextState = nStepBuffer[i].nextState;
        isEnd = iE;
      }
    }
  }
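
  /**
   * Sample some experiences uniformly at random from the buffer.
   *
   * @param sampledStates Sampled encoded states.
   * @param sampledActions Sampled actions.
   * @param sampledRewards Sampled rewards.
   * @param sampledNextStates Sampled encoded next states.
   * @param isTerminal Indicate whether corresponding next state is terminal
   *        state.
   */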
  void Sample(arma::mat& sampledStates,
              std::vector<ActionType>& sampledActions,
              arma::rowvec& sampledRewards,
              arma::mat& sampledNextStates,
              arma::irowvec& isTerminal)
  {
    size_t upperBound = full ? capacity : position;
    arma::uvec sampledIndices = arma::randi<arma::uvec>(
        batchSize, arma::distr_param(0, upperBound - 1));

    sampledStates = states.cols(sampledIndices);
    for (size_t t = 0; t < sampledIndices.n_rows; t++)
      sampledActions.push_back(actions[sampledIndices[t]]);
    sampledRewards = rewards.elem(sampledIndices).t();
    sampledNextStates = nextStates.cols(sampledIndices);
    isTerminal = this->isTerminal.elem(sampledIndices).t();
  }
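
  /**
   * Get the number of transitions in the memory.
   *
   * @return Actual used memory size.
   */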
  const size_t& Size()
  {
    return full ? capacity : position;
  }
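
  /**
   * Update the priorities of transitions and update the gradients. This is a
   * no-op for random replay; it exists only to match the replay interface.
   */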
  void Update(arma::mat /* target */,
              std::vector<ActionType> /* sampledActions */,
              arma::mat /* nextActionValues */,
              arma::mat& /* gradients */)
  {
    /* Do nothing for random replay. */
  }

  //! Get the number of steps for n-step agent.
  const size_t& NSteps() const { return nSteps; }

 private:
  //! Locally-stored number of examples of each sample.
  size_t batchSize;

  //! Locally-stored total memory limit.
  size_t capacity;

  //! Indicate the position to store new transition.
  size_t position;

  //! Locally-stored indicator that whole memory is used.
  bool full;

  //! Locally-stored number of steps to look into the future.
  size_t nSteps;

  //! Locally-stored buffer containing the nSteps most recent transitions.
  std::deque<Transition> nStepBuffer;

  //! Locally-stored encoded previous states.
  arma::mat states;

  //! Locally-stored previous actions.
  std::vector<ActionType> actions;

  //! Locally-stored previous rewards.
  arma::rowvec rewards;

  //! Locally-stored encoded previous next states.
  arma::mat nextStates;

  //! Locally-stored termination information of previous experience.
  arma::irowvec isTerminal;
};

} // namespace rl
} // namespace mlpack

#endif
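
The header above is self-contained; what follows is a rough usage sketch, not part of the file. `MyEnv` and `Example` are hypothetical names: `MyEnv` stands in for any environment type that provides the nested `State` (with a static `dimension` and an `Encode()` method returning a column vector) and `Action` types this class template expects.

#include <mlpack/methods/reinforcement_learning/replay/random_replay.hpp>

using namespace mlpack::rl;

// Hypothetical minimal environment satisfying the interface RandomReplay
// expects from its EnvironmentType template parameter.
struct MyEnv
{
  struct State
  {
    //! Dimension of the encoded state, used to size the replay matrices.
    static constexpr size_t dimension = 4;

    arma::colvec data = arma::zeros<arma::colvec>(dimension);

    //! Return the column-vector encoding of this state.
    const arma::colvec& Encode() const { return data; }
  };

  struct Action
  {
    int action = 0;
  };
};

void Example()
{
  // A buffer holding up to 10000 transitions, sampled 32 at a time.
  RandomReplay<MyEnv> replay(32, 10000);

  MyEnv::State state, nextState;
  MyEnv::Action action;

  // Store one interaction. The discount only affects the accumulated n-step
  // return, so it is irrelevant here where nSteps defaults to 1.
  replay.Store(state, action, /* reward */ 1.0, nextState,
      /* isEnd */ false, /* discount */ 0.99);

  // Draw a random training batch (sampled with replacement).
  arma::mat states, nextStates;
  std::vector<MyEnv::Action> actions;
  arma::rowvec rewards;
  arma::irowvec terminals;
  replay.Sample(states, actions, rewards, nextStates, terminals);
}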