mlpack-git/doxygen/pendulum_8hpp_source.html

 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
 #define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP

 #include <mlpack/prereqs.hpp>
 #include <mlpack/core/math/clamp.hpp>

 namespace mlpack {
 namespace rl {

 /**
  * Implementation of the Pendulum task.
  *
  * The environment keeps a step counter; every call to Sample() advances the
  * pendulum by one time step of length dt and returns the negative of a
  * quadratic cost on the normalized angle, the angular velocity and the applied
  * torque.
  */
 class Pendulum
 {
  public:
   /**
    * Implementation of state of Pendulum.
    *
    * The encoded state is the column vector
    * (sin(theta), cos(theta), angular velocity).  The raw angle theta is kept
    * next to it; call SetState() after modifying theta to refresh the first
    * two entries of the encoding.
    */
   class State
   {
    public:
     /**
      * Construct a state instance: theta is 0 and the encoded vector is all
      * zeros.
      */
     State() : theta(0), data(dimension, arma::fill::zeros)
     { /* Nothing to do here. */ }

     /**
      * Construct a state based on the given data.
      *
      * @param data Encoded state, laid out as
      *     (sin(theta), cos(theta), angular velocity).
      */
     State(const arma::colvec& data): theta(0), data(data)
     {
       // Recover the angle from its (sin, cos) encoding so that Theta() agrees
       // with Encode().  The result lies in [-pi, pi]; the dynamics only use
       // theta through sin() and the normalized angle, so the wrapped value is
       // equivalent.  Vectors too short to hold the pair leave theta at 0.
       if (data.n_elem >= 2)
         theta = std::atan2(data[0], data[1]);
     }

     //! Modify the internal representation of the state.
     arma::colvec& Data() { return data; }

     //! Get the theta.
     double Theta() const { return theta; }
     //! Modify the value of theta.
     double& Theta() { return theta; }

     //! Get the angular velocity.
     double AngularVelocity() const { return data[2]; }
     //! Modify the value of angular velocity.
     double& AngularVelocity() { return data[2]; }

     //! Encode the state to a column vector.
     const arma::colvec& Encode() const { return data; }

     //! Updates the theta transformations (sin and cos) in data.
     void SetState()
     {
       data[0] = std::sin(theta);
       data[1] = std::cos(theta);
     }

     //! Dimension of the encoded state.
     static constexpr size_t dimension = 3;

    private:
     //! Locally-stored theta.
     double theta;

     //! Locally-stored (sin(theta), cos(theta), angular velocity).
     arma::colvec data;
   };

   /**
    * Implementation of action of Pendulum.  Sample() reads action[0] as the
    * torque to apply.
    */
   class Action
   {
    public:
     /**
      * Construct an action instance holding a single value-initialized entry.
      */
     Action() : action(1)
     { /* Nothing to do here */ }

     //! The applied action values; only the first entry is read by Sample().
     std::vector<double> action;

     //! Storing degree of freedom.
     static const size_t size = 1;
   };

   /**
    * Construct a Pendulum instance using the given values.
    *
    * @param maxSteps The number of steps after which the episode terminates.
    *     If the value is 0, there is no limit.
    * @param maxAngularVelocity Bound used to clamp the stored angular velocity
    *     to [-maxAngularVelocity, maxAngularVelocity].
    * @param maxTorque Bound used to clamp the applied torque to
    *     [-maxTorque, maxTorque].
    * @param dt The length of one simulation time step.
    * @param doneReward NOTE(review): stored, but no method of this class
    *     currently reads it -- presumably meant as a terminal reward; confirm.
    */
   Pendulum(const size_t maxSteps = 200,
            const double maxAngularVelocity = 8,
            const double maxTorque = 2.0,
            const double dt = 0.05,
            const double doneReward = 0.0) :
       maxSteps(maxSteps),
       maxAngularVelocity(maxAngularVelocity),
       maxTorque(maxTorque),
       dt(dt),
       doneReward(doneReward),
       stepsPerformed(0)
   { /* Nothing to do here */ }

   /**
    * Dynamics of Pendulum.  Get reward and next state based on current state
    * and current action.
    *
    * @param state The current state.
    * @param action The current action; action.action[0] is the torque.
    * @param nextState The next state, overwritten by this call.
    * @return The reward, i.e. the negative of the incurred cost.
    */
   double Sample(const State& state,
                 const Action& action,
                 State& nextState)
   {
     // Update the number of steps performed.
     stepsPerformed++;

     // Get current state.
     double theta = state.Theta();
     double angularVelocity = state.AngularVelocity();

     // Define constants which specify our pendulum.
     const double gravity = 10.0;
     const double mass = 1.0;
     const double length = 1.0;

     // Get action and clip the values between max and min limits.
     double torque = math::ClampRange(action.action[0], -maxTorque, maxTorque);

     // Calculate costs of taking this action in the current state.
     double costs = std::pow(AngleNormalize(theta), 2) + 0.1 *
         std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);

     // Calculate new state values and assign to the next state.
     double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
         length) * std::sin(theta + M_PI) + 3.0 / (mass * std::pow(length, 2)) *
         torque) * dt;
     // Note that theta is advanced with the unclamped velocity; the clamp only
     // applies to the angular velocity stored in the next state.
     nextState.Theta() = theta + newAngularVelocity * dt;
     nextState.AngularVelocity() = math::ClampRange(newAngularVelocity,
         -maxAngularVelocity, maxAngularVelocity);

     // Refresh sin(theta) and cos(theta) in the encoded next state.
     nextState.SetState();

     // Return the reward of taking the action in current state.
     // The reward is simply the negative of cost incurred for the action.
     return -costs;
   }

   /**
    * Dynamics of Pendulum.  Get reward based on current state and current
    * action; the next state is computed and discarded.  The step counter is
    * still advanced.
    *
    * @param state The current state.
    * @param action The current action.
    * @return The reward, i.e. the negative of the incurred cost.
    */
   double Sample(const State& state, const Action& action)
   {
     State nextState;
     return Sample(state, action, nextState);
   }

   /**
    * Draw a random initial state and reset the step counter.  Theta is drawn
    * with math::Random(-M_PI, M_PI) and the angular velocity with
    * math::Random(-1.0, 1.0).
    *
    * @return Initial state for each episode.
    */
   State InitialSample()
   {
     State state;
     state.Theta() = math::Random(-M_PI, M_PI);
     state.AngularVelocity() = math::Random(-1.0, 1.0);
     stepsPerformed = 0;
     state.SetState();
     return state;
   }

   /**
    * This function calculates the normalized angle for a particular theta.
    *
    * @param theta The un-normalized angle.
    * @return The equivalent angle within [-pi, pi).
    */
   double AngleNormalize(double theta) const
   {
     // Scale angle within [-pi, pi).  fmod keeps the sign of its first
     // argument, so a negative remainder is shifted up by one full turn.
     double x = std::fmod(theta + M_PI, 2 * M_PI);
     if (x < 0)
       x += 2 * M_PI;
     return x - M_PI;
   }

   /**
    * This function checks if the pendulum has reached a terminal state.  The
    * only terminal condition is the step limit; the state itself is ignored.
    *
    * @return true if maxSteps is nonzero and at least maxSteps steps have been
    *     performed, false otherwise.
    */
   bool IsTerminal(const State& /* state */) const
   {
     if (maxSteps != 0 && stepsPerformed >= maxSteps)
     {
       Log::Info << "Episode terminated due to the maximum number of steps "
           "being taken." << std::endl;
       return true;
     }
     return false;
   }

   //! Get the number of steps performed.
   size_t StepsPerformed() const { return stepsPerformed; }

   //! Get the maximum number of steps allowed.
   size_t MaxSteps() const { return maxSteps; }
   //! Set the maximum number of steps allowed.
   size_t& MaxSteps() { return maxSteps; }

  private:
   //! Locally-stored maximum number of steps (0 means no limit).
   size_t maxSteps;

   //! Locally-stored bound on the magnitude of the angular velocity.
   double maxAngularVelocity;

   //! Locally-stored bound on the magnitude of the torque.
   double maxTorque;

   //! Locally-stored length of one time step.
   double dt;

   //! Locally-stored done reward (not read by any method of this class).
   double doneReward;

   //! Locally-stored number of steps performed since the last InitialSample().
   size_t stepsPerformed;
 };

 } // namespace rl
 } // namespace mlpack

 #endif
mlpack::rl::Pendulum::IsTerminal
bool IsTerminal(const State &) const
This function checks if the pendulum has reached a terminal state.
Definition: pendulum.hpp:227

mlpack::rl::Pendulum::State::Theta
double Theta() const
Get the theta.
Definition: pendulum.hpp:60

mlpack::rl::Pendulum::State::Theta
double & Theta()
Modify the value of theta.
Definition: pendulum.hpp:62

mlpack::rl::Pendulum
Implementation of Pendulum task.
Definition: pendulum.hpp:31

mlpack
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: add_to_cli11.hpp:21

mlpack::rl::Pendulum::MaxSteps
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: pendulum.hpp:242

mlpack::rl::Pendulum::State::Encode
const arma::colvec & Encode()
Encode the state to a column vector.
Definition: pendulum.hpp:70

mlpack::rl::Pendulum::Sample
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Pendulum.
Definition: pendulum.hpp:140

prereqs.hpp
The core includes that mlpack expects; standard C++ includes and Armadillo.

mlpack::rl::Pendulum::State::Data
arma::colvec & Data()
Modify the internal representation of the state.
Definition: pendulum.hpp:57

mlpack::rl::Pendulum::State::State
State(const arma::colvec &data)
Construct a state based on the given data.
Definition: pendulum.hpp:53

mlpack::rl::Pendulum::AngleNormalize
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
Definition: pendulum.hpp:212

M_PI
#define M_PI
Definition: prereqs.hpp:39

mlpack::rl::Pendulum::State::AngularVelocity
double AngularVelocity() const
Get the angular velocity.
Definition: pendulum.hpp:65

clamp.hpp
Miscellaneous math clamping routines.

mlpack::rl::Pendulum::Action
Implementation of action of Pendulum.
Definition: pendulum.hpp:95

mlpack::rl::Pendulum::State::State
State()
Construct a state instance.
Definition: pendulum.hpp:44

mlpack::Log::Info
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84

mlpack::rl::Pendulum::StepsPerformed
size_t StepsPerformed() const
Get the number of steps performed.
Definition: pendulum.hpp:239

mlpack::rl::Pendulum::State::SetState
void SetState()
Updates the theta transformations in data.
Definition: pendulum.hpp:73

mlpack::rl::Pendulum::State::AngularVelocity
double & AngularVelocity()
Modify the value of angular velocity.
Definition: pendulum.hpp:67

mlpack::rl::Pendulum::State::dimension
static constexpr size_t dimension
Dimension of the encoded state.
Definition: pendulum.hpp:80

mlpack::rl::Pendulum::MaxSteps
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: pendulum.hpp:244

mlpack::rl::Pendulum::State
Implementation of state of Pendulum.
Definition: pendulum.hpp:38

mlpack::rl::Pendulum::Sample
double Sample(const State &state, const Action &action)
Dynamics of Pendulum.
Definition: pendulum.hpp:185

mlpack::math::Random
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83

mlpack::rl::Pendulum::InitialSample
State InitialSample()
Initial theta is randomly generated within [-pi, pi].
Definition: pendulum.hpp:197

mlpack::rl::Pendulum::Action::Action
Action()
Construct an action instance.
Definition: pendulum.hpp:101

mlpack::rl::Pendulum::Pendulum
Pendulum(const size_t maxSteps=200, const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05, const double doneReward=0.0)
Construct a Pendulum instance using the given values.
Definition: pendulum.hpp:118

mlpack::rl::Pendulum::Action::action
std::vector< double > action
Definition: pendulum.hpp:103

mlpack::math::ClampRange
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53