acrobot.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
15 
16 #include <mlpack/core.hpp>
17 
18 namespace mlpack{
19 namespace rl{
20 
28 class Acrobot
29 {
30  public:
31  /*
32  * Implementation of Acrobot State. Each State is a tuple vector
33  * (theta1, thetha2, angular velocity 1, angular velocity 2).
34  */
35  class State
36  {
37  public:
41  State(): data(dimension) { /* nothing to do here */ }
42 
48  State(const arma::colvec& data) : data(data)
49  { /* nothing to do here */ }
50 
52  arma::colvec& Data() { return data; }
53 
55  double Theta1() const { return data[0]; }
57  double& Theta1() { return data[0]; }
58 
60  double Theta2() const { return data[1]; }
62  double& Theta2() { return data[1]; }
63 
65  double AngularVelocity1() const { return data[2]; }
67  double& AngularVelocity1() { return data[2]; }
68 
70  double AngularVelocity2() const { return data[3]; }
72  double& AngularVelocity2() { return data[3]; }
73 
75  const arma::colvec& Encode() const { return data; }
76 
78  static constexpr size_t dimension = 4;
79 
80  private:
82  arma::colvec data;
83  };
84 
85  /*
86  * Implementation of action for Acrobot
87  */
88  class Action
89  {
90  public:
91  enum actions
92  {
96  };
97  // To store the action.
99 
100  // Track the size of the action space.
101  static const size_t size = 3;
102  };
103 
122  Acrobot(const size_t maxSteps = 500,
123  const double gravity = 9.81,
124  const double linkLength1 = 1.0,
125  const double linkLength2 = 1.0,
126  const double linkMass1 = 1.0,
127  const double linkMass2 = 1.0,
128  const double linkCom1 = 0.5,
129  const double linkCom2 = 0.5,
130  const double linkMoi = 1.0,
131  const double maxVel1 = 4 * M_PI,
132  const double maxVel2 = 9 * M_PI,
133  const double dt = 0.2,
134  const double doneReward = 0) :
135  maxSteps(maxSteps),
136  gravity(gravity),
137  linkLength1(linkLength1),
138  linkLength2(linkLength2),
139  linkMass1(linkMass1),
140  linkMass2(linkMass2),
141  linkCom1(linkCom1),
142  linkCom2(linkCom2),
143  linkMoi(linkMoi),
144  maxVel1(maxVel1),
145  maxVel2(maxVel2),
146  dt(dt),
147  doneReward(doneReward),
148  stepsPerformed(0)
149  { /* Nothing to do here */ }
150 
160  double Sample(const State& state,
161  const Action& action,
162  State& nextState)
163  {
164  // Update the number of steps performed.
165  stepsPerformed++;
166 
167  // Make a vector to estimate nextstate.
168  arma::colvec currentState = {state.Theta1(), state.Theta2(),
169  state.AngularVelocity1(), state.AngularVelocity2()};
170 
171  arma::colvec currentNextState = Rk4(currentState, Torque(action));
172 
173  nextState.Theta1() = Wrap(currentNextState[0], -M_PI, M_PI);
174 
175  nextState.Theta2() = Wrap(currentNextState[1], -M_PI, M_PI);
176 
178  nextState.AngularVelocity1() = math::ClampRange(currentNextState[2],
179  -maxVel1, maxVel1);
180  nextState.AngularVelocity2() = math::ClampRange(currentNextState[3],
181  -maxVel2, maxVel2);
182 
183  // Check if the episode has terminated.
184  bool done = IsTerminal(nextState);
185 
186  // Do not reward the agent if time ran out.
187  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
188  return 0;
189  else if (done)
190  return doneReward;
191 
192  return -1;
193  };
194 
204  double Sample(const State& state, const Action& action)
205  {
206  State nextState;
207  return Sample(state, action, nextState);
208  }
209 
214  {
215  stepsPerformed = 0;
216  return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
217  }
218 
225  bool IsTerminal(const State& state) const
226  {
227  if (maxSteps != 0 && stepsPerformed >= maxSteps)
228  {
229  Log::Info << "Episode terminated due to the maximum number of steps"
230  "being taken.";
231  return true;
232  }
233  else if (-std::cos(state.Theta1()) - std::cos(state.Theta1() +
234  state.Theta2()) > 1.0)
235  {
236  Log::Info << "Episode terminated due to agent succeeding.";
237  return true;
238  }
239  return false;
240  }
241 
249  arma::colvec Dsdt(arma::colvec state, const double torque) const
250  {
251  const double m1 = linkMass1;
252  const double m2 = linkMass2;
253  const double l1 = linkLength1;
254  const double lc1 = linkCom1;
255  const double lc2 = linkCom2;
256  const double I1 = linkMoi;
257  const double I2 = linkMoi;
258  const double g = gravity;
259  const double a = torque;
260  const double theta1 = state[0];
261  const double theta2 = state[1];
262 
263  arma::colvec values(4);
264  values[0] = state[2];
265  values[1] = state[3];
266 
267  const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
268  std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
269 
270  const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
271  I2;
272 
273  const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 - M_PI / 2.);
274 
275  const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
276  std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
277  std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
278  std::cos(theta1 - M_PI / 2) + phi2;
279 
280  values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
281  std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
282  std::pow(d2, 2) / d1);
283 
284  values[2] = -(d2 * values[3] + phi1) / d1;
285 
286  return values;
287  };
288 
298  double Wrap(double value,
299  const double minimum,
300  const double maximum) const
301  {
302  const double diff = maximum - minimum;
303 
304  if (value > maximum)
305  {
306  value = value - diff;
307  }
308  else if (value < minimum)
309  {
310  value = value + diff;
311  }
312 
313  return value;
314  };
315 
322  double Torque(const Action& action) const
323  {
324  // Add noise to the Torque Torque is action number - 1. {0,1,2} -> {-1,0,1}.
325  return double(action.action - 1) + mlpack::math::Random(-0.1, 0.1);
326  }
327 
335  arma::colvec Rk4(const arma::colvec state, const double torque) const
336  {
337  arma::colvec k1 = Dsdt(state, torque);
338  arma::colvec k2 = Dsdt(state + dt * k1 / 2, torque);
339  arma::colvec k3 = Dsdt(state + dt * k2 / 2, torque);
340  arma::colvec k4 = Dsdt(state + dt * k3, torque);
341  arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
342 
343  return nextState;
344  };
345 
347  size_t StepsPerformed() const { return stepsPerformed; }
348 
350  size_t MaxSteps() const { return maxSteps; }
352  size_t& MaxSteps() { return maxSteps; }
353 
354  private:
356  size_t maxSteps;
357 
359  double gravity;
360 
362  double linkLength1;
363 
365  double linkLength2;
366 
368  double linkMass1;
369 
371  double linkMass2;
372 
374  double linkCom1;
375 
377  double linkCom2;
378 
380  double linkMoi;
381 
383  double maxVel1;
384 
386  double maxVel2;
387 
389  double dt;
390 
392  double doneReward;
393 
395  size_t stepsPerformed;
396 };
397 
398 } // namespace rl
399 } // namespace mlpack
400 
401 #endif
arma::colvec & Data()
Modify the state representation.
Definition: acrobot.hpp:52
Acrobot(const size_t maxSteps=500, const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0)
Construct a Acrobot instance using the given constants.
Definition: acrobot.hpp:122
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:160
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
Definition: acrobot.hpp:225
Linear algebra utility functions, generally performed on matrices or vectors.
double & AngularVelocity1()
Modify the angular velocity (one).
Definition: acrobot.hpp:67
double AngularVelocity1() const
Get value of Angular velocity (one).
Definition: acrobot.hpp:65
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: acrobot.hpp:350
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: acrobot.hpp:75
#define M_PI
Definition: prereqs.hpp:39
size_t StepsPerformed() const
Get the number of steps performed.
Definition: acrobot.hpp:347
double & Theta1()
Modify value of theta (one).
Definition: acrobot.hpp:57
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
double Torque(const Action &action) const
This function calculates the torque for a particular action.
Definition: acrobot.hpp:322
double & Theta2()
Modify value of theta (two).
Definition: acrobot.hpp:62
double & AngularVelocity2()
Modify the angular velocity (two).
Definition: acrobot.hpp:72
Action::actions action
Definition: acrobot.hpp:98
static constexpr size_t dimension
Dimension of the encoded state.
Definition: acrobot.hpp:78
Implementation of Acrobot game.
Definition: acrobot.hpp:28
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: acrobot.hpp:352
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
Definition: acrobot.hpp:249
double AngularVelocity2() const
Get value of Angular velocity (two).
Definition: acrobot.hpp:70
double Sample(const State &state, const Action &action)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:204
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double Theta2() const
Get value of theta (two).
Definition: acrobot.hpp:60
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
double Theta1() const
Get value of theta (one).
Definition: acrobot.hpp:55
State()
Construct a state instance.
Definition: acrobot.hpp:41
State(const arma::colvec &data)
Construct a state instance from given data.
Definition: acrobot.hpp:48
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
Definition: acrobot.hpp:298
State InitialSample()
This function does random initialization of state space.
Definition: acrobot.hpp:213
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
Definition: acrobot.hpp:335
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53