13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP 48 State(
const arma::colvec& data) : data(data)
52 arma::colvec&
Data() {
return data; }
55 double Theta1()
const {
return data[0]; }
60 double Theta2()
const {
return data[1]; }
75 const arma::colvec&
Encode()
const {
return data; }
101 static const size_t size = 3;
123 const double gravity = 9.81,
124 const double linkLength1 = 1.0,
125 const double linkLength2 = 1.0,
126 const double linkMass1 = 1.0,
127 const double linkMass2 = 1.0,
128 const double linkCom1 = 0.5,
129 const double linkCom2 = 0.5,
130 const double linkMoi = 1.0,
131 const double maxVel1 = 4 *
M_PI,
132 const double maxVel2 = 9 *
M_PI,
133 const double dt = 0.2,
134 const double doneReward = 0) :
137 linkLength1(linkLength1),
138 linkLength2(linkLength2),
139 linkMass1(linkMass1),
140 linkMass2(linkMass2),
147 doneReward(doneReward),
168 arma::colvec currentState = {state.
Theta1(), state.
Theta2(),
171 arma::colvec currentNextState =
Rk4(currentState,
Torque(action));
187 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
207 return Sample(state, action, nextState);
216 return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
227 if (maxSteps != 0 && stepsPerformed >= maxSteps)
229 Log::Info <<
"Episode terminated due to the maximum number of steps" 233 else if (-std::cos(state.
Theta1()) - std::cos(state.
Theta1() +
236 Log::Info <<
"Episode terminated due to agent succeeding.";
249 arma::colvec
Dsdt(arma::colvec state,
const double torque)
const 251 const double m1 = linkMass1;
252 const double m2 = linkMass2;
253 const double l1 = linkLength1;
254 const double lc1 = linkCom1;
255 const double lc2 = linkCom2;
256 const double I1 = linkMoi;
257 const double I2 = linkMoi;
258 const double g = gravity;
259 const double a = torque;
260 const double theta1 = state[0];
261 const double theta2 = state[1];
263 arma::colvec values(4);
264 values[0] = state[2];
265 values[1] = state[3];
267 const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
268 std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
270 const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
273 const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 -
M_PI / 2.);
275 const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
276 std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
277 std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
278 std::cos(theta1 -
M_PI / 2) + phi2;
280 values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
281 std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
282 std::pow(d2, 2) / d1);
284 values[2] = -(d2 * values[3] + phi1) / d1;
299 const double minimum,
300 const double maximum)
const 302 const double diff = maximum - minimum;
306 value = value - diff;
308 else if (value < minimum)
310 value = value + diff;
335 arma::colvec
Rk4(
const arma::colvec state,
const double torque)
const 337 arma::colvec k1 =
Dsdt(state, torque);
338 arma::colvec k2 =
Dsdt(state + dt * k1 / 2, torque);
339 arma::colvec k3 =
Dsdt(state + dt * k2 / 2, torque);
340 arma::colvec k4 =
Dsdt(state + dt * k3, torque);
341 arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
395 size_t stepsPerformed;
arma::colvec & Data()
Modify the state representation.
Acrobot(const size_t maxSteps=500, const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0)
Construct a Acrobot instance using the given constants.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of the Acrobot System.
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
Linear algebra utility functions, generally performed on matrices or vectors.
double & AngularVelocity1()
Modify the angular velocity (one).
double AngularVelocity1() const
Get value of Angular velocity (one).
size_t MaxSteps() const
Get the maximum number of steps allowed.
const arma::colvec & Encode() const
Encode the state to a column vector.
size_t StepsPerformed() const
Get the number of steps performed.
double & Theta1()
Modify value of theta (one).
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
double Torque(const Action &action) const
This function calculates the torque for a particular action.
double & Theta2()
Modify value of theta (two).
double & AngularVelocity2()
Modify the angular velocity (two).
static constexpr size_t dimension
Dimension of the encoded state.
Implementation of Acrobot game.
size_t & MaxSteps()
Set the maximum number of steps allowed.
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
double AngularVelocity2() const
Get value of Angular velocity (two).
double Sample(const State &state, const Action &action)
Dynamics of the Acrobot System.
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double Theta2() const
Get value of theta (two).
double Random()
Generates a uniform random number between 0 and 1.
double Theta1() const
Get value of theta (one).
State()
Construct a state instance.
State(const arma::colvec &data)
Construct a state instance from given data.
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
State InitialSample()
This function does random initialization of state space.
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.