/***
 *
 * environment-dbi.h
 *
 * $Revision: 1.3 $
 *
 * Description:
 *    Interface for a double integrator object.  The state consists of two
 *    continuous variables: position (p) and velocity (v).  The action
 *    consists of one continuous variable: acceleration (a).  The dynamics
 *    equation is a double integration (linear):
 *                             dp/dt = v
 *                             dv/dt = a
 *    The starting position is given by (POS_0,VEL_0) and the reward function
 *    is quadratic of the form x'Qx + u'Ru.  That is, the goal is to reach the
 *    origin of the state space and remain there.  The use of energy (a^2) is
 *    penalized too.  Additionally, if the state falls out of bounds the
 *    simulation finishes and the reward is MIN_REWARD.
 *
 * Author:
 *    Juan Carlos Santamaria
 *    E-mail: carlos@cc.gatech.edu
 *    URL:    http://www.cc.gatech.edu/ai/students/jcs
 *
 * File name:
 *    $Id: environment-dbi.h,v 1.3 1996/09/19 22:19:54 carlos Exp $
 *
 * Revision History:
 *    $Log: environment-dbi.h,v $
 *    Revision 1.3  1996/09/19 22:19:54  carlos
 *    - Change the order of arguments in Environment::step:
 *        old: A,r,S'   new: A,S',r
 *    - Add the constant GAMMA to the class. The value is initialized at
 *      creation time using a constructor parameter.
 *
 *    Revision 1.2  1996/08/28 20:19:16  carlos
 *    Environment::transition is now Environment::step.
* * Revision 1.1 1996/08/14 20:53:13 carlos * Initial revision * * ****/ #pragma interface #ifndef _INCL_ENVIRONMENT_DBI #define _INCL_ENVIRONMENT_DBI // -- Include files #ifndef _INCL_RLI #include "rli.h" #endif // -- Constants // common stuff const double DELTA_T = 0.05; const int NUM_ITS_PER_STEP = 4; // state indexes const int POS = 0; const int VEL = 1; const int FOR = 2; // bounds const double POS_MAX = 1.0; const double POS_MIN = -1.0; const double VEL_MAX = 1.0; const double VEL_MIN = -1.0; const double FOR_MAX = 1.0; const double FOR_MIN = -1.0; // initial conditions const double POS_0 = 1.0; const double VEL_0 = 0.0; // goal state const double POS_F = 0.0; const double VEL_F = 0.0; // ohter constants const double Q_POS = 1.0; const double Q_VEL = 0.0; const double R = 1.0; const double MIN_REWARD = -50.0; // -- Class and type declarations class State : public Sensation { public: double pos, vel; State( double p=0.0, double v=0.0 ) : pos(p), vel(v) {} State( const State& s ) { pos=s.pos; vel=s.vel; } State& operator=( const State& s ) { pos=s.pos; vel=s.vel; return *this; } }; class Force : public Action { public: double acc; Force( double a=0.0 ) : acc(a) {} Force( const Force& f ) { acc=f.acc; } Force& operator=( const Force& f ) { acc=f.acc; return *this; } }; class E_DBI : public Environment { public: enum Init { START, GOAL, RANDOM }; const double GAMMA; Init mode; double time; State state; E_DBI( double gamma, Init m = START ) : Environment(), GAMMA(gamma), mode(m), time(0.0) {} ~E_DBI( void ) {} void init( int argc, char *argv[] ); Sensation *start_trial( void ); void step( const Action *pa, Sensation *&pnext_s, double &reward ); }; #endif /****************************** end of file *********************************/