/***
*
* environment-dbi.h
* 
* $Revision: 1.3 $
* 
* Description:
*    Interface for a double integrator object. The state consists of two
*    continuous variables: position (p) and velocity (v). The action
*    consists of one continuous variable: acceleration (a).  The dynamics
*    equation is a double integration (linear):
*      dp/dt = v
*      dv/dt = a
*    The starting position is given by (POS_0,VEL_0) and the reward function
*    is quadratic of the form  x'Qx + u'Ru. That is, the goal is to reach the
*    origin of the state space and remain there. The use of energy (a^2) is
*    penalized too. Additionally, if the state falls out of bounds the
*    simulation finishes and the reward is MIN_REWARD.
*
* Author:
*   Juan Carlos Santamaria
*     E-mail: carlos@cc.gatech.edu
*     URL:    http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
*   $Id: environment-dbi.h,v 1.3 1996/09/19 22:19:54 carlos Exp $
*
* Revision History:
*   $Log: environment-dbi.h,v $
*   Revision 1.3  1996/09/19  22:19:54  carlos
*   - Change the order of arguments in Environment::step:
*       old: A,r,S'      new: A,S',r
*   - Add the constant GAMMA to the class. The value is initialized at
*     creation time using a constructor parameter.
*
*   Revision 1.2  1996/08/28  20:19:16  carlos
*   Environment::transition is now Environment::step.
*
*   Revision 1.1  1996/08/14  20:53:13  carlos
*   Initial revision
*
*
****/

#pragma interface

#ifndef _INCL_ENVIRONMENT_DBI
#define _INCL_ENVIRONMENT_DBI


// -- Include files

#ifndef _INCL_RLI
#include "rli.h"
#endif


// -- Constants

// common stuff 

const double  DELTA_T          = 0.05;
const int     NUM_ITS_PER_STEP = 4;


// state indexes 

const int     POS              = 0;
const int     VEL              = 1;
const int     FOR              = 2;


// bounds 

const double POS_MAX           =  1.0;
const double POS_MIN           = -1.0;
const double VEL_MAX           =  1.0;
const double VEL_MIN           = -1.0;
const double FOR_MAX           =  1.0;
const double FOR_MIN           = -1.0;


// initial conditions 

const double POS_0             = 1.0;
const double VEL_0             = 0.0;


// goal state

const double POS_F             = 0.0;
const double VEL_F             = 0.0;


// ohter constants 

const double Q_POS             = 1.0;
const double Q_VEL             = 0.0;
const double R                 = 1.0;

const double MIN_REWARD        = -50.0;


// -- Class and type declarations


class State : public Sensation {
public:
    
    double pos, vel;

    State( double p=0.0, double v=0.0 ) : pos(p), vel(v) {}
    State( const State& s ) { pos=s.pos; vel=s.vel; }

    State& operator=( const State& s ) { pos=s.pos; vel=s.vel; return *this; }
};


class Force : public Action {
public:
    
    double acc;
    
    Force( double a=0.0 ) : acc(a) {}
    Force( const Force& f ) { acc=f.acc; }

    Force& operator=( const Force& f ) { acc=f.acc; return *this; }
};


class E_DBI : public Environment {
public:
    enum Init { START, GOAL, RANDOM };

    const  double GAMMA;
    
    Init   mode;   
    double time;
    State  state;

    E_DBI( double gamma, Init m = START ) : Environment(),
                                            GAMMA(gamma),
                                            mode(m),
                                            time(0.0) {}
    ~E_DBI( void ) {}

    void       init( int argc, char *argv[] );
    
    Sensation *start_trial( void );

    void       step( const Action *pa,
                     Sensation    *&pnext_s,
                     double        &reward );
};


#endif

/****************************** end of file *********************************/