// main-dbi.cc (20-Sep-1996)

/***
*
* main-dbi.cc
*
* $Revision: 1.3 $
*
* Description:
*   This module implements two types of simulation for the double
*   integrator: detailed (S_Detailed_DBI) and report (S_Report_DBI,
*   formerly "stat"). The detailed simulation reports the state of the
*   double integrator at every step. The report simulation reports the
*   accumulated reward and number of steps for every trial.
*
* Author:
*   Juan Carlos Santamaria
*     E-mail: carlos@cc.gatech.edu
*     URL:    http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
*   $Id: main-dbi.cc,v 1.3 1996/09/20 13:30:31 carlos Exp $
*
* Revision History:
*   $Log: main-dbi.cc,v $
*   Revision 1.3  1996/09/20  13:30:31  carlos
*   - Change the order of arguments in Simulation::collect_data:
*       old: S,A,r,S'    new: S,A,S',r
*     Now all the order of arguments in all functions are consistent.
*   - Add functionality to S_Detailed_DBI to keep track and report
*     the number of steps and accumulated reward. Function: output_summary.
*   - S_Detailed_DBI and S_Report_DBI now use the discount factor, GAMMA,
*     directly from the environment.
*   - Creation of E_DBI instance now requires GAMMA as an argument.
*
*   Revision 1.2  1996/08/28  20:27:49  carlos
*   - Rename Detailed_DBI and Stat_DBI to S_Detailed_DBI and S_Report_DBI.
*   - Environment::transition is now Environment::step.
*
*   Revision 1.1  1996/08/14  22:16:56  carlos
*   Initial revision
*
****/

#pragma  implementation


// -- Include files

#include 
#include 
#include 

#include "rli.h"
#include "environment-dbi.h"

#ifdef _OPTIMAL
#include "agent-optimal-dbi.h"
#endif

#ifdef _CMAC
#include "agent-cmac-dbi.h"
#endif


// -- Local types and classes


// Simulation variant that writes one "pos   vel" line each time
// collect_data() is called and keeps per-trial totals that
// output_summary() prints.
class S_Detailed_DBI : public Simulation {
public:
    double acc_reward;   // reward accumulated during the current trial
    int    acc_steps;    // collect_data() calls seen during the current trial
    int    trial;        // incremented once per start_trial() call

    // Hands the agent and environment to the Simulation base class and
    // starts with all counters at zero.
    S_Detailed_DBI( Agent *pa, E_DBI *pe )
        : Simulation(pa,pe),
          acc_reward(0.0),
          acc_steps(0),
          trial(0)
        {}

    // Runs Simulation::init() and zeroes the counters again.
    void init( int argc, char *argv[] );

    // Prints the summary of the previous trial (if any) and starts counting
    // a new one.
    void start_trial();

    // Prints the position/velocity of `ps` and updates the trial totals.
    void collect_data( const Sensation *ps,
                       const Action    *pa,
                       const Sensation *pnext_s,
                       double           reward );

    // Prints trial number, step count and accumulated reward on one line.
    void output_summary();
};


// Simulation variant that prints a table with one row per trial
// (trial number, steps, accumulated reward) instead of tracing every step.
class S_Report_DBI : public Simulation {
public:
    double acc_reward;   // reward accumulated during the current trial
    int    acc_steps;    // collect_data() calls seen during the current trial
    int    trial;        // incremented once per start_trial() call

    // Hands the agent and environment to the Simulation base class and
    // starts with all counters at zero.
    S_Report_DBI( Agent *pa, E_DBI *pe )
        : Simulation(pa,pe),
          acc_reward(0.0),
          acc_steps(0),
          trial(0)
        {}

    // Runs Simulation::init(), zeroes the counters and prints the table
    // header.
    void init( int argc, char *argv[] );

    // Prints the row for the previous trial (if any) and starts counting a
    // new one.
    void start_trial();

    // Updates the trial totals; prints nothing.
    void collect_data( const Sensation *ps,
                       const Action    *pa,
                       const Sensation *pnext_s,
                       double           reward );
};



// -- Member function definitions

//============================================================================
// S_Detailed_DBI::init()

// Initialise the simulation: the arguments are forwarded unchanged to
// Simulation::init(), after which every counter of this class is cleared.
void S_Detailed_DBI::init( int argc, char *argv[] )
{
    // Base-class initialisation comes first.
    Simulation::init( argc, argv );

    // No trial has been started and nothing has been accumulated yet.
    trial      = 0;
    acc_steps  = 0;
    acc_reward = 0.0;
}


//============================================================================
// S_Detailed_DBI::start_trial()

// Called at the beginning of a trial. After the base-class hook has run,
// the totals of the trial that just finished (if there was one) are printed
// and cleared, and the trial counter is advanced.
void S_Detailed_DBI::start_trial()
{
    Simulation::start_trial();

    // `trial` still holds the number of the trial that just ended, so the
    // summary has to be printed before the counter is incremented.
    const bool have_finished_trial = ( trial >= 1 );
    if ( have_finished_trial ) {
        output_summary();

        acc_steps  = 0;
        acc_reward = 0.0;
    }

    ++trial;
}


//============================================================================
// S_Detailed_DBI::collect_data()

void S_Detailed_DBI::collect_data( const Sensation *ps,
                                   const Action    *pa,
                                   const Sensation *pnext_s,
                                   double           reward )
{
    State *pstate = (State *)ps;
    cout<< pstate->pos<< "   "<< pstate->vel<< endl;

    acc_reward = ((E_DBI *)penv)->GAMMA*acc_reward + reward;
    acc_steps  += 1;
}


//============================================================================
// S_Detailed_DBI::output_summary()

// Print one summary line for the current trial:
//
//   # trial: <trial>  steps: <acc_steps>  acc. reward: <acc_reward>
//
// The trial number is padded to width 3, the step count to width 5 and the
// reward is written with precision 10.
void S_Detailed_DBI::output_summary( void )
{
    cout<< "# trial: ";
    cout.width(3);
    cout<< trial<< "  steps: ";
    cout.width(5);
    cout<< acc_steps<< "  acc. reward: ";

    // Unlike width(), precision() is sticky: without restoring it, every
    // pos/vel line printed by collect_data() after the first summary would
    // silently switch to precision 10 as well.
    const int saved_precision = cout.precision(10);
    cout<< acc_reward<< endl;
    cout.precision(saved_precision);
}

        
//============================================================================
// S_Report_DBI::init()

// Initialise the simulation: the arguments are forwarded unchanged to
// Simulation::init(), every counter of this class is cleared and the header
// of the per-trial table is printed.
void S_Report_DBI::init( int argc, char *argv[] )
{
    // Base-class initialisation comes first.
    Simulation::init( argc, argv );

    // No trial has been started and nothing has been accumulated yet.
    trial      = 0;
    acc_steps  = 0;
    acc_reward = 0.0;

    // Column titles for the rows written by start_trial().
    cout<< "trials    steps    Acc. reward"<< endl;
}

//============================================================================
// S_Report_DBI::start_trial()

// Called at the beginning of a trial. After the base-class hook has run,
// the row for the trial that just finished (if there was one) is printed:
// trial number (width 4), step count (width 10) and accumulated reward
// ("%15.4f"), followed by a newline. The totals are then cleared and the
// trial counter advanced.
//
// NOTE(review): rows are only emitted when the *next* trial starts, so
// nothing in this file prints the row for the last trial of a run --
// confirm whether Simulation provides an end-of-run hook for that.
void S_Report_DBI::start_trial( void )
{
    Simulation::start_trial();

    if ( trial > 0 ) {
        // Format the reward with the printf-style spec, but emit it through
        // cout like the rest of the row: writing one line partly via cout
        // and partly via stdout is only ordered correctly while the two
        // streams are synchronised. 512 bytes is ample for "%15.4f" of any
        // finite double (at most ~315 characters).
        char reward_text[512];
        snprintf( reward_text, sizeof reward_text, "%15.4f\n", acc_reward );

        cout.width(4);
        cout<< trial;
        cout.width(10);
        cout<< acc_steps;
        cout<< reward_text;

        acc_reward = 0.0;
        acc_steps  = 0;
    }

    trial++;
}


//============================================================================
// S_Report_DBI::collect_data()

void S_Report_DBI::collect_data( const Sensation *ps,
                                 const Action    *pa,
                                 const Sensation *pnext_s,
                                 double           reward )
{
    acc_reward = ((E_DBI *)penv)->GAMMA*acc_reward + reward;
    acc_steps  += 1;
}


// -- Global function definitions

// Program entry point: builds the agent, the double-integrator environment
// and the matching simulation, then runs it.
//
// The variant is selected at compile time; exactly one of these macros must
// be defined, otherwise `pa` and `psim` are undeclared or declared twice:
//   _OPTIMAL : A_Optimal_DBI agent with S_Detailed_DBI; calls
//              steps(max_steps_per_trial) and then prints the summary.
//   _CMAC    : A_CMAC_DBI agent with S_Report_DBI; calls
//              trials(num_trials, max_steps_per_trial).
//
// argc/argv are not used: the simulation is always initialised with the
// single argument "1" (the random seed).
//
// The agent, environment and simulation are not deleted here; whether
// Simulation takes ownership of them is not visible from this file, so they
// are left to be reclaimed at process exit.
int main( int argc, char *argv[] )
{
    long         num_trials = 50;
    long         max_steps_per_trial = 200;

    // create agent according to switch

#ifdef _OPTIMAL
    A_Optimal_DBI *pa = new A_Optimal_DBI;
#endif

#ifdef _CMAC
    A_CMAC_DBI *pa = new A_CMAC_DBI;
#endif

    // create DBI environment (argument is the discount factor GAMMA)

    E_DBI *pe = new E_DBI(1.0);


    // create simulation according to switch; the pointer keeps its concrete
    // type because output_summary() exists only on S_Detailed_DBI

#ifdef _OPTIMAL
    S_Detailed_DBI *psim = new S_Detailed_DBI(pa,pe);
#endif

#ifdef _CMAC
    S_Report_DBI *psim = new S_Report_DBI(pa,pe);
#endif

    // init with random seed: 1
    // (init() takes mutable char* arguments, so the seed lives in a
    // writable array rather than pointing at a string literal)

    char  seed_arg[] = "1";
    char *args[1];
    args[0] = seed_arg;

    psim->init( 1, args );


    // run trials

#ifdef _OPTIMAL
    psim->steps( max_steps_per_trial );
    psim->output_summary();
#endif

#ifdef _CMAC
    psim->trials( num_trials, max_steps_per_trial );
#endif

    return 0;
}



/****************************** end of file *********************************/