/*** * * rli.cc * * $Revision: 1.3 $ * * Description: * Implementation for the Simulation, Agent, and Environment abstract * classes. These are abstract classes that define the three main objects * to perform reinforcement learning simulation. Refer to the * documentation in URL: * http://www-anw.cs.umass.edu/People/sutton/RLinterface/RLinterface.html * for further details. * * Author: * Juan Carlos Santamaria * E-mail: carlos@cc.gatech.edu * URL: http://www.cc.gatech.edu/ai/students/jcs * * File name: * $Id: rli.cc,v 1.3 1996/09/19 21:58:01 carlos Exp $ * * Revision History: * $Log: rli.cc,v $ * Revision 1.3 1996/09/19 21:58:01 carlos * - Eliminate the arguments ps and pa from Agent::step. * - Change the order of arguments in Enviornment::step: * old: A,r,S' new: A,S',r * - Change the order of arguments in Simulation::collect_data: * old: S,A,r,S' new: S,A,S',r * Now all the order of arguments in all functions are consistent. * * Revision 1.2 1996/08/29 15:10:55 carlos * - Definition of constant TERMINAL_STATE. * - Change in the order of arguments in Agent::step: * old: S,A,r,S' new: S,A,S',r * - Environment::transition is now Environment::step. * - Deletion of current sensation and action in Simulation::start_trial was * added. * - Simulation::run_steps and Simulation::run_trials are now * Simulation::steps and Simulation::trials. 
* * Revision 1.1 1996/08/14 20:57:58 carlos * Initial revision * ****/ #pragma implementation // -- Include files #include "rli.h" // -- Member function definitions //============================================================================ // Agent::init() void Agent::init( int argc, char *argv[] ) { } //============================================================================ // Environment::init() void Environment::init( int argc, char *argv[] ) { } //============================================================================ // Simulation::init() void Simulation::init( int argc, char *argv[] ) { penv->init(argc,argv); pagt->init(argc,argv); } //============================================================================ // Simulation::steps() void Simulation::steps( long num_steps ) { Sensation *pnext_s; Action *pnext_a; double reward; if ( pcurr_s == TERMINAL_STATE ) start_trial(); for( long step = 1; step <= num_steps ; step++ ) { penv->step( pcurr_a, pnext_s, reward ); collect_data( pcurr_s, pcurr_a, pnext_s, reward ); pnext_a = pagt->step( pnext_s, reward ); delete pcurr_s; delete pcurr_a; if ( pnext_s != TERMINAL_STATE ) { pcurr_s = pnext_s; pcurr_a = pnext_a; } else start_trial(); } } //============================================================================ // Simulation::trials() void Simulation::trials( long num_trials, long max_steps_per_trial ) { Sensation *pnext_s; Action *pnext_a; double reward; long trial, step; if ( pcurr_s == TERMINAL_STATE ) start_trial(); for( trial=1 ; trial <= num_trials ; trial++ ) { for( step=1 ; (pcurr_s!=0) && (step <= max_steps_per_trial) ; step++) { penv->step( pcurr_a, pnext_s, reward ); collect_data( pcurr_s, pcurr_a, pnext_s, reward ); pnext_a = pagt->step( pnext_s, reward ); delete pcurr_s; delete pcurr_a; pcurr_s = pnext_s; pcurr_a = pnext_a; } delete pcurr_s; delete pcurr_a; start_trial(); } } //============================================================================ // Simulation::start_trial() 
// Begin a new trial: dispose of whatever sensation/action is left over
// from the previous trial (delete of a null/TERMINAL_STATE pointer is a
// harmless no-op), then obtain the initial sensation from the environment
// and the agent's first action in response to it.

void Simulation::start_trial( void )
{
    delete pcurr_s;
    delete pcurr_a;
    pcurr_s = penv->start_trial();
    pcurr_a = pagt->start_trial(pcurr_s);
}

//============================================================================
// Simulation::collect_data()
//
// Per-step data-collection hook invoked with the (S, A, S', reward)
// experience tuple of the step just taken.  Intentionally a no-op here;
// subclasses override it to record statistics or log the simulation.

void Simulation::collect_data( const Sensation *ps,
                               const Action    *pa,
                               const Sensation *pnext_s,
                               double           reward )
{
}

/****************************** end of file *********************************/