/***
*
* rli.cc
*
* $Revision: 1.3 $
*
* Description:
*   Implementation of the Simulation, Agent, and Environment abstract
*   classes, which define the three main objects used to perform a
*   reinforcement learning simulation. A hypothetical usage sketch
*   follows this header. Refer to the documentation at
*     http://www-anw.cs.umass.edu/People/sutton/RLinterface/RLinterface.html
*   for further details.
*
* Author:
* Juan Carlos Santamaria
* E-mail: carlos@cc.gatech.edu
* URL: http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
* $Id: rli.cc,v 1.3 1996/09/19 21:58:01 carlos Exp $
*
* Revision History:
* $Log: rli.cc,v $
* Revision 1.3 1996/09/19 21:58:01 carlos
* - Eliminate the arguments ps and pa from Agent::step.
* - Change the order of arguments in Environment::step:
*     old: A,r,S'   new: A,S',r
* - Change the order of arguments in Simulation::collect_data:
*     old: S,A,r,S'   new: S,A,S',r
*   Now the order of arguments is consistent across all functions.
*
* Revision 1.2 1996/08/29 15:10:55 carlos
* - Definition of constant TERMINAL_STATE.
* - Change the order of arguments in Agent::step:
*     old: S,A,r,S'   new: S,A,S',r
* - Environment::transition is now Environment::step.
* - Simulation::start_trial now deletes the current sensation and action.
* - Simulation::run_steps and Simulation::run_trials are now
*   Simulation::steps and Simulation::trials.
*
* Revision 1.1 1996/08/14 20:57:58 carlos
* Initial revision
*
****/
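/***
*
* Usage sketch (hypothetical, not part of the library):
*
*   A concrete simulation derives an Environment and an Agent from the
*   abstract classes declared in rli.h and plugs them into a Simulation.
*   The class names MyEnv and MyAgent, the exact virtual signatures, and
*   the Simulation constructor below are assumptions inferred from how
*   penv and pagt are used in this file; rli.h and the URL above are the
*   authoritative reference.
*
*     class MyEnv : public Environment {
*     public:
*       Sensation *start_trial( void );              // initial S
*       void step( const Action *pa,
*                  Sensation *&pnext_s,
*                  double &reward );                 // A -> S',r
*     };
*
*     class MyAgent : public Agent {
*     public:
*       Action *start_trial( const Sensation *ps );  // S -> first A
*       Action *step( const Sensation *pnext_s,
*                     double reward );               // S',r -> A'
*     };
*
*     // In main(), assuming Simulation takes the pair at construction:
*     //   MyEnv env;  MyAgent agt;
*     //   Simulation sim( &env, &agt );
*     //   sim.init( argc, argv );
*     //   sim.trials( 100, 1000 );   // 100 trials of <= 1000 steps each
*
****/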
// g++ directive: emit the out-of-line definitions for the classes
// declared with #pragma interface in rli.h into this translation unit.
#pragma implementation
// -- Include files
#include "rli.h"
// -- Member function definitions
//============================================================================
// Agent::init()
//   Default implementation: a no-op. Derived agents may override this
//   to process command-line arguments.
void Agent::init( int argc, char *argv[] )
{
}
//============================================================================
// Environment::init()
//   Default implementation: a no-op. Derived environments may override
//   this to process command-line arguments.
void Environment::init( int argc, char *argv[] )
{
}
//============================================================================
// Simulation::init()
//   Forward the command-line arguments to the environment and the agent.
void Simulation::init( int argc, char *argv[] )
{
    penv->init( argc, argv );
    pagt->init( argc, argv );
}
//============================================================================
// Simulation::steps()
//   Run the simulation for num_steps steps, starting a new trial
//   whenever a terminal sensation is reached.
void Simulation::steps( long num_steps )
{
    Sensation *pnext_s;
    Action    *pnext_a;
    double     reward;

    if ( pcurr_s == TERMINAL_STATE )
        start_trial();
    for ( long step = 1; step <= num_steps; step++ ) {
        penv->step( pcurr_a, pnext_s, reward );            // A -> S',r
        collect_data( pcurr_s, pcurr_a, pnext_s, reward );
        pnext_a = pagt->step( pnext_s, reward );           // S',r -> A'
        // The simulation owns the sensation-action pair: dispose of
        // the old pair before adopting the new one.
        delete pcurr_s;
        delete pcurr_a;
        pcurr_s = pnext_s;
        pcurr_a = pnext_a;
        // A terminal sensation ends the trial; start_trial() disposes
        // of the terminal pair and begins the next trial.
        if ( pcurr_s == TERMINAL_STATE )
            start_trial();
    }
}
//============================================================================
// Simulation::trials()
//   Run num_trials trials, each ending at a terminal sensation or
//   after at most max_steps_per_trial steps.
void Simulation::trials( long num_trials, long max_steps_per_trial )
{
    Sensation *pnext_s;
    Action    *pnext_a;
    double     reward;
    long       trial, step;

    if ( pcurr_s == TERMINAL_STATE )
        start_trial();
    for ( trial = 1; trial <= num_trials; trial++ ) {
        for ( step = 1;
              (pcurr_s != TERMINAL_STATE) && (step <= max_steps_per_trial);
              step++ ) {
            penv->step( pcurr_a, pnext_s, reward );            // A -> S',r
            collect_data( pcurr_s, pcurr_a, pnext_s, reward );
            pnext_a = pagt->step( pnext_s, reward );           // S',r -> A'
            delete pcurr_s;
            delete pcurr_a;
            pcurr_s = pnext_s;
            pcurr_a = pnext_a;
        }
        // start_trial() deletes the current sensation-action pair before
        // requesting a new one, so no deletes are needed here.
        start_trial();
    }
}
//============================================================================
// Simulation::start_trial()
//   Dispose of the current sensation-action pair, then obtain the
//   initial sensation from the environment and the agent's first action.
void Simulation::start_trial( void )
{
    delete pcurr_s;
    delete pcurr_a;
    pcurr_s = penv->start_trial();
    pcurr_a = pagt->start_trial( pcurr_s );
}
//============================================================================
// Simulation::collect_data()
//   Default implementation: a no-op. Derived simulations may override
//   this hook to record the transition (S,A,S',r) produced at each step.
void Simulation::collect_data( const Sensation *ps,
                               const Action    *pa,
                               const Sensation *pnext_s,
                               double           reward )
{
}
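/*
 * Hypothetical example (not part of the library): a derived Simulation
 * can override collect_data to record or display each transition. The
 * class name TraceSimulation and the use of printf below are
 * illustrative assumptions; note that pnext_s equals TERMINAL_STATE at
 * the end of a trial, so it must be checked before being dereferenced.
 *
 *   void TraceSimulation::collect_data( const Sensation *ps,
 *                                       const Action    *pa,
 *                                       const Sensation *pnext_s,
 *                                       double           reward )
 *   {
 *     printf( "r = %g%s\n", reward,
 *             ( pnext_s == TERMINAL_STATE ) ? "  [terminal]" : "" );
 *   }
 */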
/****************************** end of file *********************************/