/*** * * rli.cc * * $Revision: 1.3 $ * * Description: * Implementation for the Simulation, Agent, and Environment abstract * classes. These are abstract classes that define the three main objects * to perform reinforcement learning simulation. Refer to the * documentation in URL: * http://www-anw.cs.umass.edu/People/sutton/RLinterface/RLinterface.html * for further details. * * Author: * Juan Carlos Santamaria * E-mail: carlos@cc.gatech.edu * URL: http://www.cc.gatech.edu/ai/students/jcs * * File name: * $Id: rli.cc,v 1.3 1996/09/19 21:58:01 carlos Exp $ * * Revision History: * $Log: rli.cc,v $ * Revision 1.3 1996/09/19 21:58:01 carlos * - Eliminate the arguments ps and pa from Agent::step. * - Change the order of arguments in Enviornment::step: * old: A,r,S' new: A,S',r * - Change the order of arguments in Simulation::collect_data: * old: S,A,r,S' new: S,A,S',r * Now all the order of arguments in all functions are consistent. * * Revision 1.2 1996/08/29 15:10:55 carlos * - Definition of constant TERMINAL_STATE. * - Change in the order of arguments in Agent::step: * old: S,A,r,S' new: S,A,S',r * - Environment::transition is now Environment::step. * - Deletion of current sensation and action in Simulation::start_trial was * added. * - Simulation::run_steps and Simulation::run_trials are now * Simulation::steps and Simulation::trials. 
* * Revision 1.1 1996/08/14 20:57:58 carlos * Initial revision * ****/ #pragma implementation // -- Include files #include "rli.h" // -- Member function definitions //============================================================================ // Agent::init() void Agent::init( int argc, char *argv[] ) { } //============================================================================ // Environment::init() void Environment::init( int argc, char *argv[] ) { } //============================================================================ // Simulation::init() void Simulation::init( int argc, char *argv[] ) { penv->init(argc,argv); pagt->init(argc,argv); } //============================================================================ // Simulation::steps() void Simulation::steps( long num_steps ) { Sensation *pnext_s; Action *pnext_a; double reward; if ( pcurr_s == TERMINAL_STATE ) start_trial(); for( long step = 1; step <= num_steps ; step++ ) { penv->step( pcurr_a, pnext_s, reward ); collect_data( pcurr_s, pcurr_a, pnext_s, reward ); pnext_a = pagt->step( pnext_s, reward ); delete pcurr_s; delete pcurr_a; if ( pnext_s != TERMINAL_STATE ) { pcurr_s = pnext_s; pcurr_a = pnext_a; } else start_trial(); } } //============================================================================ // Simulation::trials() void Simulation::trials( long num_trials, long max_steps_per_trial ) { Sensation *pnext_s; Action *pnext_a; double reward; long trial, step; if ( pcurr_s == TERMINAL_STATE ) start_trial(); for( trial=1 ; trial <= num_trials ; trial++ ) { for( step=1 ; (pcurr_s!=0) && (step <= max_steps_per_trial) ; step++) { penv->step( pcurr_a, pnext_s, reward ); collect_data( pcurr_s, pcurr_a, pnext_s, reward ); pnext_a = pagt->step( pnext_s, reward ); delete pcurr_s; delete pcurr_a; pcurr_s = pnext_s; pcurr_a = pnext_a; } delete pcurr_s; delete pcurr_a; start_trial(); } } //============================================================================ // Simulation::start_trial() 
// Begin a new trial: dispose of whatever sensation/action is left over
// from the previous trial (delete of a null/TERMINAL_STATE pointer is a
// harmless no-op), then obtain the initial sensation from the environment
// and the agent's first action in response to it.

void Simulation::start_trial( void )
{
    delete pcurr_s;
    delete pcurr_a;
    pcurr_s = penv->start_trial();
    pcurr_a = pagt->start_trial(pcurr_s);
}

//============================================================================
// Simulation::collect_data()
//
// Per-step data-collection hook invoked with the (S, A, S', reward)
// experience tuple of the step just taken.  Intentionally a no-op here;
// subclasses override it to record statistics or log the simulation.

void Simulation::collect_data( const Sensation *ps,
                               const Action    *pa,
                               const Sensation *pnext_s,
                               double           reward )
{
}

/****************************** end of file *********************************/