/***
*
* main-dbi.cc
*
* $Revision: 1.3 $
*
* Description:
*   This module implements two types of simulation for the double
*   integrator: detailed and stat. The detailed simulation reports
*   the state of the double-integrator at every step. The stat
*   simulator reports the cumulative reward and number of steps for
*   every trial.
*
* Author:
*   Juan Carlos Santamaria
*   E-mail: carlos@cc.gatech.edu
*   URL: http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
*   $Id: main-dbi.cc,v 1.3 1996/09/20 13:30:31 carlos Exp $
*
* Revision History:
*   $Log: main-dbi.cc,v $
* Revision 1.3 1996/09/20 13:30:31 carlos
* - Change the order of arguments in Simulation::collect_data:
*   old: S,A,r,S' new: S,A,S',r
*   Now all the order of arguments in all functions are consistent.
* - Add functionality to S_Detailed_DBI to keep track and report
*   the number of steps and accumulated reward. Function: output_summary.
* - S_Detailed_DBI and S_Report_DBI now use the discount factor, GAMMA,
*   directly from the environment.
* - Creation of E_DBI instance now requires GAMMA as an argument.
*
* Revision 1.2 1996/08/28 20:27:49 carlos
* - Rename Detailed_DBI and Stat_DBI to S_Detailed_DBI and S_Report_DBI.
* - Environment::transition is now Environment::step.
* * Revision 1.1 1996/08/14 22:16:56 carlos * Initial revision * ****/ #pragma implementation // -- Include files #include#include #include #include "rli.h" #include "environment-dbi.h" #ifdef _OPTIMAL #include "agent-optimal-dbi.h" #endif #ifdef _CMAC #include "agent-cmac-dbi.h" #endif // -- Local types and classes class S_Detailed_DBI : public Simulation { public: double acc_reward; int acc_steps; int trial; S_Detailed_DBI( Agent *pa, E_DBI *pe ) : Simulation(pa,pe) { acc_reward=0.0; acc_steps=0; trial=0; } void init( int argc, char *argv[] ); void start_trial( void ); void collect_data( const Sensation *ps, const Action *pa, const Sensation *pnext_s, double reward ); void output_summary( void ); }; class S_Report_DBI : public Simulation { public: double acc_reward; int acc_steps; int trial; S_Report_DBI( Agent *pa, E_DBI *pe ) : Simulation(pa,pe) { acc_reward=0.0; acc_steps=0; trial=0; } void init( int argc, char *argv[] ); void start_trial( void ); void collect_data( const Sensation *ps, const Action *pa, const Sensation *pnext_s, double reward ); }; // -- Member function definitions //============================================================================ // S_Detailed_DBI::init() void S_Detailed_DBI::init( int argc, char *argv[] ) { Simulation::init(argc,argv); acc_reward = 0.0; acc_steps = 0; trial = 0; } //============================================================================ // S_Detailed_DBI::start_trial() void S_Detailed_DBI::start_trial( void ) { Simulation::start_trial(); if ( trial > 0 ) { output_summary(); acc_reward = 0.0; acc_steps = 0; } trial++; } //============================================================================ // S_Detailed_DBI::collect_data() void S_Detailed_DBI::collect_data( const Sensation *ps, const Action *pa, const Sensation *pnext_s, double reward ) { State *pstate = (State *)ps; cout<< pstate->pos<< " "<< pstate->vel<< endl; acc_reward = ((E_DBI *)penv)->GAMMA*acc_reward + reward; acc_steps += 1; } 
//============================================================================ // S_Detailed_DBI::output_summary() void S_Detailed_DBI::output_summary( void ) { cout<< "# trial: "; cout.width(3); cout<< trial<< " steps: "; cout.width(5); cout<< acc_steps<< " acc. reward: "; cout.precision(10); cout<< acc_reward<< endl; } //============================================================================ // S_Report_DBI::init() void S_Report_DBI::init( int argc, char *argv[] ) { Simulation::init(argc,argv); acc_reward = 0.0; acc_steps = 0; trial = 0; cout<< "trials steps Acc. reward"<< endl; } //============================================================================ // S_Report_DBI::start_trial() void S_Report_DBI::start_trial( void ) { Simulation::start_trial(); if ( trial > 0 ) { cout.width(4); cout<< trial; cout.width(10); cout<< acc_steps; printf("%15.4f\n",acc_reward); acc_reward = 0.0; acc_steps = 0; } trial++; } //============================================================================ // S_Report_DBI::collect_data() void S_Report_DBI::collect_data( const Sensation *ps, const Action *pa, const Sensation *pnext_s, double reward ) { acc_reward = ((E_DBI *)penv)->GAMMA*acc_reward + reward; acc_steps += 1; } // -- Global function definitions int main( int argc, char *argv[] ) { E_DBI *pe; long num_trials = 50; long max_steps_per_trial = 200; // create agent according to switch #ifdef _OPTIMAL A_Optimal_DBI *pa = new A_Optimal_DBI; #endif #ifdef _CMAC A_CMAC_DBI *pa = new A_CMAC_DBI; #endif // create DBI environment pe = new E_DBI(1.0); // create simulation according to switch #ifdef _OPTIMAL psim = new S_Detailed_DBI(pa,pe); #endif #ifdef _CMAC psim = new S_Report_DBI(pa,pe); #endif // init with random seed: 1 char *args[1]; args[0] = "1"; psim->init( 1, args ); // run trials #ifdef _OPTIMAL psim->steps( max_steps_per_trial ); psim->output_summary(); #endif #ifdef _CMAC psim->trials( num_trials, max_steps_per_trial ); #endif } /****************************** 
end of file *********************************/