/***
*
* agent-cmac-dbi.cc
*
* $Revision: 1.3 $
*
* Description:
* This module implements the agent's functions for controlling a
* double integrator. The function approximation is implemented with
* cmacs.
*
* Author:
* Juan Carlos Santamaria
* E-mail: carlos@cc.gatech.edu
* URL: http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
* $Id: agent-cmac-dbi.cc,v 1.3 1996/09/19 22:10:22 carlos Exp $
*
* Revision History:
* $Log: agent-cmac-dbi.cc,v $
* Revision 1.3 1996/09/19 22:10:22 carlos
* - The discount factor, GAMMA, is now an agent field instead of a constant.
* The value can be set at the moment of creation through the constructor.
* The default value is 0.99.
* - Eliminate the arguments ps and pa from Agent::step.
*
* Revision 1.2 1996/08/29 15:53:19 carlos
* - Now using the constant TERMINAL_STATE instead of 0.
* - Old local variables are now member variables.
* - New private function A_CMAC_DBI::policy was added and subsequent
* modifications.
* - Change in the order of arguments in Agent::step:
* old: S,A,r,S' new: S,A,S',r
*
* Revision 1.1 1996/08/14 20:54:41 carlos
* Initial revision
*
****/
#pragma implementation
// -- Include files
#include < iostream.h >
#include < sys/stat.h >
#include "agent-cmac-dbi.h"
#include "usrdef.h"
#include "rand.h"
#include "cmac.h"
// -- Local constants
static const int NINTERVALS = 12;       // discretization intervals per cmac variable
static const int NACTIONS = 24+1;       // number of discrete force (action) values
static const double EPSILON = 0.0;      // exploration rate; 0 => purely greedy policy
static const double LAMBDA = 0.7;       // eligibility-trace decay factor
static const double ALPHA = 0.1;        // learning rate for value-function updates
static const double V_0 = 0.0;          // initial value of the value function
// -- Local function declarations
// Write the state components (pos, vel) into the cmac input vector `code'.
void encode_state( double code[3], const State* ps );
// Write the action component (acc) into the cmac input vector `code'.
void encode_action( double code[3], const Force *pa );
// -- Member function definitions
//============================================================================
// A_CMAC_DBI::init
//
// Initialize the agent: parse the command-line arguments, then either
// load a previously saved cmac from `filename' or create and configure a
// fresh one (3 variables, 36 tilings), and finally print a summary of the
// configuration.
//
// Expected arguments: rnd_stream [cmac.file] [-v (verbose)]
void A_CMAC_DBI::init( int argc, char *argv[] )
{
    if (argc < 1) {
        cerr<< "A_CMAC_DBI::init: wrong number of arguments"<< endl;
        cerr<< "init args: rnd_stream [cmac.file] [-v (verbose)]"<< endl;
        exit(1);
    }
    cout<< "A_CMAC_DBI::init:"<< endl;
    // set random stream
    rnd_strm = atoi(*argv++); argc--;
    // process remaining command line arguments: "-v" turns on verbose
    // mode, anything else is taken as the cmac file name
    while( argc-->0 ) {
        if ( strcmp(*argv,"-v") == 0 )
            verbose = TRUE;
        else
            strcpy(filename,*argv);
        argv++;
    }
    bool load_file = FALSE;
    struct stat buf;
    if ( (strlen(filename)>0) && (stat(filename,&buf)==0) ) {
        // a cmac file was given and exists: load it
        load_file = TRUE;
        pcmac = load_cmac_ca( filename );
        if ( pcmac == 0 )
            error("A_CMAC_DBI::init: cannot read file: ",filename);
    }
    else {
        // build a fresh cmac: 3 input variables, 36 tilings
        pcmac = new CMAC_CA(3,36);
        int pos_id, vel_id, for_id;
        int ids[3];
        // define cmac variables (inputs clipped to their limits)
        pos_id = pcmac->define_variable("pos",POS_MIN,POS_MAX,
                                        NINTERVALS,CMAC::CLIP);
        vel_id = pcmac->define_variable("vel",VEL_MIN,VEL_MAX,
                                        NINTERVALS,CMAC::CLIP);
        for_id = pcmac->define_variable("for",FOR_MIN,FOR_MAX,
                                        NINTERVALS,CMAC::CLIP);
        // define cmac grids: 12 tilings over (pos,vel,for),
        // 12 over (pos,vel), 6 over (pos) alone and 6 over (vel) alone
        ids[0] = pos_id;
        ids[1] = vel_id;
        ids[2] = for_id;
        pcmac->define_k_grids( 12, 3, &ids[0] );
        ids[0] = pos_id;
        ids[1] = vel_id;
        pcmac->define_k_grids( 12, 2, &ids[0] );
        ids[0] = pos_id;
        pcmac->define_k_grids( 6, 1, &ids[0] );
        ids[0] = vel_id;
        pcmac->define_k_grids( 6, 1, &ids[0] );
        // define cmac initial value-function
        pcmac->initialize_value_function( V_0 );
    }
    // report the final configuration
    cout<< "\tprogram: cmac learning: "<< endl;
    cout<< "\tfile: ";
    if (load_file) cout<< "loading "<< filename<< endl;
    else if (strlen(filename)>0) cout<< "will store in "<< filename<< endl;
    else cout<< "none"<< endl;
    cout<< "\tcmac definition:"<< endl;
    cout<< "\t\tvariables: "<< pcmac->nvars<< endl;
    // BUG FIX: the loop condition used to read "invars"; the comparison
    // "i < pcmac->nvars" had been lost (apparently "<pcmac->" was
    // stripped as an HTML tag), which made the code invalid.
    for( int i=0 ; i < pcmac->nvars ; i++ ) {
        cout<< "\t\t "<< pcmac->vars[i]->name<< ":\t";
        cout<< "max: "<< pcmac->vars[i]->max_limit<< " ";
        cout<< "min: "<< pcmac->vars[i]->min_limit<< " ";
        cout<< "intervals: "<< pcmac->vars[i]->intervals<< endl;
    }
    cout<< "\t\ttilings: "<< pcmac->ngrids<< endl;
    cout<< "\t\t 12 with pos, vel, for"<< endl;
    cout<< "\t\t 12 with pos, vel"<< endl;
    cout<< "\t\t 6 with pos"<< endl;
    cout<< "\t\t 6 with vel"<< endl;
    cout<< "\tgamma: "<< GAMMA<< endl;
    cout<< "\tepsilon: "<< EPSILON<< endl;
    cout<< "\tlambda: "<< LAMBDA<< endl;
    cout<< "\talpha: "<< ALPHA<< endl;
}
//============================================================================
// A_CMAC_DBI::start_trial
//
// Begin a new trial: reset all eligibility traces and choose the first
// action for the initial sensation `ps'. The returned Force object is
// owned by the caller.
Action *A_CMAC_DBI::start_trial( const Sensation *ps )
{
    // a new trial must not carry credit from a previous one
    pcmac->clear_eligibilities();
    // let the policy fill in the chosen acceleration
    Force *first_action = new Force;
    policy( (State *)ps, first_action );
    return first_action;
}
//============================================================================
// A_CMAC_DBI::step
//
// One learning step (Sarsa(lambda)-style TD update). The member
// `features' holds the tiles of the previous state-action pair, stored
// by policy() at the previous decision. `pnext_s' is the sensation that
// resulted from the previous action; `reward' is the reward received for
// it. Returns the next action to execute (caller owns it), or 0 when
// `pnext_s' is the terminal state. NOTE: the statement order below
// matters — the old value must be read and the traces replaced before
// the value function is updated.
Action *A_CMAC_DBI::step( const Sensation *pnext_s,
double reward )
{
if (verbose)
cerr<< "A_CMAC_DBI::step:"<< endl;
// update eligibility traces (decayed with GAMMA and LAMBDA —
// presumably by the factor GAMMA*LAMBDA; see CMAC::update_eligibilities)
pcmac->update_eligibilities( GAMMA, LAMBDA );
// compute previous state-action value
double prev_value;
prev_value = pcmac->compute_value_function( features );
// replace eligibilities for the previous pair's tiles
pcmac->replace_eligibilities( features );
// check if we reach a terminal state (ie. out of bounds)
if ( pnext_s == TERMINAL_STATE ) {
// terminal: the TD target is just the final reward
double delta = reward - prev_value;
if (verbose) {
cerr<< "\texpected: ";
cerr.width(12);
cerr<< reward<< " current: ";
cerr.width(12);
cerr<< prev_value<< " delta: ";
cerr<< delta<< endl;
}
pcmac->update_value_function( ALPHA*delta );
return 0; // return a dummy force
}
// select next action and value function (on-policy: the action that
// will actually be executed provides the bootstrap value)
Force *pnext_a = new Force;
double curr_value;
curr_value = policy( (State *)pnext_s, pnext_a );
// TD error: reward + discounted next value minus previous estimate
double delta = reward + GAMMA*curr_value - prev_value;
if (verbose) {
cerr<< "\texpected: ";
cerr.width(12);
cerr<< (reward + GAMMA*curr_value)<< " current: ";
cerr.width(12);
cerr<< prev_value<< " delta: ";
cerr<< delta<< endl;
}
pcmac->update_value_function( ALPHA*delta );
return pnext_a;
}
//============================================================================
// A_CMAC_DBI::output_value_function_grid
//
// Dump V(s) = max over all actions of Q(s,a) for a NINTERVALS x
// NINTERVALS grid of (pos,vel) cell centers to the stream `to'.
// Velocity rows are written from highest to lowest; a blank line
// follows each row.
void A_CMAC_DBI::output_value_function_grid( ostream& to )
{
    int tiles[36];
    double code[3];
    State grid_s;
    Force grid_a;
    double step_pos = (POS_MAX-POS_MIN)/NINTERVALS;
    double step_vel = (VEL_MAX-VEL_MIN)/NINTERVALS;
    double step_for = (FOR_MAX-FOR_MIN)/NACTIONS;
    for( int row = NINTERVALS-1 ; row >= 0 ; row-- ) {
        for( int col = 0 ; col < NINTERVALS ; col++ ) {
            // evaluate at the center of cell (col,row)
            grid_s.pos = POS_MIN + step_pos/2.0 + step_pos*col;
            grid_s.vel = VEL_MIN + step_vel/2.0 + step_vel*row;
            encode_state( code, &grid_s );
            // seed the maximum with the first action's value ...
            grid_a.acc = FOR_MIN + step_for/2.0;
            encode_action( code, &grid_a );
            pcmac->get_tiles( tiles, code );
            double best = pcmac->compute_value_function(tiles);
            // ... then scan the remaining actions
            for( int a = 1 ; a < NACTIONS ; a++ ) {
                grid_a.acc = FOR_MIN + step_for/2.0 + a*step_for;
                encode_action( code, &grid_a );
                pcmac->get_tiles( tiles, code );
                double q = pcmac->compute_value_function(tiles);
                if ( q > best )
                    best = q;
            }
            to<< best<< endl;
        }
        to<< endl;
    }
}
//============================================================================
// A_CMAC_DBI::policy
double A_CMAC_DBI::policy( const State *ps, Force *pa )
{
if (verbose)
cerr<< "A_CMAC_DBI::policy:"<< endl;
double code[3];
double value;
int wrk_features[36];
double delta_a = (FOR_MAX-FOR_MIN)/NACTIONS;
// encode state with the minimum value for force first
encode_state( code, ps );
double qvalues[NACTIONS];
// analize all possibilities
for( int a=0 ; a < NACTIONS ; a++ ) {
pa->acc = FOR_MIN + delta_a/2.0 + a*delta_a;
encode_action( code, pa );
pcmac->get_tiles( wrk_features, code );
qvalues[a] = pcmac->compute_value_function(wrk_features);
}
// greedy search
int action = select_best(NACTIONS,qvalues,1.0-EPSILON,rnd_strm);
pa->acc = FOR_MIN + delta_a/2.0 + action*delta_a;
value = qvalues[action];
// store state-action features of decision
encode_action( code, pa );
pcmac->get_tiles( features, code );
if (verbose) {
cerr<< "\tvalue: "<< value<< endl;
cerr<< "\tacceleration: "<< pa->acc<< endl;
for( int k=0 ;k < NACTIONS ; k++ ) {
cerr<< "\t";
cerr.width(10);
cerr<< qvalues[k];
}
cerr<< endl;
cerr<< "\t";
for( int k=0 ; k < 36 ; k++ )
cerr<< features[k]<< " ";
cerr<< endl;
}
return value;
}
// -- Local function definitions
// Copy the state components into the cmac input vector `code'.
void encode_state( double code[3], const State *ps )
{
    code[POS] = ps->pos;
    code[VEL] = ps->vel;
}
// Copy the action component into the cmac input vector `code'.
void encode_action( double code[3], const Force *pa )
{
    code[FOR] = pa->acc;
}
/****************************** end of file *********************************/