/***
*
* agent-optimal-dbi.cc
*
* $Revision: 1.3 $
*
* Description:
* This module implements the agent's functions for controlling a
* double integrator. This is a linear dynamical system satisfying:
 * dx/dt = A x + B u with the quadratic cost x'Qx + u'Ru, i.e. an LQ regulator.
*
* For this system, the optimal value function is quadratic. More
* specifically, it is specified by a symmetric 2x2 matrix P as
* V(x) = x'Px, where P satisfies the steady-state Riccati differential
* equation:
*
 * 0 = A'P + P A + Q - P B R^{-1} B' P
*
 * The optimal policy is given by u = - R^{-1} B' P x
*
 * For this task, the matrices are as follows:
 *
 *         [ vel ]           [ 0  0 ]       [ 1 ]         [ 1  0 ]
 *     x = [     ]   dx/dt = [      ] x  +  [   ] u   Q = [      ]   R = 1
 *         [ pos ]           [ 1  0 ]       [ 0 ]         [ 0  1 ]
*
 *                                       [ sqrt(2)     1    ]
 *   Solving the Riccati equation:   P = [                  ]
 *                                       [    1     sqrt(2) ]
*
* which defines the optimal policy as u = - ( sqrt(2) vel + pos )
* and the value function as V(vel,pos) = sqrt(2)*(vel*vel + pos*pos) +
* 2*vel*pos
*
* Author:
* Juan Carlos Santamaria
* E-mail: carlos@cc.gatech.edu
* URL: http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
* $Id: agent-optimal-dbi.cc,v 1.3 1996/09/19 21:59:20 carlos Exp $
*
* Revision History:
* $Log: agent-optimal-dbi.cc,v $
* Revision 1.3 1996/09/19 21:59:20 carlos
* Eliminate the arguments ps and pa from Agent::step.
*
* Revision 1.2 1996/08/28 20:03:24 carlos
* Change in the order of arguments in Agent::step:
* old: S,A,r,S' new: S,A,S',r
*
* Revision 1.1 1996/08/14 20:54:08 carlos
* Initial revision
*
****/
#pragma implementation
// -- Include files
#include <math.h>
#include "agent-optimal-dbi.h"
// -- Member function definitions
//============================================================================
// A_Optimal_DBI::init()
//
// Purpose:
//   One-time agent initialization. The optimal double-integrator policy is
//   a fixed closed-form expression (see file header), so there is no
//   internal state to set up and the command-line arguments are ignored.
//
// Arguments:
//   argc, argv : command-line arguments (unused by this agent).
void A_Optimal_DBI::init( int argc, char *argv[] )
{
   // Intentionally empty: nothing to initialize for the closed-form policy.
}
//============================================================================
// A_Optimal_DBI::start_trial()
//
// Purpose:
//   Select the first action of a trial using the closed-form optimal LQ
//   policy u = -R^{-1} B' P x = -( sqrt(2)*vel + pos ), saturated to the
//   admissible force range [FOR_MIN, FOR_MAX].
//
// Arguments:
//   psens : current sensation; for this task it is always a State carrying
//           (vel, pos), so the downcast below is safe by construction.
//
// Returns:
//   A newly allocated Force action holding the clipped command
//   (caller takes ownership).
Action *A_Optimal_DBI::start_trial( const Sensation *psens )
{
   // Downcast without discarding const: the policy only reads the state.
   const State *ps = (const State *)psens;

   // Optimal control for the double integrator (see file header).
   double acc = -( sqrt(2.0)*ps->vel + ps->pos );

   // Saturate the command to the actuator limits.
   if ( acc < FOR_MIN )
      acc = FOR_MIN;
   else if ( acc > FOR_MAX )
      acc = FOR_MAX;

   return new Force( acc );
}
//============================================================================
// A_Optimal_DBI::step()
//
// Purpose:
//   Select the next action of the trial. The optimal LQ policy is
//   memoryless and fixed, u = -( sqrt(2)*vel + pos ), so the incoming
//   reward carries no information the agent needs; the command is simply
//   recomputed from the new state and clipped to [FOR_MIN, FOR_MAX].
//
// Arguments:
//   pnext_sens : sensation after the previous action; always a State
//                carrying (vel, pos) for this task, so the downcast is
//                safe by construction.
//   reward     : immediate reward for the previous action (unused — the
//                closed-form policy does not learn).
//
// Returns:
//   A newly allocated Force action holding the clipped command
//   (caller takes ownership).
Action *A_Optimal_DBI::step( const Sensation *pnext_sens,
                             double reward )
{
   // Downcast without discarding const: the policy only reads the state.
   const State *ps = (const State *)pnext_sens;

   // Optimal control for the double integrator (see file header).
   double acc = -( sqrt(2.0)*ps->vel + ps->pos );

   // Saturate the command to the actuator limits.
   if ( acc < FOR_MIN )
      acc = FOR_MIN;
   else if ( acc > FOR_MAX )
      acc = FOR_MAX;

   return new Force( acc );
}
/****************************** end of file *********************************/