// agent-optimal-dbi.cc (20-Sep-1996)

/***
*
* agent-optimal-dbi.cc
* 
* $Revision: 1.3 $
* 
* Description:
*   This module implements the agent's functions for controlling a
*   double integrator. This is a linear dynamical system satisfying:
*   dx/dt = A x + B u  with the quadratic cost x'Qx + u'Ru, i.e. an
*   LQ regulator.
*
*   For this system, the optimal value function is quadratic. More
*   specifically, it is specified by a symmetric 2x2 matrix P as
*   V(x) = x'Px, where P satisfies the steady-state Riccati differential
*   equation:
*
*     0 = A'P + P A + Q - P (B R^-1 B') P
*
*   The optimal policy is given by u = - R^-1 B' P x
*
*   For this task, the matrices are as follows:
*
*       vel            0  0  vel     1           0  0
*   x =        dx/dt =            +    u     Q =        R = 1
*       pos            1  0  pos     0           0  1
*
*                                       sqrt(2)    1
*   Solving the Riccati equation:   P =
*                                          1     sqrt(2)
*
*   which defines the optimal policy as u = - ( sqrt(2) vel + pos )
*   and the value function as V(vel,pos) = sqrt(2)*(vel*vel + pos*pos) +
*   2*vel*pos
*
* Author:
*   Juan Carlos Santamaria
*     E-mail: carlos@cc.gatech.edu
*     URL:    http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
*   $Id: agent-optimal-dbi.cc,v 1.3 1996/09/19 21:59:20 carlos Exp $
*
* Revision History:
*   $Log: agent-optimal-dbi.cc,v $
*   Revision 1.3  1996/09/19  21:59:20  carlos
*   Eliminate the arguments ps and pa from Agent::step.
*
*   Revision 1.2  1996/08/28  20:03:24  carlos
*   Change in the order of arguments in Agent::step:
*     old: S,A,r,S'    new: S,A,S',r
*
*   Revision 1.1  1996/08/14  20:54:08  carlos
*   Initial revision
*
****/

#pragma implementation


// -- Include files

#include <math.h>

#include "agent-optimal-dbi.h"


// -- Member function definitions

//============================================================================
// A_Optimal_DBI::init()
//
// Start-up hook of the agent. The controller implemented in this file is a
// fixed feedback law, and this routine performs no work: both arguments are
// accepted and ignored.
//
//   argc : number of entries in argv (not examined).
//   argv : argument strings (not examined).

void A_Optimal_DBI::init( int argc, char *argv[] )
{
    // Intentionally empty; the casts only mark the arguments as unused.
    (void)argc;
    (void)argv;
}


//============================================================================
// A_Optimal_DBI::start_trial()
//
// Chooses the first action of a trial by applying the optimal LQ feedback
// law  u = -( sqrt(2)*vel + pos )  to the initial sensation and then
// limiting the result to the interval [FOR_MIN, FOR_MAX].
//
//   psens   : initial sensation. It is read as a State (fields vel and pos)
//             and is dereferenced without a check, so it must not be null.
//   returns : a Force allocated with new that carries the limited command.
//             NOTE(review): the caller presumably takes ownership of the
//             returned object -- confirm against the simulation loop.

Action *A_Optimal_DBI::start_trial( const Sensation *psens )
{
    // Cast to pointer-to-const: the sensation is only read here, so the
    // const qualifier of the argument is kept instead of being cast away.
    const State *state = (const State *)psens;

    // Unconstrained optimal command for the double integrator.
    double force = -( sqrt(2.0)*state->vel + state->pos );

    // Saturate the command. FOR_MIN / FOR_MAX are declared outside this
    // file (presumably the force limits of the task -- verify in the header).
    if ( force < FOR_MIN )
        force = FOR_MIN;
    else if ( force > FOR_MAX )
        force = FOR_MAX;

    return new Force(force);
}


//============================================================================
// A_Optimal_DBI::step()
//
// Chooses the next action from the sensation that follows the previous
// action. The optimal LQ feedback law  u = -( sqrt(2)*vel + pos )  is
// evaluated on that sensation and the result is limited to the interval
// [FOR_MIN, FOR_MAX]. No learning takes place, so the reward is not used.
//
//   pnext_sens : sensation observed after the previous action. It is read
//                as a State (fields vel and pos) and is dereferenced without
//                a check, so it must not be null.
//   reward     : reward received for the previous action; ignored.
//   returns    : a Force allocated with new that carries the limited
//                command. NOTE(review): the caller presumably takes
//                ownership of the returned object -- confirm against the
//                simulation loop.

Action *A_Optimal_DBI::step( const Sensation *pnext_sens,
                             double           reward )
{
    // The policy is fixed, so the reward carries no information for it.
    (void)reward;

    // Cast to pointer-to-const: the sensation is only read here, so the
    // const qualifier of the argument is kept instead of being cast away.
    const State *state = (const State *)pnext_sens;

    // Unconstrained optimal command for the double integrator.
    double force = -( sqrt(2.0)*state->vel + state->pos );

    // Saturate the command. FOR_MIN / FOR_MAX are declared outside this
    // file (presumably the force limits of the task -- verify in the header).
    if ( force < FOR_MIN )
        force = FOR_MIN;
    else if ( force > FOR_MAX )
        force = FOR_MAX;

    return new Force(force);
}


/****************************** end of file *********************************/