/***
*
* agent-optimal-dbi.h
*
* $Revision: 1.3 $
*
* Description:
* This module is the interface for the optimal agent's functions
* controlling the double integrator. This is a linear dynamical system
* satisfying dx/dt = A x + B u, with the quadratic cost x'Qx + u'Ru
* (i.e. an LQ regulator).
*
* For this system, the optimal value function is quadratic. More
* specifically, it is specified by a symmetric 2x2 matrix P as
* V(x) = x'Px, where P satisfies the steady-state Riccati differential
* equation:
*
* 0 = A'P + P A + Q - P (B R^-1 B') P
*
* The optimal policy is given by u = - R^-1 B' P x
*
* For this task, the matrices are as follows:
*
*       | vel |            | 0  0 | | vel |   | 1 |          | 0  0 |
*   x = |     |    dx/dt = |      | |     | + |   | u    Q = |      |    R = 1
*       | pos |            | 1  0 | | pos |   | 0 |          | 0  1 |
*
*                                      | sqrt(2)     1    |
* Solving the Riccati equation:    P = |                  |
*                                      |    1     sqrt(2) |
*
* which defines the optimal policy as u = - ( sqrt(2) vel + pos )
* and the value function as V(vel,pos) = sqrt(2)*(vel*vel + pos*pos) +
* 2*vel*pos
*
* Author:
* Juan Carlos Santamaria
* E-mail: carlos@cc.gatech.edu
* URL: http://www.cc.gatech.edu/ai/students/jcs
*
* File name:
* $Id: agent-optimal-dbi.h,v 1.3 1996/09/19 21:58:38 carlos Exp $
*
* Revision History:
* $Log: agent-optimal-dbi.h,v $
* Revision 1.3 1996/09/19 21:58:38 carlos
* Eliminate the arguments ps and pa from Agent::step.
*
* Revision 1.2 1996/08/28 20:17:15 carlos
* Change in the order of arguments in Agent::step:
* old: S,A,r,S' new: S,A,S',r
*
* Revision 1.1 1996/08/14 20:53:46 carlos
* Initial revision
*
****/
#pragma interface
#ifndef _INCL_AGENT_OPTIMAL_DBI
#define _INCL_AGENT_OPTIMAL_DBI
// -- Include files
#ifndef _INCL_RLI
#include "rli.h"
#endif
#ifndef _INCL_ENVIRONMENT_DBI
#include "environment-dbi.h"
#endif
// -- Class and type declarations
// Agent for the double-integrator task that follows the closed-form
// optimal (LQ regulator) policy described in the header comment at the
// top of this file.  Only the interface is declared here; init(),
// start_trial() and step() are defined out of line.
class A_Optimal_DBI : public Agent
{
public:
    // Nothing to set up or tear down beyond the Agent base class.
    A_Optimal_DBI() : Agent() {}
    ~A_Optimal_DBI() {}

    // Receives a command-line style argument vector (argc entries in
    // argv).  How the arguments are interpreted is decided by the
    // implementation file.
    void init( int argc, char *argv[] );

    // Takes the sensation `ps` and returns an action.
    // NOTE(review): presumably `ps` is the first sensation of a new
    // trial, going by the method name -- confirm against the Agent
    // base class.  Ownership of the returned Action is not specified
    // in this header.
    Action *start_trial( const Sensation *ps );

    // Takes the next sensation `pnext_s` together with the scalar
    // `reward` and returns an action.
    // NOTE(review): per the revision history, the previous sensation
    // and action are no longer passed to step(); verify how callers
    // are expected to track them.
    Action *step( const Sensation *pnext_s, double reward );
};
/****************************** end of file *********************************/
#endif