/*** * * agent-optimal-dbi.cc * * $Revision: 1.3 $ * * Description: * This module implements the agent's functions for controlling a * double integrator. This is a linear dynamical system satisfying: * x = A x + B u with the quadratic cost x'Qx + u'Ru. ie. LQ regulator. * * For this system, the optimal value function is quadratic. More * specifically, it is specified by a symmetric 2x2 matrix P as * V(x) = x'Px, where P satisfies the steady-state Riccati differential * equation: * * 0 = A'P + P A + Q - P (BR-1B') P * * The optimal policy is given by u = - R-1B'P x * * For this task, the matrixes are as follows: * * vel 0 0 vel 1 0 0 * x = dx/dt = + u Q = R = 1 * pos 1 0 pos 0 0 1 * * sqrt(2) 1 * Solving the Riccati equation: P = * 1 sqrt(2) * * which defines the optimal policy as u = - ( sqrt(2) vel + pos ) * and the value function as V(vel,pos) = sqrt(2)*(vel*vel + pos*pos) + * 2*vel*pos * * Author: * Juan Carlos Santamaria * E-mail: carlos@cc.gatech.edu * URL: http://www.cc.gatech.edu/ai/students/jcs * * File name: * $Id: agent-optimal-dbi.cc,v 1.3 1996/09/19 21:59:20 carlos Exp $ * * Revision History: * $Log: agent-optimal-dbi.cc,v $ * Revision 1.3 1996/09/19 21:59:20 carlos * Eliminate the arguments ps and pa from Agent::step. * * Revision 1.2 1996/08/28 20:03:24 carlos * Change in the order of arguments in Agent::step: * old: S,A,r,S' new: S,A,S',r * * Revision 1.1 1996/08/14 20:54:08 carlos * Initial revision * ****/ #pragma implementation // -- Include files #include < math.h > #include "agent-optimal-dbi.h" // -- Member function definitions //============================================================================ // A_Optimal_DBI::init() void A_Optimal_DBI::init( int argc, char *argv[] ) { } //============================================================================ // A_Optimal_DBI::start_trial() Action *A_Optimal_DBI::start_trial( const Sensation *psens ) { State *ps = (State *)psens; double acc = -( sqrt(2.0)*ps->vel + ps->pos ); acc = acc < FOR_MIN ? FOR_MIN : (acc > FOR_MAX ? FOR_MAX : acc); return new Force(acc); } //============================================================================ // A_Optimal_DBI::step() Action *A_Optimal_DBI::step( const Sensation *pnext_sens, double reward ) { State *ps = (State *)pnext_sens; double acc = -( sqrt(2.0)*ps->vel + ps->pos ); acc = acc < FOR_MIN ? FOR_MIN : (acc > FOR_MAX ? FOR_MAX : acc); return new Force(acc); } /****************************** end of file *********************************/