invert3channelcontroller.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2005 by Robot Group Leipzig                             *
00003  *    martius@informatik.uni-leipzig.de                                    *
00004  *    fhesse@informatik.uni-leipzig.de                                     *
00005  *    der@informatik.uni-leipzig.de                                        *
00006  *                                                                         *
00007  *   LICENSE:                                                              *
00008  *   This work is licensed under the Creative Commons                      *
00009  *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
00010  *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
00011  *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
00012  *   San Francisco, California, 94105, USA.                                *
00013  *                                                                         *
00014  *   This program is distributed in the hope that it will be useful,       *
00015  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00016  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
00017  *                                                                         *
00018  *   $Log: invert3channelcontroller.h,v $
00019  *   Revision 1.4  2006/07/14 12:23:58  martius
00020  *   selforg becomes HEAD
00021  *
00022  *   Revision 1.2.6.2  2006/07/10 13:05:16  martius
00023  *   NON-COMMERICAL LICENSE added to controllers
00024  *
00025  *                                                                         *
00026  ***************************************************************************/
00027  
00028 #ifndef __INVERT3CHANNELCONTROLLER_H
00029 #define __INVERT3CHANNELCONTROLLER_H
00030 
00031 #include "invertcontroller.h"
00032 
00033 #include <cassert>
00034 #include <math.h>
00035 
00036 
00037 /**
00038  * class for robot controller that use naglaa's direct matrix inversion for n channels 
00039  * (simple one layer networks)
00040  * 
00041  * Implements standart parameters: eps, rho, mu, stepnumber4avg, stepnumber4delay
00042  */
00043 template <int NUMBER_CHANNELS, int BUFFER_SIZE=2> class Invert3ChannelController : public InvertController {
00044 
00045 public:
00046 
00047   /*
00048     Invert3ChannelController();
00049 
00050     //virtual ~Invert3ChannelController(){}
00051 
00052 
00053     /// performs one step (includes learning). Calulates motor commands from sensor inputs.
00054     virtual void step(const sensor*, motor*);
00055 
00056     /// performs one step without learning. Calulates motor commands from sensor inputs.
00057     virtual void stepNoLearning(const sensor*, motor*);
00058   */
00059 
00060 
00061   //protected:
00062 public:
00063   double A[NUMBER_CHANNELS][NUMBER_CHANNELS];    ///< model matrix
00064   double C[NUMBER_CHANNELS][NUMBER_CHANNELS];    ///< controller matrix
00065   double h[NUMBER_CHANNELS];       ///< bias vector
00066   
00067   double x_buffer[BUFFER_SIZE][NUMBER_CHANNELS]; ///< buffer for input values, x[t%buffersize]=actual value, x[(t-1+buffersize)%buffersize]=x(t-1)  
00068   double y_buffer[BUFFER_SIZE][NUMBER_CHANNELS]; ///< buffer for output values, y[t%buffersize]=actual value(if already calculated!), y[(t-1+buffersize)%buffersize]=y(t-1)   
00069   
00070   double xsi4E[NUMBER_CHANNELS];
00071   double xsi4Model[NUMBER_CHANNELS];
00072 
00073   int    t;       ///< number of steps, needed for ringbuffer x_buffer
00074 
00075 
00076 
00077 
00078 
00079   /*
00080     virtual void inverseMatrix(double Q[NUMBER_CHANNELS][NUMBER_CHANNELS], double Q_1[NUMBER_CHANNELS][NUMBER_CHANNELS]);
00081 
00082     virtual double calculateE(double *x_delay, double *y_delay);
00083   
00084     /// learn values h,C   //,A
00085     virtual void learn(double *x_delay, double *y_delay);
00086   
00087     /// learn model parameter (matrix A) by gradient descent
00088     virtual void learnModel(double *x_actual, double *y_effective);
00089 
00090 
00091     /// calculate delayed values
00092     virtual void calculateDelayedValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], int number_steps_of_delay_, double *target);
00093 
00094     virtual void calculateSmoothValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], int number_steps_for_averaging_, double *target);
00095 
00096     /// calculate controller ouptus
00097     virtual void calculateControllerValues(double *x_smooth, double *y);
00098   
00099     // put new value in ring buffer
00100     virtual void putInBuffer(double buffer[BUFFER_SIZE][NUMBER_CHANNELS], double *values);  
00101 
00102   */
00103   /// neuron transfer function
00104   virtual double g(double z)
00105   {
00106     return tanh(z);
00107   };
00108 
00109 
00110 
00111   ///
00112   virtual double g_s(double z)
00113   {
00114     return 1.0 - tanh(z) * tanh(z);
00115   };
00116 
00117 
00118 
00119   /// squashing function, to protect against to large weight updates
00120   virtual double squash(double z)
00121   {
00122     return 0.1 * tanh(10.0 * z);
00123   };
00124 
00125 
00126   //template <int NUMBER_CHANNELS, int BUFFER_SIZES> 
00127   //    Invert3ChannelController<int NUMBER_CHANNELS, int BUFFER_SIZES>::
00128   Invert3ChannelController(){
00129   
00130     t=0;
00131     
00132     for (int i = 0; i < NUMBER_CHANNELS; i++)
00133       {
00134         h[i] = 0.0;
00135         for (int j = 0; j < NUMBER_CHANNELS; j++)
00136           {
00137             if (i == j)
00138               {
00139                 A[i][j] = 1.0;
00140                 C[i][j] = 0.1;
00141               }
00142             else
00143               {
00144                 A[i][j] = 0.0;
00145                 C[i][j] = 0.0;
00146               }
00147           }
00148       }
00149     for (int i = 0; i < NUMBER_CHANNELS; i++)
00150       {
00151         for (int k = 0; k < BUFFER_SIZE; k++)
00152           {
00153             x_buffer[k][i] = 0;
00154             y_buffer[k][i] = 0; 
00155           }
00156       }
00157     
00158     /*    // print initial values
00159           std::cout<<"Constructor of RobotLearnControl:"<<std::endl;
00160           std::cout<<"init: epsilon="<<eps<<std::endl;     
00161           std::cout<<"init: rho="<<rho<<std::endl;         
00162           std::cout<<"init: stepnumber4delay="<<stepnumber4delay<<std::endl; 
00163           std::cout<<"init: stepnumber4avg="<<stepnumber4avg<<std::endl;     
00164           std::cout<<"init: delta="<<delta<<std::endl; 
00165           std::cout<<"init: m (for calculation of E)="<<m<<std::endl; 
00166     */    
00167   };
00168 
00169   /// performs one step (includes learning). Calulates motor commands from sensor inputs.
00170   virtual void step(const sensor* x_, int sensornumber, 
00171                     motor* y_, int motornumber) {
00172     double x_smooth[NUMBER_CHANNELS];
00173     double x_effective[NUMBER_CHANNELS];
00174     double y_effective[NUMBER_CHANNELS];    
00175     for (int i = 0; i < NUMBER_CHANNELS; i++)
00176       {
00177         x_smooth[i] = 0.0;
00178         x_effective[i] = 0.0;
00179         y_effective[i] = 0.0;
00180       }
00181 
00182     // put new input value in ring buffer x_buffer
00183     putInBuffer(x_buffer, x_);
00184 
00185     // averaging over the last stepnumber4avg values of x_buffer
00186     calculateSmoothValues(x_buffer, stepnumber4avg, x_smooth);
00187     
00188     // calculate controller values based on smoothed input values
00189     calculateControllerValues(x_smooth, y_);
00190 
00191     // put new output value in ring buffer y_buffer
00192     putInBuffer(y_buffer, y_);
00193 
00194     // calculate effective input/output, which is (actual-stepnumber4delay) element of buffer
00195     calculateDelayedValues(x_buffer, stepnumber4delay, x_effective);    
00196     calculateDelayedValues(y_buffer, stepnumber4delay, y_effective);
00197  
00198     // learn controller with effective input/output
00199     learn(x_, x_effective, y_effective);
00200     
00201     // learn model with actual input and effective output (that produced the actual input)
00202     learnModel(x_, y_effective);
00203 
00204     // update step counter
00205     t++;
00206   };
00207 
00208 
00209   /// performs one step without learning. Calulates motor commands from sensor inputs.
00210   virtual void stepNoLearning(const sensor* x_, int sensornumber,  
00211                               motor* y_, int motornumber){
00212     double x_smooth[NUMBER_CHANNELS];
00213     for (int i = 0; i < NUMBER_CHANNELS; i++)
00214       {
00215         x_smooth[i] = 0.0;
00216       }
00217 
00218 
00219     // put new input value in ring buffer x_buffer
00220     putInBuffer(x_buffer, x_);
00221 
00222     // averaging over the last stepnumber4avg values of x_buffer
00223     calculateSmoothValues(x_buffer, stepnumber4avg, x_smooth);
00224     
00225     // calculate controller values based on smoothed input values
00226     calculateControllerValues(x_smooth, y_);
00227 
00228     // put new output value in ring buffer y_buffer
00229     putInBuffer(y_buffer, y_);
00230 
00231     // update step counter
00232     t++;
00233   };
00234   
00235  
00236 protected:
00237   virtual void inverseMatrix(double Q[NUMBER_CHANNELS][NUMBER_CHANNELS], double Q_1[NUMBER_CHANNELS][NUMBER_CHANNELS]){
00238     // Berechne Inverse von Q
00239 
00240     // only if NUMBER_CHANNELS<4
00241     assert(NUMBER_CHANNELS<4);
00242     if (NUMBER_CHANNELS==2){
00243 
00244       double det = Q[0][0] * Q[1][1] - Q[0][1] * Q[1][0];
00245       Q_1[0][0] = Q[1][1] / det;
00246       Q_1[1][1] = Q[0][0] / det;
00247       Q_1[0][1] = -Q[0][1] / det;
00248       Q_1[1][0] = -Q[1][0] / det;
00249       
00250     }
00251     
00252     
00253     if (NUMBER_CHANNELS==3){  
00254       
00255       double  Q_adjoint[NUMBER_CHANNELS][NUMBER_CHANNELS]  ;
00256       double  detQ=0  ;
00257       
00258       //calculate the inverse of Q
00259       Q_adjoint[0][0]=Q[1][1]*Q[2][2]-Q[1][2]*Q[2][1] ;
00260       Q_adjoint[0][1]=(Q[1][2]*Q[2][0]-Q[1][0]*Q[2][2]) ;
00261       Q_adjoint[0][2]=Q[1][0]*Q[2][1]-Q[1][1]*Q[2][0] ;
00262       Q_adjoint[1][0]=(Q[2][1]*Q[0][2]-Q[0][1]*Q[2][2]) ;
00263       Q_adjoint[1][1]=Q[0][0]*Q[2][2]-Q[0][2]*Q[2][0] ;
00264       Q_adjoint[1][2]=(Q[0][1]*Q[2][0]-Q[0][0]*Q[2][1]) ;
00265       Q_adjoint[2][0]=Q[0][1]*Q[1][2]-Q[1][1]*Q[0][2] ;
00266       Q_adjoint[2][1]=(Q[1][0]*Q[0][2]-Q[0][0]*Q[1][2]) ;
00267       Q_adjoint[2][2]=Q[0][0]*Q[1][1]-Q[0][1]*Q[1][0] ;
00268       detQ=Q[0][0]*Q_adjoint[0][0]+Q[0][1]*Q_adjoint[0][1]+Q[0][2]*Q_adjoint[0][2] ;
00269       for(int i=0; i<NUMBER_CHANNELS; i++){
00270         for(int j=0; j<NUMBER_CHANNELS; j++) {                                                     
00271           Q_1[i][j]=(Q_adjoint[j][i])/detQ  ;
00272         }
00273       }       
00274     } 
00275   };
00276 
00277 
00278   virtual double calculateE(const double *x_, const double *x_delay, const  double *y_delay) {
00279     double L[NUMBER_CHANNELS][NUMBER_CHANNELS];
00280     double Q[NUMBER_CHANNELS][NUMBER_CHANNELS];
00281     double Q_1[NUMBER_CHANNELS][NUMBER_CHANNELS];
00282     double z[NUMBER_CHANNELS];
00283     
00284 
00285     // Calculate z based on the delayed inputs since the present input x is
00286     // produced by the outputs tau time steps before
00287     // which on their hand are y = K(x_D)
00288     // due to the delay in the feed back loop.
00289 
00290     for (int i = 0; i < NUMBER_CHANNELS; i++)
00291       {
00292         z[i] = h[i];
00293         for (int j = 0; j < NUMBER_CHANNELS; j++)
00294           {
00295             z[i] += C[i][j] * x_delay[j];
00296           }
00297       }
00298 
00299     // Berechne Matrix L
00300     for (int i = 0; i < NUMBER_CHANNELS; i++)
00301       {
00302         for (int j = 0; j < NUMBER_CHANNELS; j++)
00303           {
00304             L[i][j] = 0.0;
00305             for (int k = 0; k < NUMBER_CHANNELS; k++)
00306               {
00307                 L[i][j] += A[i][k] * g_s(z[k]) * C[k][j];
00308               }
00309           }
00310       }
00311 
00312     // Berechne Q=LL^T
00313     for (int i = 0; i < NUMBER_CHANNELS; i++)
00314       {
00315         for (int j = 0; j < NUMBER_CHANNELS; j++)
00316           {
00317             Q[i][j] = 0.0;
00318             for (int k = 0; k < NUMBER_CHANNELS; k++)
00319               {
00320                 Q[i][j] += L[i][k] * L[j][k];
00321               }
00322             if (i == j)
00323               Q[i][j] += rho / NUMBER_CHANNELS; // Regularisation
00324           }
00325       }
00326 
00327     // Berechne Inverse von Q
00328     inverseMatrix(Q, Q_1);
00329 
00330     // Berechne xsi
00331     for (int i = 0; i < NUMBER_CHANNELS; i++)
00332       {
00333         xsi4E[i] = x_[i];
00334         for (int j = 0; j < NUMBER_CHANNELS; j++)
00335           {
00336             xsi4E[i] -= A[i][j] * y_delay[j];  // using old y value -> no influence of changes (adding delta) in controller
00337             // very very strange!!!!
00338             //xsi4E[i] -= A[i][j] * g(z[j]);     // using recalculating y -> influence of changes (adding delta) in controller
00339           }
00340       }
00341     double E = 0;
00342     for (int i = 0; i < NUMBER_CHANNELS; i++)
00343       {
00344         for (int j = 0; j < NUMBER_CHANNELS; j++)
00345           {
00346             E += xsi4E[i] * Q_1[i][j] * xsi4E[j];
00347           }
00348       }
00349 
00350     double E_s=0;
00351     for (int i = 0; i < NUMBER_CHANNELS; i++)
00352       {
00353         for (int j = 0; j < NUMBER_CHANNELS; j++)
00354           {
00355             E_s += (A[i][j]*g(z[j]) - x_[i]) * (A[i][j]*g(z[j]) - x_[i]);
00356           }
00357       }
00358 
00359     E=(1-m)*E+ m*E_s;
00360     return (E);
00361 
00362   };
00363 
00364 
00365   virtual void learn(const double *x_, const double *x_delay, const double *y_delay)
00366   {
00367     //    double A_update[NUMBER_CHANNELS][NUMBER_CHANNELS];
00368     double C_update[NUMBER_CHANNELS][NUMBER_CHANNELS];
00369     double h_update[NUMBER_CHANNELS];
00370 
00371     double E_0 = calculateE(x_,x_delay, y_delay);
00372 
00373     // calculate updates for h,C,A
00374     for (int i = 0; i < NUMBER_CHANNELS; i++)
00375       {
00376         h[i] += delta;
00377         h_update[i] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta;
00378         h[i] -= delta;
00379         for (int j = 0; j < NUMBER_CHANNELS; j++)
00380           {
00381             C[i][j] += delta;
00382             C_update[i][j] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta;
00383             C[i][j] -= delta;
00384             //A[i][j] += delta;
00385             //A_update[i][j] = -eps * a_factor * (calculateE(x_delay, y_delay) - E_0) / delta;
00386             //A[i][j] -= delta;
00387           }
00388       }
00389 
00390     // apply updates to h,C,A
00391     for (int i = 0; i < NUMBER_CHANNELS; i++)
00392       {
00393         h[i] += squash(h_update[i]);
00394         for (int j = 0; j < NUMBER_CHANNELS; j++)
00395           {
00396             C[i][j] += squash(C_update[i][j]);
00397             //A[i][j] += squash(A_update[i][j]);
00398           }
00399       }
00400   };
00401 
00402   
00403   virtual void learnModel(const double *x_actual, double *y_effective){
00404     /*  double z[N_output];    
00405         for(int i=0; i<N_output; i++){
00406         z[i]=h[i];
00407         for(int j=0; j<N_input; j++) {
00408         z[i]+=C[i][j]*x_D[j];
00409         }
00410         }
00411     */
00412     // Berechne xsi
00413     for(int i=0; i<NUMBER_CHANNELS; i++){
00414       xsi4Model[i]=x_actual[i];  
00415       for(int j=0; j<NUMBER_CHANNELS; j++){
00416         xsi4Model[i]-= A[i][j]*y_effective[j];
00417       }  
00418     }
00419 
00420     for(int i=0; i<NUMBER_CHANNELS; i++){
00421       for (int j=0; j<NUMBER_CHANNELS; j++){   
00422         A[i][j]+=squash( (factor_a*eps*0.2) *xsi4Model[i] * y_effective[j]) ;
00423       }
00424     }
00425   };
00426 
00427   
00428 
00429   
00430   
00431   /// calculate delayed values
00432   virtual void calculateDelayedValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], paramval number_steps_of_delay_, double *target)  
00433   {
00434     // number_steps_of_delay_ must not be larger than BUFFER_SIZE
00435     assert ((int)number_steps_of_delay_ < BUFFER_SIZE);
00436 
00437     // get delayed value from ring buffer
00438 
00439     for (int i = 0; i < NUMBER_CHANNELS; i++)
00440       {
00441         target[i] = source[(t - (int)number_steps_of_delay_ + BUFFER_SIZE) % BUFFER_SIZE][i];
00442       }
00443   };
00444 
00445   virtual void calculateSmoothValues(double source[BUFFER_SIZE][NUMBER_CHANNELS], paramval number_steps_for_averaging_, double *target)
00446   {
00447     // number_steps_for_averaging_ must not be larger than BUFFER_SIZE
00448     assert ((int)number_steps_for_averaging_ <= BUFFER_SIZE);
00449 
00450     for (int i = 0; i < NUMBER_CHANNELS; i++)
00451       {
00452         target[i] = 0.0;
00453         for (int k = 0; k < (int)number_steps_for_averaging_; k++)
00454           {
00455             target[i] += source[(t - k + BUFFER_SIZE) % BUFFER_SIZE][i]/ (double) (number_steps_for_averaging_);
00456           }
00457       }
00458   };
00459 
00460 
00461 
00462   /// calculate controller ouptus
00463   virtual void calculateControllerValues(double *x_smooth, double *y)  
00464   {
00465     double z[NUMBER_CHANNELS];
00466 
00467     for (int i = 0; i < NUMBER_CHANNELS; i++)
00468       {
00469         z[i] = h[i];
00470         for (int j = 0; j < NUMBER_CHANNELS; j++)
00471           {
00472             z[i] += C[i][j] * x_smooth[j];
00473           }
00474         y[i] = g(z[i]);
00475       }
00476   };
00477   
00478   
00479   // put new value in ring buffer
00480   virtual void putInBuffer(double buffer[BUFFER_SIZE][NUMBER_CHANNELS], const double *values){  
00481     for (int i = 0; i < NUMBER_CHANNELS; i++)
00482       {
00483         buffer[(t+BUFFER_SIZE)% BUFFER_SIZE][i] = values[i];
00484       }
00485   };
00486 
00487 
00488 
00489   virtual int getInternalParamNames(paramkey*& keylist){
00490     keylist=(paramkey*)malloc(sizeof(paramkey)*10);
00491     keylist[0]="C00";
00492     keylist[1]="C01";
00493     keylist[2]="C10";
00494     keylist[3]="C11";
00495     keylist[4]="H0";
00496     keylist[5]="H1";
00497     keylist[6]="A00";
00498     keylist[7]="A01";
00499     keylist[8]="A10";
00500     keylist[9]="A11";
00501     return 10;
00502   }
00503 
00504   virtual int getInternalParams(paramval* vallist, int length) {
00505     if(length < 10) return 0;
00506     vallist[0]=C[0][0];
00507     vallist[1]=C[0][1];
00508     vallist[2]=C[1][0];
00509     vallist[3]=C[1][1];
00510     vallist[4]=h[0];
00511     vallist[5]=h[1];
00512     vallist[6]=A[0][0];
00513     vallist[7]=A[0][1];
00514     vallist[8]=A[1][0];
00515     vallist[9]=A[1][1];
00516     return 10;
00517   }
00518 
00519 
00520 };
00521 
00522 AbstractController* getController(int sensornumber, int motornumber, 
00523                                   int param1/*=0*/, double param2/*=0*/)
00524 {
00525   if(param1 < 2) param1 = 2;
00526   return new Invert3ChannelController<2, 4>;
00527 }
00528 
00529 
00530 
00531 #endif

Generated on Mon Aug 7 16:40:14 2006 for Robotsystem of the Robot Group Leipzig by  doxygen 1.4.7