dinvert3channelcontroller.hpp

Go to the documentation of this file.
00001 /* 
00002  *   LICENSE:                                                              *
00003  *   This work is licensed under the Creative Commons                      *
00004  *   Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of   *
00005  *   this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/ *
00006  *   or send a letter to Creative Commons, 543 Howard Street, 5th Floor,   *
00007  *   San Francisco, California, 94105, USA.                                *
00008  *                                                                         *
00009  *   This program is distributed in the hope that it will be useful,       *
00010  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00011  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *
00012 */
00013 
00014 double** allocateMatrix(int m, int n){
00015   double** mat=(double**)malloc(m*sizeof(double*));
00016   for(int i=0; i < m; i++){
00017     mat[i]=(double*)malloc(n*sizeof(double));
00018   }
00019   return mat;
00020 }
00021 
00022 void freeMatrix(double** mat, int m){
00023   for(int i=0; i < m; i++){
00024     free(mat[i]);
00025   }
00026   free(mat);
00027 }
00028 
00029 
00030 public: 
00031 
00032 /// return the name of the object (with version number)
00033 virtual constparamkey getName() const {  
00034   return name; 
00035 }
00036 
00037 
00038 virtual void init(int sensornumber, int motornumber){
00039   
00040 }
00041 
00042 
00043 DInvert3ChannelController(int numberchannels, int buffersize){
00044   
00045   eps=0.7;
00046   rho=0.0; 
00047   s4delay=1;
00048   s4avg=1;
00049   delta=0.01;
00050   factor_a=1.0;
00051   t=0;
00052   m=0.0;
00053     
00054   NUMBER_CHANNELS=numberchannels;
00055   BUFFER_SIZE=buffersize;
00056   // allocate field;
00057   A = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00058   C = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00059   h = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00060  
00061   x_buffer = allocateMatrix(BUFFER_SIZE, NUMBER_CHANNELS);
00062   y_buffer = allocateMatrix(BUFFER_SIZE, NUMBER_CHANNELS);
00063 
00064   xsi4E = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00065   xsi4Model = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00066 
00067   // internals
00068   x_smooth = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00069   x_effective = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00070   y_effective = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00071   Q_buf1 = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00072   Q_buf2 = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00073   L = allocateMatrix(NUMBER_CHANNELS, NUMBER_CHANNELS);
00074   z = (double*)malloc(NUMBER_CHANNELS*sizeof(double));
00075 
00076 
00077   for (int i = 0; i < NUMBER_CHANNELS; i++)
00078     {
00079       h[i] = 0.0;
00080       for (int j = 0; j < NUMBER_CHANNELS; j++)
00081         {
00082           if (i == j)
00083             {
00084               A[i][j] = (rand()/10.0)/RAND_MAX;
00085               C[i][j] = (rand()/10.0)/RAND_MAX;
00086             }
00087           else
00088             {
00089               A[i][j] = (rand()/10.0)/RAND_MAX;
00090               C[i][j] = (rand()/10.0)/RAND_MAX;
00091             }
00092         }
00093     }
00094   for (int i = 0; i < NUMBER_CHANNELS; i++)
00095     {
00096       for (int k = 0; k < BUFFER_SIZE; k++)
00097         {
00098           x_buffer[k][i] = 0;
00099           y_buffer[k][i] = 0;   
00100         }
00101     }
00102   // prepare name;
00103   Configurable::insertCVSInfo(name, "$RCSfile: dinvert3channelcontroller.hpp,v $", 
00104                                     "$Revision: 1.5 $");
00105     
00106   /*    // print initial values
00107         std::cout<<"Constructor of RobotLearnControl:"<<std::endl;
00108         std::cout<<"init: epsilon="<<eps<<std::endl;     
00109         std::cout<<"init: rho="<<rho<<std::endl;         
00110         std::cout<<"init: steps4delay="<<steps4delay<<std::endl; 
00111         std::cout<<"init: steps4avg="<<steps4avg<<std::endl;     
00112         std::cout<<"init: delta="<<delta<<std::endl; 
00113         std::cout<<"init: m (for calculation of E)="<<m<<std::endl; 
00114   */    
00115 };
00116 
00117 /// performs one step (includes learning). Calulates motor commands from sensor inputs.
00118 virtual void step(const sensor* x_, int sensornumber, motor* y_, int motornumber){
00119   for (int i = 0; i < NUMBER_CHANNELS; i++)
00120     {
00121       x_smooth[i] = 0.0;
00122       x_effective[i] = 0.0;
00123       y_effective[i] = 0.0;
00124     }
00125 
00126   // put new input value in ring buffer x_buffer
00127   putInBuffer(x_buffer, x_);
00128 
00129   // averaging over the last steps4avg values of x_buffer
00130   calculateSmoothValues(x_buffer, s4avg, x_smooth);
00131     
00132   // calculate controller values based on smoothed input values
00133   calculateControllerValues(x_smooth, y_);
00134 
00135   // put new output value in ring buffer y_buffer
00136   putInBuffer(y_buffer, y_);
00137 
00138   // calculate effective input/output, which is (actual-steps4delay) element of buffer
00139   calculateDelayedValues(x_buffer, s4delay, x_effective);    
00140   calculateDelayedValues(y_buffer, s4delay, y_effective);
00141  
00142   // learn controller with effective input/output
00143   learn(x_, x_effective, y_effective);
00144     
00145   // learn model with actual input and effective output (that produced the actual input)
00146   learnModel(x_, y_effective);
00147 
00148   // update step counter
00149   t++;
00150 };
00151 
00152 
00153 /// performs one step without learning. Calulates motor commands from sensor inputs.
00154 virtual void stepNoLearning(const sensor* x_, int sensornumber, motor* y_, int motornumber){
00155   for (int i = 0; i < NUMBER_CHANNELS; i++)
00156     {
00157       x_smooth[i] = 0.0;
00158     }
00159 
00160 
00161   // put new input value in ring buffer x_buffer
00162   putInBuffer(x_buffer, x_);
00163 
00164   // averaging over the last steps4avg values of x_buffer
00165   calculateSmoothValues(x_buffer, s4avg, x_smooth);
00166     
00167   // calculate controller values based on smoothed input values
00168   calculateControllerValues(x_smooth, y_);
00169 
00170   // put new output value in ring buffer y_buffer
00171   putInBuffer(y_buffer, y_);
00172 
00173   // update step counter
00174   t++;
00175 };
00176   
00177 virtual int getInternalParamNames(paramkey*& keylist){
00178   return 0;
00179 }
00180 
00181 virtual int getInternalParams(paramval* vallist, int length){
00182   int len = 0;
00183   return len;
00184 }
00185  
00186 protected:
00187 virtual void inverseMatrix(double** Q, double** Q_1){
00188   // Berechne Inverse von Q
00189 
00190   // only if NUMBER_CHANNELS<4
00191   assert(NUMBER_CHANNELS<4);
00192   if (NUMBER_CHANNELS==2){
00193 
00194     double det = Q[0][0] * Q[1][1] - Q[0][1] * Q[1][0];
00195     Q_1[0][0] = Q[1][1] / det;
00196     Q_1[1][1] = Q[0][0] / det;
00197     Q_1[0][1] = -Q[0][1] / det;
00198     Q_1[1][0] = -Q[1][0] / det;
00199       
00200   }
00201     
00202     
00203   if (NUMBER_CHANNELS==3){  
00204       
00205     double  detQ=0  ;
00206     double** Q_adjoint=Q_buf1;
00207       
00208     //calculate the inverse of Q
00209     Q_adjoint[0][0]=Q[1][1]*Q[2][2]-Q[1][2]*Q[2][1] ;
00210     Q_adjoint[0][1]=(Q[1][2]*Q[2][0]-Q[1][0]*Q[2][2]) ;
00211     Q_adjoint[0][2]=Q[1][0]*Q[2][1]-Q[1][1]*Q[2][0] ;
00212     Q_adjoint[1][0]=(Q[2][1]*Q[0][2]-Q[0][1]*Q[2][2]) ;
00213     Q_adjoint[1][1]=Q[0][0]*Q[2][2]-Q[0][2]*Q[2][0] ;
00214     Q_adjoint[1][2]=(Q[0][1]*Q[2][0]-Q[0][0]*Q[2][1]) ;
00215     Q_adjoint[2][0]=Q[0][1]*Q[1][2]-Q[1][1]*Q[0][2] ;
00216     Q_adjoint[2][1]=(Q[1][0]*Q[0][2]-Q[0][0]*Q[1][2]) ;
00217     Q_adjoint[2][2]=Q[0][0]*Q[1][1]-Q[0][1]*Q[1][0] ;
00218     detQ=Q[0][0]*Q_adjoint[0][0]+Q[0][1]*Q_adjoint[0][1]+Q[0][2]*Q_adjoint[0][2] ;
00219     for(int i=0; i<NUMBER_CHANNELS; i++){
00220       for(int j=0; j<NUMBER_CHANNELS; j++) {                                                     
00221         Q_1[i][j]=(Q_adjoint[j][i])/detQ  ;
00222       }
00223     }       
00224   } 
00225 };
00226 
00227 
00228 virtual double calculateE(const double *x_, const double *x_delay, const  double *y_delay) {
00229   double** Q = Q_buf1;
00230   double** Q_1= Q_buf2;
00231     
00232 
00233   // Calculate z based on the delayed inputs since the present input x is
00234   // produced by the outputs tau time steps before
00235   // which on their hand are y = K(x_D)
00236   // due to the delay in the feed back loop.
00237 
00238   for (int i = 0; i < NUMBER_CHANNELS; i++)
00239     {
00240       z[i] = h[i];
00241       for (int j = 0; j < NUMBER_CHANNELS; j++)
00242         {
00243           z[i] += C[i][j] * x_delay[j];
00244         }
00245     }
00246 
00247   // Berechne Matrix L
00248   for (int i = 0; i < NUMBER_CHANNELS; i++)
00249     {
00250       for (int j = 0; j < NUMBER_CHANNELS; j++)
00251         {
00252           L[i][j] = 0.0;
00253           for (int k = 0; k < NUMBER_CHANNELS; k++)
00254             {
00255               L[i][j] += A[i][k] * g_s(z[k]) * C[k][j];
00256             }
00257         }
00258     }
00259 
00260   // Berechne Q=LL^T
00261   for (int i = 0; i < NUMBER_CHANNELS; i++)
00262     {
00263       for (int j = 0; j < NUMBER_CHANNELS; j++)
00264         {
00265           Q[i][j] = 0.0;
00266           for (int k = 0; k < NUMBER_CHANNELS; k++)
00267             {
00268               Q[i][j] += L[i][k] * L[j][k];
00269             }
00270           if (i == j)
00271             Q[i][j] += rho / NUMBER_CHANNELS; // Regularisation
00272         }
00273     }
00274 
00275   // Berechne Inverse von Q
00276   inverseMatrix(Q, Q_1);
00277 
00278   // Berechne xsi
00279   for (int i = 0; i < NUMBER_CHANNELS; i++)
00280     {
00281       xsi4E[i] = x_[i];
00282       for (int j = 0; j < NUMBER_CHANNELS; j++)
00283         {
00284           xsi4E[i] -= A[i][j] * y_delay[j];  // using old y value -> no influence of changes (adding delta) in controller
00285           // very very strange!!!!
00286           //xsi4E[i] -= A[i][j] * g(z[j]);     // using recalculating y -> influence of changes (adding delta) in controller
00287         }
00288     }
00289   double E = 0;
00290   for (int i = 0; i < NUMBER_CHANNELS; i++)
00291     {
00292       for (int j = 0; j < NUMBER_CHANNELS; j++)
00293         {
00294           E += xsi4E[i] * Q_1[i][j] * xsi4E[j];
00295         }
00296     }
00297 
00298   double E_s=0;
00299   for (int i = 0; i < NUMBER_CHANNELS; i++)
00300     {
00301       for (int j = 0; j < NUMBER_CHANNELS; j++)
00302         {
00303           E_s += (A[i][j]*g(z[j]) - x_[i]) * (A[i][j]*g(z[j]) - x_[i]);
00304         }
00305     }
00306 
00307   E=(1-m)*E+ m*E_s;
00308   return (E);
00309 
00310 };
00311 
00312 
00313 virtual void learn(const double *x_, const double *x_delay, const double *y_delay)
00314 {
00315   //    double A_update[NUMBER_CHANNELS][NUMBER_CHANNELS];
00316   double** C_update=allocateMatrix(NUMBER_CHANNELS,NUMBER_CHANNELS);
00317   double* h_update=(double*)malloc(NUMBER_CHANNELS*sizeof(double));
00318 
00319   double E_0 = calculateE(x_,x_delay, y_delay);
00320 
00321   // calculate updates for h,C,A
00322   for (int i = 0; i < NUMBER_CHANNELS; i++)
00323     {
00324       h[i] += delta;
00325       h_update[i] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta;
00326       h[i] -= delta;
00327       for (int j = 0; j < NUMBER_CHANNELS; j++)
00328         {
00329           C[i][j] += delta;
00330           C_update[i][j] = -eps * (calculateE(x_,x_delay, y_delay) - E_0) / delta;
00331           C[i][j] -= delta;
00332           //A[i][j] += delta;
00333           //A_update[i][j] = -eps * a_factor * (calculateE(x_delay, y_delay) - E_0) / delta;
00334           //A[i][j] -= delta;
00335         }
00336     }
00337 
00338   // apply updates to h,C,A
00339   for (int i = 0; i < NUMBER_CHANNELS; i++)
00340     {
00341       h[i] += squash(h_update[i]);
00342       for (int j = 0; j < NUMBER_CHANNELS; j++)
00343         {
00344           C[i][j] += squash(C_update[i][j]);
00345           //A[i][j] += squash(A_update[i][j]);
00346         }
00347     }
00348   freeMatrix(C_update, NUMBER_CHANNELS);
00349   free(h_update);
00350 };
00351 
00352   
00353 virtual void learnModel(const double *x_actual, double *y_effective){
00354   /*    double z[N_output];    
00355         for(int i=0; i<N_output; i++){
00356         z[i]=h[i];
00357         for(int j=0; j<N_input; j++) {
00358         z[i]+=C[i][j]*x_D[j];
00359         }
00360         }
00361   */
00362   // Berechne xsi
00363   for(int i=0; i<NUMBER_CHANNELS; i++){
00364     xsi4Model[i]=x_actual[i];  
00365     for(int j=0; j<NUMBER_CHANNELS; j++){
00366       xsi4Model[i]-= A[i][j]*y_effective[j];
00367     }    
00368   }
00369 
00370   for(int i=0; i<NUMBER_CHANNELS; i++){
00371     for (int j=0; j<NUMBER_CHANNELS; j++){   
00372       A[i][j]+=squash( (factor_a*eps*0.2) *xsi4Model[i] * y_effective[j]) ;
00373     }
00374   }
00375 };
00376 
00377   
00378 
00379   
00380   
00381 /// calculate delayed values
00382 virtual void calculateDelayedValues(double** source, paramval number_steps_of_delay_, double *target)  
00383 {
00384   // number_steps_of_delay_ must not be larger than BUFFER_SIZE
00385   assert ((int)number_steps_of_delay_ < BUFFER_SIZE);
00386 
00387   // get delayed value from ring buffer
00388 
00389   for (int i = 0; i < NUMBER_CHANNELS; i++)
00390     {
00391       target[i] = source[(t - (int)number_steps_of_delay_ + BUFFER_SIZE) % BUFFER_SIZE][i];
00392     }
00393 };
00394 
00395 virtual void calculateSmoothValues(double** source, paramval number_steps_for_averaging_, double *target)
00396 {
00397   // number_steps_for_averaging_ must not be larger than BUFFER_SIZE
00398   assert ((int)number_steps_for_averaging_ <= BUFFER_SIZE);
00399 
00400   for (int i = 0; i < NUMBER_CHANNELS; i++)
00401     {
00402       target[i] = 0.0;
00403       for (int k = 0; k < (int)number_steps_for_averaging_; k++)
00404         {
00405           target[i] += source[(t - k + BUFFER_SIZE) % BUFFER_SIZE][i]/ (double) (number_steps_for_averaging_);
00406         }
00407     }
00408 };
00409 
00410 
00411 
00412 /// calculate controller ouptus
00413 virtual void calculateControllerValues(double *x_smooth, double *y)  
00414 {
00415   double z[NUMBER_CHANNELS];
00416 
00417   for (int i = 0; i < NUMBER_CHANNELS; i++)
00418     {
00419       z[i] = h[i];
00420       for (int j = 0; j < NUMBER_CHANNELS; j++)
00421         {
00422           z[i] += C[i][j] * x_smooth[j];
00423         }
00424       y[i] = g(z[i]);
00425     }
00426 };
00427   
00428   
00429 // put new value in ring buffer
00430 virtual void putInBuffer(double** buffer, const double *values){  
00431   for (int i = 0; i < NUMBER_CHANNELS; i++)
00432     {
00433       buffer[(t+BUFFER_SIZE)% BUFFER_SIZE][i] = values[i];
00434     }
00435 };
00436 

Generated on Mon Aug 7 16:40:14 2006 for Robotsystem of the Robot Group Leipzig by  doxygen 1.4.7