openModeller Version 1.4.0
rules_base.cpp
#include <openmodeller/Random.hh>
#include <math.h>
#include <string.h>

#include <openmodeller/Log.hh>
#include <openmodeller/Sample.hh>
#include <openmodeller/Occurrence.hh>

#include <openmodeller/Exceptions.hh>

#include "rules_base.hh"

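// Minimum number of points a rule must apply to before the
// significance statistic is computed in GarpRule::evaluate().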
#define MIN_SIG_NO 10

#ifdef WIN32
#ifndef INFINITY // in mingw this is already defined in math.h
#define INFINITY 1000000000
#endif
#endif

#ifndef WIN32
int min(int v1, int v2)
{
  return (v1 < v2) ? v1 : v2;
}
#endif

bool equalEps(double v1, double v2)
{
  return (fabs(v1 - v2) < 0.000001);
}

bool between(double value, double min, double max)
{
  //Log::instance()->info("Between: %f <= %f <= %f? %d\n", min, value, max, ((value >= min) && (value <= max)));
  return ((value >= min) && (value <= max));
}

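// Membership of <value> in the interval [value1, value2]: returns 255 when
// the interval spans the whole normalized domain [-1, +1], 0 when the value
// falls outside the interval, and 1 otherwise.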
int membership(double value1, double value2, double value)
{
  if (equalEps(value1, -1.0) && equalEps(value2, +1.0))
    return 255;
  else if (value < value1 || value > value2)
    return 0;
  else
    return 1;
}


/****************************************************************/
/****************** GarpRule Constructor ************************/

GarpRule::GarpRule() :
  _chrom1(),
  _chrom2(),
  _prediction(0),
  _numGenes(0),
  _needsEvaluation(true)
{
  _origin = type();
  for (int i = 0; i < 10; i++)
    _performance[i] = 0.0;
}

GarpRule::GarpRule(int numGenes) :
  _chrom1(numGenes, -1.0),
  _chrom2(numGenes, +1.0),
  _prediction(0),
  _numGenes(numGenes),
  _needsEvaluation(true)
{
  _origin = type();

  for (int i = 0; i < 10; i++)
    _performance[i] = 0.0;
}

// *****************
GarpRule::GarpRule(Scalar prediction, int numGenes,
                   const Sample& chrom1, const Sample& chrom2,
                   const double * performances) :
  _chrom1(chrom1),
  _chrom2(chrom2),
  _prediction(prediction),
  _numGenes(numGenes),
  _needsEvaluation(true),
  _origin(OriginColonization)
{
  for (int i = 0; i < 10; i++)
    { _performance[i] = performances[i]; }
}

/****************************************************************/
/****************** GarpRule Destructor *************************/

GarpRule::~GarpRule()
{ }

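// Returns a newly allocated copy of this rule, created through the
// subclass factory method objFactory().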
GarpRule * GarpRule::clone() const
{
  GarpRule * newRule = objFactory();

  newRule->_numGenes = _numGenes;
  newRule->_chrom1 = _chrom1;
  newRule->_chrom2 = _chrom2;

  for (int i = 0; i < 10; i++)
    newRule->_performance[i] = _performance[i];

  newRule->_needsEvaluation = _needsEvaluation;
  newRule->_prediction = _prediction;
  newRule->_origin = _origin;

  return newRule;
}

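// Copies the contents of <fromRule> into this rule. Returns 0 when the
// two rules are of different types, 1 on success.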
int GarpRule::copy(const GarpRule * fromRule)
{
  if (type() != fromRule->type())
    return 0;

  _chrom1 = fromRule->_chrom1;
  _chrom2 = fromRule->_chrom2;

  for (int i = 0; i < 10; i++)
    _performance[i] = fromRule->_performance[i];

  _needsEvaluation = fromRule->_needsEvaluation;
  _prediction = fromRule->_prediction;
  _origin = fromRule->_origin;

  return 1;
}


// ==========================================================================
double GarpRule::getPerformance(PerfIndex perfIndex) const
{
  return _performance[perfIndex];
}

// ==========================================================================
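// Returns 1 when the rule prediction matches the given value, 0 otherwise.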
int GarpRule::getCertainty(Scalar pred) const
{
  return (_prediction == pred);
}

// ==========================================================================
double GarpRule::getError(Scalar predefinedValue, Scalar pred) const
{
  return (double) fabs(predefinedValue - pred);
}

// ==========================================================================
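// Clamps both gene limits to the normalized range [-1, +1] and swaps them
// when the lower limit ends up greater than the upper one.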
void GarpRule::adjustRange(Scalar& v1, Scalar& v2) const
{
  if (v1 > +1.0) v1 = +1.0;
  if (v2 > +1.0) v2 = +1.0;
  if (v1 < -1.0) v1 = -1.0;
  if (v2 < -1.0) v2 = -1.0;

  if (v1 > v2)
  {
    // swap v1 and v2
    Scalar aux = v1;
    v1 = v2;
    v2 = aux;
  }
}

// ==========================================================================
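// Two-point crossover: swaps the gene limits of this rule and <rule> for
// every gene between crossover points xpt1 and xpt2. If any gene actually
// changed, both rules are marked as crossover offspring and flagged for
// re-evaluation.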
void GarpRule::crossover(GarpRule * rule, int xpt1, int xpt2)
{
  int i, diff, aux;
  Scalar temp;

  diff = 0;
  if (xpt1 > xpt2)
  {
    aux = xpt1;
    xpt1 = xpt2;
    xpt2 = aux;
  }

  for (i = xpt1; i < xpt2; i++)
  {
    temp = _chrom1[i];
    _chrom1[i] = rule->_chrom1[i];
    rule->_chrom1[i] = temp;
    diff += (_chrom1[i] != rule->_chrom1[i]);

    temp = _chrom2[i];
    _chrom2[i] = rule->_chrom2[i];
    rule->_chrom2[i] = temp;
    diff += (_chrom2[i] != rule->_chrom2[i]);
  }

  if (diff)
  {
    _origin = OriginCrossover;
    forceEvaluation();

    rule->_origin = OriginCrossover;
    rule->forceEvaluation();
  }
}

// ==========================================================================
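// Picks one gene at random and perturbs its lower and upper limits by
// independent random amounts drawn from [-temperature, +temperature],
// then re-clamps the range and flags the rule for re-evaluation.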
void GarpRule::mutate(double temperature)
{
  Scalar rnd1, rnd2;
  Random rnd;
  int j;

  j = rnd.get(_numGenes);

  rnd1 = rnd.get(-temperature, +temperature);
  rnd2 = rnd.get(-temperature, +temperature);

  _chrom1[j] -= rnd1;
  _chrom2[j] += rnd2;

  adjustRange(_chrom1[j], _chrom2[j]);

  _needsEvaluation = true;
  _origin = OriginMutation;
}

// ==========================================================================
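// Two rules are considered similar when they are of the same type, make the
// same prediction, and use the same set of genes (a gene is unused when its
// range spans the whole interval [-1, +1]).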
bool GarpRule::similar(const GarpRule * otherRule) const
{
  if (type() != otherRule->type())
    { return false; }

  // check rule value (presence/absence)
  if (_prediction != otherRule->_prediction)
    { return false; }

  for (int i = 0; i < _numGenes; i++)
  {
    // rA and rB indicate whether gene <i> is being used or not
    bool rA = (equalEps(_chrom1[i], -1.0) &&
               equalEps(_chrom2[i], +1.0));

    bool rB = (equalEps(otherRule->_chrom1[i], -1.0) &&
               equalEps(otherRule->_chrom2[i], +1.0));

    if ( rA != rB )
      { return false; }
  }

  return true;
}

// ==========================================================================
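// Evaluates the rule against a set of occurrence points: computes the
// utility measures (proportion, prior and posterior probabilities,
// coverage and significance), stores them in _performance[] and returns
// the overall utility (utility[0]).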
double GarpRule::evaluate(const OccurrencesPtr& occs)
{
  int i;

  double utility[10];

  // next line commented out since it is not used
  //int dimension = (*occs)[0]->environment().size();

  int n = occs->numOccurrences();

  // value of dependent variable from the current sample point
  // next line commented out since it is not used
  //Scalar pointValue = 0.0;

  // 1 if rule applies to sample point, i.e., its values satisfy the rule precondition
  // 0 otherwise
  int strength;

  // indicates whether rule has the same conclusion as the current point
  // (i.e., rule prediction is equal to the value of the dependent variable)
  int certainty;

  // error
  double error;

  // number of points that rule applies to (sum of strength)
  int pXs;

  // number of points with the same conclusion as the rule (sum of certainty)
  int pYs;

  // sum of points correctly predicted by the rule
  int pXYs;

  // number of points selected by the rule
  int no;

  // other intermediate statistics
  double pYcXs, pYcs;

  // prior probability
  double priorProb;

  // note that pXSs is always equal to no, so it has been removed

  // reset counters
  pXs = pYs = pXYs = no = 0;
  pYcXs = pYcs = 0.0;

  // reset utility values
  for (i = 1; i < 10; i++)
    utility[i] = 0.0;

  utility[0] = 1.0;

  //FILE * flog = fopen("evaluate.log", "w");

  OccurrencesImpl::const_iterator it  = occs->begin();
  OccurrencesImpl::const_iterator end = occs->end();

  while (it != end)
  {
    // environmental (independent) variable values from the current sample point
    Scalar pointValue = ( (*it)->abundance() > 0.0 ) ? 1.0 : 0.0;
    Sample sample = (*it)->environment();

    strength = getStrength(sample);
    certainty = getCertainty(pointValue);
    error = getError(0, pointValue);

    pXs  += strength;
    pYs  += certainty;
    pYcs += error;

    if (strength > 0)
    {
      no++;
      pXYs += certainty;  // strength is always 1 here, so success == certainty
      pYcXs += getError(error, pointValue);
    }

    ++it;

    /*
    fprintf(flog, "Sample %5d: [%d %d %d] (%+3.2f) ",
            i, strength, certainty, min(strength, certainty), pointValue);
    for (u = 0; u < dimension; u++)
    {
      fprintf(flog, "%+8.4f ", values[u]);
    }
    fprintf(flog, "\n");
    */
  }

  if (no != pXs)
  {
    Log::instance()->error("Assertion failed (no != pXs): %d != %d", no, pXs);
    throw AlgorithmException("Assertion failed");
  }

  // Priors
  utility[1] = pXs  / (double) n;   // proportion
  utility[2] = pYs  / (double) n;   // prior probability
  utility[3] = pYcs / (double) n;

  priorProb = utility[2];

  if (no > 0)
  {
    utility[4] = no    / (double) n;    // posterior strength
    utility[5] = pXYs  / (double) no;   // posterior probability
    utility[6] = pYcXs / no;
    utility[7] = no    / (double) n;    // coverage
  }

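  // The statistic below measures, in standard deviations, how far the number
  // of correct predictions (pXYs) lies above its expectation under the prior
  // probability (no * priorProb), assuming a binomial model; it is only
  // computed when the rule applies to at least MIN_SIG_NO points.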
  // Crisp Significance
  if ( (no >= MIN_SIG_NO) && (priorProb > 0) && (priorProb < 1.0) )
  {
    utility[8] = (pXYs - priorProb * no) /
                 sqrt( no * priorProb * (1.0 - priorProb) );
  }

  // flags!!! not implemented yet
  //if (Postflag)
  utility[0] *= utility[5];

  //if (Sigflag)
  utility[0] *= utility[8];

  //printf("%c] u0=%+7.3f u1=%+7.3f u2=%+7.3f u8=%+7.3f pXYs=%5d no=%4d n=%4d\n",
  //       type(), utility[0], utility[1], utility[2], utility[8], pXYs, no, n);

  //if (Compflag) utility[0] *= utility[7];
  //if (Ecoflag) utility[0] *= ecoSpace();
  //if (Lengthflag) utility[0] /= length();

  // Record performance in rule
  for (i = 0; i < 10; i++)
    _performance[i] = utility[i];

  /*
  fprintf(flog, "Sums (pXs, pYs, pXYs, n, no): %5d %5d %5d %5d %5d ([%c] pred=%+3.1f sig=%+10.4f)\n",
          pXs, pYs, pXYs, n, no, type(), _prediction, utility[8]);

  if ((utility[8] > 15) && (_prediction == 0.0))
  {
    Scalar *gen = _genes;
    Scalar *end = gen + 2 * _numGenes;
    fprintf(flog, "Genes: ");
    while ( gen < end )
      fprintf(flog, "%+8.4f ", *gen++ );

    fprintf(flog, "- (%.2f) : %f\n", _prediction, getPerformance(PerfSig) );

    fclose(flog);
    Log::instance()->error(1, "Check rule statistics"); //DEBUG
  }
  fclose(flog);
  */

  /*
  static int ii = 0;
  if (utility[8] < 2.7)
    printf("%4d] Rule performance is low: %+6.2f\n", ++ii, utility[8]);
  */

  return (utility[0]);
}
00467 
// ==========================================================================
void GarpRule::log()
{
  // print the lower and upper limit of each gene; unused genes (full range)
  // are printed as asterisks
  for (int i = 0; i < _numGenes; i++)
  {
    if (fabs(_chrom1[i] - _chrom2[i]) >= 2.0)
      Log::instance()->info( "****** ****** " );
    else
      Log::instance()->info( "%+6.2f %+6.2f ", _chrom1[i], _chrom2[i] );
  }

  Log::instance()->info( "- (%.2f) : %f\n", _prediction, getPerformance(PerfSig) );
}

// ==========================================================================