openModeller
Version 1.4.0
|
00001 00035 #include <openmodeller/Random.hh> 00036 #include <math.h> 00037 #include <string.h> 00038 00039 #include <openmodeller/Log.hh> 00040 #include <openmodeller/Sample.hh> 00041 #include <openmodeller/Occurrence.hh> 00042 00043 #include <openmodeller/Exceptions.hh> 00044 00045 #include "rules_base.hh" 00046 00047 #define MIN_SIG_NO 10 00048 00049 #ifdef WIN32 00050 #ifndef INFINITY //in mingw this is already defined in math.h 00051 #define INFINITY 1000000000 00052 #endif 00053 #endif 00054 00055 #ifndef WIN32 00056 int min(int v1, int v2) 00057 { 00058 return (v1 < v2)? v1 : v2; 00059 } 00060 #endif 00061 00062 bool equalEps(double v1, double v2) 00063 { 00064 return (fabs(v1 - v2) < 0.000001); 00065 } 00066 00067 bool between(double value, double min, double max) 00068 { 00069 //Log::instance()->info("Between: %f <= %f <= %f? %d\n", min, value, max, ((value >= min) && (value <= max))); 00070 return ((value >= min) && (value <= max)); 00071 } 00072 00073 int membership(double value1, double value2, double value) 00074 { 00075 if (equalEps(value1, -1.0) && equalEps(value2, +1.0)) 00076 return 255; 00077 else if (value < value1 || value > value2) 00078 return 0; 00079 else 00080 return 1; 00081 } 00082 00083 00084 /****************************************************************/ 00085 /****************** GarpRule Constructor ************************/ 00086 00087 GarpRule::GarpRule() : 00088 _chrom1(), 00089 _chrom2(), 00090 _prediction(0), 00091 _numGenes(0), 00092 _needsEvaluation(true) 00093 { 00094 _origin = type(); 00095 for (int i = 0; i < 10; i++) 00096 _performance[i] = 0.0; 00097 } 00098 00099 GarpRule::GarpRule(int numGenes) : 00100 _chrom1(numGenes, -1.0), 00101 _chrom2(numGenes, +1.0), 00102 _prediction(0), 00103 _numGenes(numGenes), 00104 _needsEvaluation(true) 00105 { 00106 _origin = type(); 00107 00108 for (int i = 0; i < 10; i++) 00109 _performance[i] = 0.0; 00110 } 00111 00112 // ***************** 00113 GarpRule::GarpRule(Scalar prediction, int numGenes, 00114 const Sample& chrom1, const Sample& chrom2, 00115 const double * performances) : 00116 _chrom1(chrom1), 00117 _chrom2(chrom2), 00118 _prediction(prediction), 00119 _numGenes(numGenes), 00120 _needsEvaluation(true), 00121 _origin(OriginColonization) 00122 00123 { 00124 for (int i = 0; i < 10; i++) 00125 { *(_performance + i) = *(performances + i); } 00126 } 00127 00128 /****************************************************************/ 00129 /****************** GarpRule Destructor *************************/ 00130 00131 GarpRule::~GarpRule() 00132 { } 00133 00134 GarpRule * GarpRule::clone() const 00135 { 00136 GarpRule * newRule = objFactory(); 00137 00138 newRule->_numGenes = _numGenes; 00139 newRule->_chrom1 = _chrom1; 00140 newRule->_chrom2 = _chrom2; 00141 00142 for (int i = 0; i < 10; i++) 00143 newRule->_performance[i] = _performance[i]; 00144 00145 newRule->_needsEvaluation = _needsEvaluation; 00146 newRule->_prediction = _prediction; 00147 newRule->_origin = _origin; 00148 00149 return newRule; 00150 } 00151 00157 int GarpRule::copy(const GarpRule * fromRule) 00158 { 00159 if (type() != fromRule->type()) 00160 return 0; 00161 00162 _chrom1 = fromRule->_chrom1; 00163 _chrom2 = fromRule->_chrom2; 00164 00165 for (int i = 0; i < 10; i++) 00166 _performance[i] = fromRule->_performance[i]; 00167 00168 _needsEvaluation = fromRule->_needsEvaluation; 00169 _prediction = fromRule->_prediction; 00170 _origin = fromRule->_origin; 00171 00172 return 1; 00173 } 00174 00175 00176 // ========================================================================== 00177 double GarpRule::getPerformance(PerfIndex perfIndex) const 00178 { 00179 return _performance[perfIndex]; 00180 } 00181 00182 // ========================================================================== 00183 int GarpRule::getCertainty(Scalar pred) const 00184 { 00185 return (_prediction == pred); 00186 } 00187 00188 // ========================================================================== 00189 double GarpRule::getError(Scalar predefinedValue, Scalar pred) const 00190 { 00191 return (double) fabs(predefinedValue - pred); 00192 } 00193 00194 // ========================================================================== 00195 void GarpRule::adjustRange(Scalar& v1, Scalar& v2) const 00196 { 00197 if (v1 > +1.0) v1 = +1.0; 00198 if (v2 > +1.0) v2 = +1.0; 00199 if (v1 < -1.0) v1 = -1.0; 00200 if (v2 < -1.0) v2 = -1.0; 00201 00202 if (v1 > v2) 00203 { 00204 // swap v1 and v2 00205 Scalar aux = v1; 00206 v1 = v2; 00207 v2 = aux; 00208 } 00209 } 00210 00211 // ========================================================================== 00212 void GarpRule::crossover(GarpRule * rule, int xpt1, int xpt2) 00213 { 00214 int i, diff, aux; 00215 Scalar temp; 00216 00217 diff = 0; 00218 if (xpt1 > xpt2) 00219 { 00220 aux = xpt1; 00221 xpt1 = xpt2; 00222 xpt2 = aux; 00223 } 00224 00225 for ( i = xpt1; i < xpt2; i++) 00226 { 00227 temp = _chrom1[i]; 00228 _chrom1[i] = rule->_chrom1[i]; 00229 rule->_chrom1[i] = temp; 00230 diff += (_chrom1[i] != rule->_chrom1[i]); 00231 00232 temp = _chrom2[i]; 00233 _chrom2[i] = rule->_chrom2[i]; 00234 rule->_chrom2[i] = temp; 00235 00236 diff += (_chrom2[i] != rule->_chrom2[i]); 00237 } 00238 00239 if (diff) 00240 { 00241 _origin = OriginCrossover; 00242 forceEvaluation(); 00243 00244 rule->_origin = OriginCrossover; 00245 rule->forceEvaluation(); 00246 } 00247 } 00248 00249 // ========================================================================== 00250 void GarpRule::mutate(double temperature) 00251 { 00252 Scalar rnd1, rnd2; 00253 Random rnd; 00254 int j; 00255 00256 j = rnd.get(_numGenes); 00257 00258 rnd1 = rnd.get(-temperature, +temperature); 00259 rnd2 = rnd.get(-temperature, +temperature); 00260 00261 _chrom1[j] -= rnd1; 00262 _chrom2[j] += rnd2; 00263 00264 adjustRange(_chrom1[j], _chrom2[j]); 00265 00266 _needsEvaluation = true; 00267 _origin = OriginMutation; 00268 } 00269 00270 // ========================================================================== 00271 bool GarpRule::similar(const GarpRule * otherRule) const 00272 { 00273 if (type() != otherRule->type()) 00274 { return false; } 00275 00276 // check rule value (presence/absence) 00277 if (_prediction != otherRule->_prediction) 00278 { return false; } 00279 00280 for (int i = 0; i < _numGenes; i++) 00281 { 00282 // rA and rB indicates whether gene <i> is being used or not 00283 bool rA = (equalEps(_chrom1[i], -1.0) && 00284 equalEps(_chrom2[i], +1.0)); 00285 00286 bool rB = (equalEps(otherRule->_chrom1[i], -1.0) && 00287 equalEps(otherRule->_chrom2[i], +1.0)); 00288 00289 if ( rA != rB ) 00290 { return false; } 00291 } 00292 00293 return true; 00294 } 00295 00296 // ========================================================================== 00297 double GarpRule::evaluate(const OccurrencesPtr& occs) 00298 { 00299 int i; 00300 00301 double utility[10]; 00302 00303 //next line commented out since its not used 00304 //int dimension = (*occs)[0]->environment().size(); 00305 00306 int n = occs->numOccurrences(); 00307 00308 // value of dependent variable from the current sample point 00309 //next line commented out since its not used 00310 //Scalar pointValue = 0.0; 00311 00312 // 1 if rule applies to sample point, i.e., values satisfies rule precondition 00313 // 0 otherwise 00314 int strength; 00315 00316 // indicates whether rule has the same conclusion as the current point 00317 // (i.e., rule prediction is equal to value dependent variable) 00318 int certainty; 00319 00320 // error 00321 double error; 00322 00323 // number of points that rule applies to (sum of strength) 00324 int pXs; 00325 00326 // number of points with the same conclusion as the rule (sum of certainty) 00327 int pYs; 00328 00329 // sum of points correctly predicted by the rule 00330 int pXYs; 00331 00332 // number of points selected by the rule 00333 int no; 00334 00335 // other intermediate statistics 00336 double pYcXs, pYcs; 00337 00338 // prior probability 00339 double priorProb; 00340 00341 // note that pXSs is always equals to no, so it has been removed 00342 00343 // reset counters 00344 pXs = pYs = pXYs = no = 0; 00345 pYcXs = pYcs = 0.0; 00346 00347 // reset utility values 00348 for (i = 1; i < 10; i++) 00349 utility[i] = 0.0; 00350 00351 utility[0] = 1.0; 00352 00353 //FILE * flog = fopen("evaluate.log", "w"); 00354 00355 OccurrencesImpl::const_iterator it = occs->begin(); 00356 OccurrencesImpl::const_iterator end = occs->end(); 00357 00358 while (it != end) 00359 { 00360 // environmental (independent) variables values from current sample point 00361 Scalar pointValue = ( (*it)->abundance() > 0.0 ) ? 1.0 : 0.0; 00362 Sample sample = (*it)->environment(); 00363 00364 strength = getStrength(sample); 00365 certainty = getCertainty(pointValue); 00366 error = getError(0, pointValue); 00367 00368 pXs += strength; 00369 pYs += certainty; 00370 pYcs += error; 00371 00372 if (strength > 0) 00373 { 00374 no++; 00375 pXYs += certainty; // strength is always 1, then success == certainty 00376 pYcXs += getError(error, pointValue); 00377 } 00378 00379 ++it; 00380 00381 /* 00382 fprintf(flog, "Sample %5d: [%d %d %d] (%+3.2f) ", 00383 i, strength, certainty, min(strength, certainty), pointValue); 00384 for (u = 0; u < dimension; u++) 00385 { 00386 fprintf(flog, "%+8.4f ", values[u]); 00387 } 00388 fprintf(flog, "\n"); 00389 */ 00390 } 00391 00392 if (no != pXs) 00393 { 00394 Log::instance()->error("Assertion failed (no != pXs): %d != %d", no, pXs); 00395 throw AlgorithmException("Assertion failed"); 00396 } 00397 00398 // Priors 00399 utility[1] = pXs / (double) n; // proportion 00400 utility[2] = pYs / (double) n; // Prior probability 00401 utility[3] = pYcs / (double) n; 00402 00403 priorProb = utility[2]; 00404 00405 if (no > 0) 00406 { 00407 utility[4] = no / (double) n; // Posterior strength 00408 utility[5] = pXYs / (double) no; // Posterior probability 00409 utility[6] = pYcXs / no; 00410 utility[7] = no / (double) n; // Coverage 00411 } 00412 00413 // Crisp Significance 00414 if ( (no >= MIN_SIG_NO) && (priorProb > 0) && (priorProb < 1.0)) 00415 { 00416 utility[8] = (pXYs - priorProb * no) / 00417 sqrt( no * priorProb * (1.0 - priorProb)); 00418 } 00419 00420 //flags!!! not implemented yet 00421 //if (Postflag) 00422 utility[0] *= utility[5]; 00423 00424 //if (Sigflag) 00425 utility[0] *= utility[8]; 00426 00427 //printf("%c] u0=%+7.3f u1=%+7.3f u2=%+7.3f u8=%+7.3f pXYs=%5d no=%4d n=%4d\n", 00428 // type(), utility[0], utility[1], utility[2], utility[8], pXYs, no, n); 00429 00430 //if (Compflag) utility[0] *= utility[7]; 00431 //if (Ecoflag) utility[0] *= ecoSpace(); 00432 //if (Lengthflag) utility[0] /= length(); 00433 00434 // Record performance in rule 00435 for (i = 0; i < 10; i++) 00436 _performance[i] = utility[i]; 00437 00438 00439 /* 00440 fprintf(flog, "Sums (pXs, pYs, pXYs, n, no): %5d %5d %5d %5d %5d ([%c] pred=%+3.1f sig=%+10.4f)\n", 00441 pXs, pYs, pXYs, n, no, type(), _prediction, utility[8]); 00442 00443 if ((utility[8] > 15) && (_prediction == 0.0)) 00444 { 00445 Scalar *gen = _genes; 00446 Scalar *end = gen + 2 * _numGenes; 00447 fprintf(flog, "Genes: "); 00448 while ( gen < end ) 00449 fprintf(flog, "%+8.4f ", *gen++ ); 00450 00451 fprintf(flog, "- (%.2f) : %f\n", _prediction, getPerformance(PerfSig) ); 00452 00453 fclose(flog); 00454 Log::instance()->error(1, "Check rule statistics"); //DEBUG 00455 } 00456 fclose(flog); 00457 */ 00458 00459 /* 00460 static int ii = 0; 00461 if (utility[8] < 2.7) 00462 printf("%4d] Rule performance is low: %+6.2f\n", ++ii, utility[8]); 00463 */ 00464 00465 return (utility[0]); 00466 } 00467 00468 // ========================================================================== 00469 void GarpRule::log() 00470 { 00471 for (int i = 0; i < _numGenes * 2; i += 2) 00472 { 00473 if (fabs(_chrom1[i] - _chrom2[i]) >= 2.0) 00474 Log::instance()->info( "****** ****** "); 00475 else 00476 Log::instance()->info( "%+6.2f %+6.2f ", _chrom1[i], _chrom2[i] ); 00477 } 00478 00479 Log::instance()->info( "- (%.2f) : %f\n", _prediction, getPerformance(PerfSig)); 00480 } 00481 00482 // ==========================================================================