openModeller
Version 1.4.0
|
00001 00027 #include "consensus.hh" 00028 00029 #include <string> 00030 #include <algorithm> 00031 #include <sstream> 00032 00033 using namespace std; 00034 00035 /****************************************************************/ 00036 /********************** Algorithm's Metadata ********************/ 00037 00038 #define NUM_PARAM 7 00039 #define MAX_ALGORITHMS 5 00040 00041 #define CONSENSUS_LOG_PREFIX "Consensus: " 00042 00043 /******************************/ 00044 /*** Algorithm's parameters ***/ 00045 00046 static AlgParamMetadata parameters[NUM_PARAM] = { 00047 00048 // Algorithm 1 00049 { 00050 "Alg1", // Id. 00051 "Algorithm1", // Name. 00052 String, // Type. 00053 "Algorithm 1", // Overview 00054 "First algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface.", // Description. 00055 0, // Not zero if the parameter has lower limit. 00056 0, // Parameter's lower limit. 00057 0, // Not zero if the parameter has upper limit. 00058 0, // Parameter's upper limit. 00059 "RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=1)" // Parameter's typical (default) value. 00060 }, 00061 // Algorithm 2 00062 { 00063 "Alg2", // Id. 00064 "Algorithm2", // Name. 00065 String, // Type. 00066 "Algorithm 2", // Overview 00067 "Second algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description. 00068 0, // Not zero if the parameter has lower limit. 00069 0, // Parameter's lower limit. 00070 0, // Not zero if the parameter has upper limit. 00071 0, // Parameter's upper limit. 00072 "" // Parameter's typical (default) value. 00073 }, 00074 // Algorithm 3 00075 { 00076 "Alg3", // Id. 00077 "Algorithm3", // Name. 00078 String, // Type. 00079 "Algorithm 3", // Overview 00080 "Third algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description. 00081 0, // Not zero if the parameter has lower limit. 00082 0, // Parameter's lower limit. 00083 0, // Not zero if the parameter has upper limit. 00084 0, // Parameter's upper limit. 00085 "" // Parameter's typical (default) value. 00086 }, 00087 // Algorithm 4 00088 { 00089 "Alg4", // Id. 00090 "Algorithm4", // Name. 00091 String, // Type. 00092 "Algorithm 4", // Overview 00093 "Fourth algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description. 00094 0, // Not zero if the parameter has lower limit. 00095 0, // Parameter's lower limit. 00096 0, // Not zero if the parameter has upper limit. 00097 0, // Parameter's upper limit. 00098 "" // Parameter's typical (default) value. 00099 }, 00100 // Algorithm 5 00101 { 00102 "Alg5", // Id. 00103 "Algorithm5", // Name. 00104 String, // Type. 00105 "Algorithm 5", // Overview 00106 "Fifth algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description. 00107 0, // Not zero if the parameter has lower limit. 00108 0, // Parameter's lower limit. 00109 0, // Not zero if the parameter has upper limit. 00110 0, // Parameter's upper limit. 00111 "" // Parameter's typical (default) value. 00112 }, 00113 // Weigths 00114 { 00115 "Weights", // Id. 00116 "Weights", // Name. 00117 String, // Type. 00118 "Weights", // Overview 00119 "Sequence of weights, each one related to the corresponding algorithm, separated by space. This can be used to give more importance to certain algorithms. Use dot as decimal separator.", // Description. 00120 0, // Not zero if the parameter has lower limit. 00121 0, // Parameter's lower limit. 00122 0, // Not zero if the parameter has upper limit. 00123 0, // Parameter's upper limit. 00124 "1.0 0.0 0.0 0.0 0.0" // Parameter's typical (default) value. 00125 }, 00126 // Minimum level of agreement 00127 { 00128 "Agreement", // Id. 00129 "Agreement", // Name. 00130 Integer, // Type. 00131 "Minimum level of agreement", // Overview 00132 "Minimum level of agreement between the algorithms. Only predictions that are agreed between the specified number of algorithms will be returned as a positive value.", // Description. 00133 1, // Not zero if the parameter has lower limit. 00134 1, // Parameter's lower limit. 00135 1, // Not zero if the parameter has upper limit. 00136 5, // Parameter's upper limit. 00137 "1" // Parameter's typical (default) value. 00138 }, 00139 }; 00140 00141 /************************************/ 00142 /*** Algorithm's general metadata ***/ 00143 00144 static AlgMetadata metadata = { 00145 00146 "CONSENSUS", // Id. 00147 "Consensus", // Name. 00148 "0.2", // Version. 00149 00150 // Overview 00151 "Builds a consensus model with the specified algorithms", 00152 00153 // Description. 00154 "This is a kind of meta algorithm that receives other algorithms as parameters so that it can generate the individual models and then merge the results into an aggregated model. The maximum number of algorithms is limited to 5. Leave the algorithm parameter blank if you want to use fewer algorithms. IMPORTANT: To specify an algorithm you need to know the algorithm id and its parameters names in openModeller (you can do this by inspecting the request.txt file that comes as an exemple in the command-line interface). Before merging the models, each individual model is transformed into a binary model using the lowest presence threshold. You can assign different weights to each algorithm and also specify the minimum level of agreement between the algorithms. A minimum level of 3 when 5 algorithms are used means that, when less than 3 algorithms agree on a prediction, the result will be zero, so the final model only shows areas where the specified number of algorithms agree on the prediction.", 00155 00156 "Renato De Giovanni", // Algorithm author. 00157 "", // Bibliography. 00158 00159 "Renato De Giovanni", // Code author. 00160 "renato [at] cria . org . br", // Code author's contact. 00161 00162 0, // Does not accept categorical data. 00163 0, // Does not need (pseudo)absence points. 00164 00165 NUM_PARAM, // Algorithm's parameters. 00166 parameters 00167 }; 00168 00169 /****************************************************************/ 00170 /****************** Algorithm's factory function ****************/ 00171 00172 OM_ALG_DLL_EXPORT 00173 AlgorithmImpl * 00174 algorithmFactory() 00175 { 00176 return new ConsensusAlgorithm(); 00177 } 00178 00179 OM_ALG_DLL_EXPORT 00180 AlgMetadata const * 00181 algorithmMetadata() 00182 { 00183 return &metadata; 00184 } 00185 00186 00187 /*********************************************/ 00188 /************** SVM algorithm ****************/ 00189 00190 /*******************/ 00191 /*** constructor ***/ 00192 00193 ConsensusAlgorithm::ConsensusAlgorithm() : 00194 AlgorithmImpl( &metadata ), 00195 _done( false ), 00196 _initialized( false ), 00197 _num_algs( 0 ), 00198 _agreement( 1 ) 00199 { 00200 } 00201 00202 00203 /******************/ 00204 /*** destructor ***/ 00205 00206 ConsensusAlgorithm::~ConsensusAlgorithm() 00207 { 00208 for ( int i=0; i < (int)_algs.size(); i++ ) { 00209 00210 if ( _norms[i] ) { 00211 00212 delete _norms[i]; 00213 } 00214 } 00215 } 00216 00217 /**************************/ 00218 /*** need Normalization ***/ 00219 int ConsensusAlgorithm::needNormalization() 00220 { 00221 return 0; 00222 } 00223 00224 /******************/ 00225 /*** initialize ***/ 00226 int 00227 ConsensusAlgorithm::initialize() 00228 { 00229 std::string alg; 00230 00231 if ( getParameter( "Alg1", &alg ) ) { 00232 00233 if ( !_setAlgorithm( alg ) ) return 0; 00234 } 00235 00236 if ( getParameter( "Alg2", &alg ) ) { 00237 00238 if ( !_setAlgorithm( alg ) ) return 0; 00239 } 00240 00241 if ( getParameter( "Alg3", &alg ) ) { 00242 00243 if ( !_setAlgorithm( alg ) ) return 0; 00244 } 00245 00246 if ( getParameter( "Alg4", &alg ) ) { 00247 00248 if ( !_setAlgorithm( alg ) ) return 0; 00249 } 00250 00251 if ( getParameter( "Alg5", &alg ) ) { 00252 00253 if ( !_setAlgorithm( alg ) ) return 0; 00254 } 00255 00256 _num_algs = (int)_algs.size(); 00257 00258 if ( _num_algs == 0 ) { 00259 00260 Log::instance()->error( CONSENSUS_LOG_PREFIX "Consensus needs at least one algorithm. No algorithm could be instantiated based on the parameters.\n" ); 00261 return 0; 00262 } 00263 00264 if ( ! getParameter( "Agreement", &_agreement ) ) { 00265 00266 _agreement = _num_algs; // default value 00267 } 00268 else { 00269 00270 if ( _agreement < 1 || _agreement > _num_algs ) { 00271 00272 _agreement = _num_algs; 00273 } 00274 } 00275 00276 _thresholds = Sample(MAX_ALGORITHMS, 1.0); // start with maximum threshold 00277 00278 _weights.resize(MAX_ALGORITHMS); 00279 00280 std::string weights_param; 00281 00282 int nw = 0; 00283 00284 _sum_weights = 0.0; 00285 00286 if ( getParameter( "Weights", &weights_param ) ) { 00287 00288 stringstream ss(weights_param); 00289 string weight; 00290 double weight_val; 00291 while ( getline(ss, weight, ' ') ) { 00292 00293 weight_val = 1.0; 00294 sscanf( weight.c_str(), "%lf", &weight_val ); 00295 _weights[nw] = weight_val; 00296 _sum_weights += weight_val; 00297 ++nw; 00298 00299 if ( nw == MAX_ALGORITHMS ) { 00300 break; 00301 } 00302 } 00303 } 00304 00305 for ( int i=nw; i < MAX_ALGORITHMS; ++i ) { 00306 00307 _weights[i] = 1.0; 00308 _sum_weights += 1.0; 00309 } 00310 00311 for ( int j=0; j < _num_algs; j++ ) { 00312 00313 SamplerPtr fresh_sampler = cloneSampler(_samp); 00314 00315 if ( _algs[j]->needNormalization() ) { 00316 00317 fresh_sampler->normalize( _algs[j]->getNormalizer() ); 00318 } 00319 00320 _algs[j]->setSampler( fresh_sampler ); 00321 _algs[j]->initialize(); 00322 } 00323 00324 return 1; 00325 } 00326 00327 /*********************/ 00328 /*** set Algorithm ***/ 00329 bool 00330 ConsensusAlgorithm::_setAlgorithm( std::string alg_str ) 00331 { 00332 // Remove spaces 00333 alg_str.erase( std::remove_if( alg_str.begin(), alg_str.end(), ::isspace ), alg_str.end() ); 00334 00335 if ( alg_str.size() == 0 ) { 00336 00337 // Empty alg. Do nothing. 00338 return true; 00339 } 00340 00341 size_t ini_p = alg_str.find( "(" ); 00342 00343 // No parentheses 00344 if ( ini_p == string::npos ) { 00345 00346 // means no parameters, so just instantiate the algorithm 00347 AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( alg_str ); 00348 00349 _algs.push_back( alg ); 00350 00351 _norms.push_back( alg->getNormalizer() ); 00352 00353 return true; 00354 } 00355 00356 // There are parentheses 00357 00358 // extract ID 00359 std::string alg_id = alg_str.substr(0, ini_p); 00360 00361 // get parameters 00362 size_t end_p = alg_str.find( ")" ); 00363 00364 if ( end_p == string::npos ) { 00365 00366 Log::instance()->error( CONSENSUS_LOG_PREFIX "Missing parenthesis in algorithm parameters.\n" ); 00367 return false; 00368 } 00369 else if ( end_p < ini_p ) { 00370 00371 Log::instance()->error( CONSENSUS_LOG_PREFIX "Mismatching parenthesis in algoroithm parameters.\n" ); 00372 return false; 00373 } 00374 00375 std::string alg_params = alg_str.substr(ini_p + 1, end_p - ini_p -1); 00376 00377 vector<string> pairs; 00378 stringstream ss(alg_params); 00379 string pair; 00380 int nparam = 0; 00381 while ( getline(ss, pair, ',') ) { 00382 00383 pairs.push_back(pair); 00384 ++nparam; 00385 } 00386 00387 ParamSetType params; 00388 00389 for ( int i = 0; i < nparam; i++) { 00390 00391 size_t eq = pairs[i].find( "=" ); 00392 00393 if ( eq == string::npos || eq == 0 ) { 00394 00395 Log::instance()->error( CONSENSUS_LOG_PREFIX "Algorithm parameter failed to match key=value pair format.\n" ); 00396 return false; 00397 } 00398 00399 std::string param_id = pairs[i].substr(0, eq); 00400 std::string param_val = pairs[i].substr(eq+1); 00401 00402 params.insert( std::pair<icstring,std::string>(param_id, param_val) ); 00403 } 00404 00405 AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( alg_id ); 00406 00407 alg->setParameters( params ); 00408 00409 _algs.push_back( alg ); 00410 00411 _norms.push_back( alg->getNormalizer() ); 00412 00413 return true; 00414 } 00415 00416 /***************/ 00417 /*** iterate ***/ 00418 int 00419 ConsensusAlgorithm::iterate() 00420 { 00421 _done = true; 00422 00423 for ( int j=0; j < _num_algs; j++ ) { 00424 00425 if ( ! _algs[j]->done() ) { 00426 00427 _done = false; 00428 00429 if ( ! _algs[j]->iterate() ) { 00430 00431 return 0; 00432 } 00433 } 00434 } 00435 00436 // get LPT 00437 if ( _done ) { 00438 00439 OccurrencesPtr presences = _samp->getPresences(); 00440 00441 OccurrencesImpl::const_iterator p_iterator; 00442 OccurrencesImpl::const_iterator p_end; 00443 00444 Scalar val; 00445 00446 while ( p_iterator != p_end ) { 00447 00448 Sample env = (*p_iterator)->environment(); 00449 00450 for ( int j=0; j < _num_algs; j++ ) { 00451 00452 if ( _norms[j] ) { 00453 00454 Sample mysamp = Sample( env ); // deep copy 00455 _norms[j]->normalize( &mysamp ); 00456 val = _algs[j]->getValue( mysamp ); 00457 } 00458 else { 00459 00460 val = _algs[j]->getValue( env ); 00461 } 00462 00463 if ( val < _thresholds[j] && val > 0.0 ) { 00464 00465 _thresholds[j] = val; 00466 } 00467 } 00468 00469 ++p_iterator; 00470 } 00471 } 00472 00473 return 1; 00474 } 00475 00476 /********************/ 00477 /*** get Progress ***/ 00478 float ConsensusAlgorithm::getProgress() const 00479 { 00480 float progress = 0.0; 00481 00482 for ( int j=0; j < _num_algs; j++ ) { 00483 00484 progress += _algs[j]->getProgress(); 00485 } 00486 00487 return progress/(float)_num_algs; 00488 } 00489 00490 00491 /************/ 00492 /*** done ***/ 00493 int 00494 ConsensusAlgorithm::done() const 00495 { 00496 return _done; 00497 } 00498 00499 /*****************/ 00500 /*** get Value ***/ 00501 Scalar 00502 ConsensusAlgorithm::getValue( const Sample& x ) const 00503 { 00504 Scalar prob = 0.0; 00505 Scalar v; 00506 int agree = 0; 00507 00508 for ( int i=0; i < _num_algs; i++ ) { 00509 00510 if ( _norms[i] ) { 00511 00512 Sample y( x ); 00513 _norms[i]->normalize( &y ); 00514 v = _algs[i]->getValue( y ); 00515 } 00516 else { 00517 00518 v = _algs[i]->getValue( x ); 00519 } 00520 00521 if ( v >= _thresholds[i] ) { 00522 00523 prob += 1.0 * _weights[i]; 00524 agree++; 00525 } 00526 } 00527 00528 if ( agree < _agreement ) { 00529 00530 return 0.0; 00531 } 00532 00533 return prob/_sum_weights; 00534 } 00535 00536 /***********************/ 00537 /*** get Convergence ***/ 00538 int 00539 ConsensusAlgorithm::getConvergence( Scalar * const val ) const 00540 { 00541 *val = 1.0; 00542 return 1; 00543 } 00544 00545 /****************************************************************/ 00546 /****************** configuration *******************************/ 00547 void 00548 ConsensusAlgorithm::_getConfiguration( ConfigurationPtr& config ) const 00549 { 00550 if ( ! _done ) 00551 return; 00552 00553 ConfigurationPtr model_config( new ConfigurationImpl("Consensus") ); 00554 config->addSubsection( model_config ); 00555 00556 model_config->addNameValue( "Thresholds", _thresholds ); 00557 00558 ConfigurationPtr algs_config( new ConfigurationImpl("Algorithms") ); 00559 model_config->addSubsection( algs_config ); 00560 00561 for ( int i=0; i < _num_algs; i++ ) { 00562 00563 ConfigurationPtr alg_config = _algs[i]->getConfiguration(); 00564 algs_config->addSubsection( alg_config ); 00565 } 00566 } 00567 00568 void 00569 ConsensusAlgorithm::_setConfiguration( const ConstConfigurationPtr& config ) 00570 { 00571 ConstConfigurationPtr model_config = config->getSubsection( "Consensus", false ); 00572 00573 if ( ! model_config ) 00574 return; 00575 00576 if ( ! getParameter("Agreement", &_agreement) ) { 00577 00578 Log::instance()->error("Parameter 'Agreement' was not found in serialized model.\n"); 00579 return; 00580 } 00581 else { 00582 00583 if ( _agreement < 1 || _agreement > MAX_ALGORITHMS ) { 00584 00585 _agreement = 2; 00586 } 00587 } 00588 00589 _weights.resize(MAX_ALGORITHMS); 00590 00591 std::string weights_param; 00592 00593 int nw = 0; 00594 00595 _sum_weights = 0.0; 00596 00597 if ( ! getParameter( "Weights", &weights_param ) ) { 00598 00599 Log::instance()->error("Parameter 'Weights' was not found in serialized model.\n"); 00600 return; 00601 } 00602 else { 00603 00604 stringstream ss(weights_param); 00605 string weight; 00606 double weight_val; 00607 while ( getline(ss, weight, ' ') ) { 00608 00609 weight_val = 1.0; 00610 sscanf( weight.c_str(), "%lf", &weight_val ); 00611 _weights[nw] = weight_val; 00612 _sum_weights += weight_val; 00613 ++nw; 00614 00615 if ( nw == MAX_ALGORITHMS ) { 00616 break; 00617 } 00618 } 00619 } 00620 00621 for ( int i=nw; i < MAX_ALGORITHMS; ++i ) { 00622 00623 _weights[i] = 1.0; 00624 _sum_weights += 1.0; 00625 } 00626 00627 _thresholds = model_config->getAttributeAsSample( "Thresholds" ); 00628 00629 ConstConfigurationPtr algs_config = model_config->getSubsection( "Algorithms", false ); 00630 00631 if ( ! algs_config ) { 00632 00633 Log::instance()->error( CONSENSUS_LOG_PREFIX "No algorithms could be deserialized.\n" ); 00634 return; 00635 } 00636 00637 Configuration::subsection_list subelements = algs_config->getAllSubsections(); 00638 00639 Configuration::subsection_list::const_iterator end = subelements.end(); 00640 Configuration::subsection_list::const_iterator it = subelements.begin(); 00641 for ( ; it != end; ++it ) { 00642 00643 ConstConfigurationPtr subelement = *it; 00644 00645 if ( subelement->getName() == "Algorithm" ) { 00646 00647 AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( subelement ); 00648 00649 _algs.push_back( alg ); 00650 00651 _norms.push_back( alg->getNormalizer() ); 00652 } 00653 } 00654 00655 _num_algs = (int)_algs.size(); 00656 00657 _initialized = true; 00658 00659 _done = true; 00660 }