openModeller  Version 1.4.0
consensus.cpp
Go to the documentation of this file.
00001 
00027 #include "consensus.hh"
00028 
00029 #include <string>
00030 #include <algorithm>
00031 #include <sstream>
00032 
00033 using namespace std;
00034 
00035 /****************************************************************/
00036 /********************** Algorithm's Metadata ********************/
00037 
00038 #define NUM_PARAM 7
00039 #define MAX_ALGORITHMS 5
00040 
00041 #define CONSENSUS_LOG_PREFIX "Consensus: "
00042 
00043 /******************************/
00044 /*** Algorithm's parameters ***/
00045 
00046 static AlgParamMetadata parameters[NUM_PARAM] = {
00047   
00048   // Algorithm 1
00049   {
00050     "Alg1",       // Id.
00051     "Algorithm1", // Name.
00052     String,       // Type.
00053     "Algorithm 1", // Overview
00054     "First algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface.", // Description.
00055     0,      // Not zero if the parameter has lower limit.
00056     0,      // Parameter's lower limit.
00057     0,      // Not zero if the parameter has upper limit.
00058     0,      // Parameter's upper limit.
00059     "RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=1)" // Parameter's typical (default) value.
00060   },
00061   // Algorithm 2
00062   {
00063     "Alg2",       // Id.
00064     "Algorithm2", // Name.
00065     String,       // Type.
00066     "Algorithm 2", // Overview
00067     "Second algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
00068     0,      // Not zero if the parameter has lower limit.
00069     0,      // Parameter's lower limit.
00070     0,      // Not zero if the parameter has upper limit.
00071     0,      // Parameter's upper limit.
00072     ""      // Parameter's typical (default) value.
00073   },
00074   // Algorithm 3
00075   {
00076     "Alg3",       // Id.
00077     "Algorithm3", // Name.
00078     String,       // Type.
00079     "Algorithm 3", // Overview
00080     "Third algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
00081     0,      // Not zero if the parameter has lower limit.
00082     0,      // Parameter's lower limit.
00083     0,      // Not zero if the parameter has upper limit.
00084     0,      // Parameter's upper limit.
00085     ""      // Parameter's typical (default) value.
00086   },
00087   // Algorithm 4
00088   {
00089     "Alg4",       // Id.
00090     "Algorithm4", // Name.
00091     String,       // Type.
00092     "Algorithm 4", // Overview
00093     "Fourth algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
00094     0,      // Not zero if the parameter has lower limit.
00095     0,      // Parameter's lower limit.
00096     0,      // Not zero if the parameter has upper limit.
00097     0,      // Parameter's upper limit.
00098     ""      // Parameter's typical (default) value.
00099   },
00100   // Algorithm 5
00101   {
00102     "Alg5",       // Id.
00103     "Algorithm5", // Name.
00104     String,       // Type.
00105     "Algorithm 5", // Overview
00106     "Fifth algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
00107     0,      // Not zero if the parameter has lower limit.
00108     0,      // Parameter's lower limit.
00109     0,      // Not zero if the parameter has upper limit.
00110     0,      // Parameter's upper limit.
00111     ""      // Parameter's typical (default) value.
00112   },
00113   // Weigths
00114   {
00115     "Weights", // Id.
00116     "Weights", // Name.
00117     String,    // Type.
00118     "Weights", // Overview
00119     "Sequence of weights, each one related to the corresponding algorithm, separated by space. This can be used to give more importance to certain algorithms. Use dot as decimal separator.", // Description.
00120     0,      // Not zero if the parameter has lower limit.
00121     0,      // Parameter's lower limit.
00122     0,      // Not zero if the parameter has upper limit.
00123     0,      // Parameter's upper limit.
00124     "1.0 0.0 0.0 0.0 0.0" // Parameter's typical (default) value.
00125   },
00126   // Minimum level of agreement
00127   {
00128     "Agreement", // Id.
00129     "Agreement", // Name.
00130     Integer,     // Type.
00131     "Minimum level of agreement", // Overview
00132     "Minimum level of agreement between the algorithms. Only predictions that are agreed between the specified number of algorithms will be returned as a positive value.", // Description.
00133     1,      // Not zero if the parameter has lower limit.
00134     1,      // Parameter's lower limit.
00135     1,      // Not zero if the parameter has upper limit.
00136     5,      // Parameter's upper limit.
00137     "1"     // Parameter's typical (default) value.
00138   },
00139 };
00140 
00141 /************************************/
00142 /*** Algorithm's general metadata ***/
00143 
00144 static AlgMetadata metadata = {
00145 
00146   "CONSENSUS",  // Id.
00147   "Consensus",  // Name.
00148   "0.2",        // Version.
00149 
00150   // Overview
00151   "Builds a consensus model with the specified algorithms",
00152 
00153   // Description.
00154   "This is a kind of meta algorithm that receives other algorithms as parameters so that it can generate the individual models and then merge the results into an aggregated model. The maximum number of algorithms is limited to 5. Leave the algorithm parameter blank if you want to use fewer algorithms. IMPORTANT: To specify an algorithm you need to know the algorithm id and its parameters names in openModeller (you can do this by inspecting the request.txt file that comes as an exemple in the command-line interface). Before merging the models, each individual model is transformed into a binary model using the lowest presence threshold. You can assign different weights to each algorithm and also specify the minimum level of agreement between the algorithms. A minimum level of 3 when 5 algorithms are used means that, when less than 3 algorithms agree on a prediction, the result will be zero, so the final model only shows areas where the specified number of algorithms agree on the prediction.",
00155 
00156   "Renato De Giovanni", // Algorithm author.
00157   "", // Bibliography.
00158 
00159   "Renato De Giovanni", // Code author.
00160   "renato [at] cria . org . br", // Code author's contact.
00161 
00162   0, // Does not accept categorical data.
00163   0, // Does not need (pseudo)absence points.
00164 
00165   NUM_PARAM, // Algorithm's parameters.
00166   parameters
00167 };
00168 
00169 /****************************************************************/
00170 /****************** Algorithm's factory function ****************/
00171 
00172 OM_ALG_DLL_EXPORT
00173 AlgorithmImpl *
00174 algorithmFactory()
00175 {
00176   return new ConsensusAlgorithm();
00177 }
00178 
00179 OM_ALG_DLL_EXPORT
00180 AlgMetadata const *
00181 algorithmMetadata()
00182 {
00183   return &metadata;
00184 }
00185 
00186 
00187 /*********************************************/
00188 /*********** Consensus algorithm ***********/
00189 
00190 /*******************/
00191 /*** constructor ***/
00192 
00193 ConsensusAlgorithm::ConsensusAlgorithm() :
00194   AlgorithmImpl( &metadata ),
00195   _done( false ),
00196   _initialized( false ),
00197   _num_algs( 0 ),
00198   _agreement( 1 )
00199 {
00200 }
00201 
00202 
00203 /******************/
00204 /*** destructor ***/
00205 
00206 ConsensusAlgorithm::~ConsensusAlgorithm()
00207 {
00208   for ( int i=0; i < (int)_algs.size(); i++ ) {
00209 
00210     if ( _norms[i] ) {
00211 
00212       delete _norms[i];
00213     }
00214   }
00215 }
00216 
00217 /**************************/
00218 /*** need Normalization ***/
00219 int ConsensusAlgorithm::needNormalization()
00220 {
00221   return 0;
00222 }
00223 
00224 /******************/
00225 /*** initialize ***/
00226 int
00227 ConsensusAlgorithm::initialize()
00228 {
00229   std::string alg;
00230 
00231   if ( getParameter( "Alg1", &alg ) ) {
00232 
00233     if ( !_setAlgorithm( alg ) ) return 0;
00234   }
00235 
00236   if ( getParameter( "Alg2", &alg ) ) {
00237 
00238     if ( !_setAlgorithm( alg ) ) return 0;
00239   }
00240 
00241   if ( getParameter( "Alg3", &alg ) ) {
00242 
00243     if ( !_setAlgorithm( alg ) ) return 0;
00244   }
00245 
00246   if ( getParameter( "Alg4", &alg ) ) {
00247 
00248     if ( !_setAlgorithm( alg ) ) return 0;
00249   }
00250 
00251   if ( getParameter( "Alg5", &alg ) ) {
00252 
00253     if ( !_setAlgorithm( alg ) ) return 0;
00254   }
00255 
00256   _num_algs = (int)_algs.size();
00257 
00258   if ( _num_algs == 0 ) {
00259 
00260     Log::instance()->error( CONSENSUS_LOG_PREFIX "Consensus needs at least one algorithm. No algorithm could be instantiated based on the parameters.\n" );
00261     return 0;
00262   }
00263 
00264   if ( ! getParameter( "Agreement", &_agreement ) ) {
00265 
00266     _agreement = _num_algs; // default value
00267   }
00268   else {
00269 
00270     if ( _agreement < 1 || _agreement > _num_algs ) {
00271 
00272       _agreement = _num_algs;
00273     }
00274   }
00275 
00276   _thresholds = Sample(MAX_ALGORITHMS, 1.0); // start with maximum threshold
00277 
00278   _weights.resize(MAX_ALGORITHMS);
00279 
00280   std::string weights_param;
00281 
00282   int nw = 0;
00283 
00284   _sum_weights = 0.0;
00285 
00286   if ( getParameter( "Weights", &weights_param ) ) {
00287 
00288     stringstream ss(weights_param);
00289     string weight;
00290     double weight_val;
00291     while ( getline(ss, weight, ' ') ) {
00292 
00293       weight_val = 1.0;
00294       sscanf( weight.c_str(), "%lf", &weight_val );
00295       _weights[nw] = weight_val;
00296       _sum_weights += weight_val;
00297       ++nw;
00298 
00299       if ( nw == MAX_ALGORITHMS ) {
00300         break;
00301       }
00302     }
00303   }
00304 
00305   for ( int i=nw; i < MAX_ALGORITHMS; ++i ) {
00306 
00307     _weights[i] = 1.0;
00308     _sum_weights += 1.0;
00309   }
00310 
00311   for ( int j=0; j < _num_algs; j++ ) {
00312 
00313     SamplerPtr fresh_sampler = cloneSampler(_samp);
00314 
00315     if ( _algs[j]->needNormalization() ) {
00316 
00317       fresh_sampler->normalize( _algs[j]->getNormalizer() );
00318     }
00319 
00320     _algs[j]->setSampler( fresh_sampler );
00321     _algs[j]->initialize();
00322   }
00323 
00324   return 1;
00325 }
00326 
00327 /*********************/
00328 /*** set Algorithm ***/
00329 bool
00330 ConsensusAlgorithm::_setAlgorithm( std::string alg_str )
00331 {
00332   // Remove spaces
00333   alg_str.erase( std::remove_if( alg_str.begin(), alg_str.end(), ::isspace ), alg_str.end() );
00334 
00335   if ( alg_str.size() == 0 ) {
00336 
00337     // Empty alg. Do nothing.
00338     return true;
00339   }
00340 
00341   size_t ini_p = alg_str.find( "(" );
00342 
00343   // No parentheses
00344   if ( ini_p == string::npos ) {
00345 
00346     // means no parameters, so just instantiate the algorithm
00347     AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( alg_str );
00348 
00349     _algs.push_back( alg );
00350 
00351     _norms.push_back( alg->getNormalizer() );
00352 
00353     return true;
00354   }
00355 
00356   // There are parentheses
00357 
00358   // extract ID
00359   std::string alg_id = alg_str.substr(0, ini_p);
00360 
00361   // get parameters
00362   size_t end_p = alg_str.find( ")" );
00363 
00364   if ( end_p == string::npos ) {
00365 
00366     Log::instance()->error( CONSENSUS_LOG_PREFIX "Missing parenthesis in algorithm parameters.\n" );
00367     return false;
00368   }
00369   else if ( end_p < ini_p ) {
00370 
00371     Log::instance()->error( CONSENSUS_LOG_PREFIX "Mismatching parenthesis in algoroithm parameters.\n" );
00372     return false;
00373   }
00374 
00375   std::string alg_params = alg_str.substr(ini_p + 1, end_p - ini_p -1);
00376 
00377   vector<string> pairs;
00378   stringstream ss(alg_params);
00379   string pair;
00380   int nparam = 0;
00381   while ( getline(ss, pair, ',') ) {
00382 
00383     pairs.push_back(pair);
00384     ++nparam;
00385   }
00386 
00387   ParamSetType params;
00388 
00389   for ( int i = 0; i < nparam; i++) {
00390 
00391     size_t eq = pairs[i].find( "=" );
00392 
00393     if ( eq == string::npos || eq == 0 ) {
00394 
00395       Log::instance()->error( CONSENSUS_LOG_PREFIX "Algorithm parameter failed to match key=value pair format.\n" );
00396       return false;
00397     }
00398 
00399     std::string param_id = pairs[i].substr(0, eq);
00400     std::string param_val = pairs[i].substr(eq+1);
00401 
00402     params.insert( std::pair<icstring,std::string>(param_id, param_val) );
00403   }
00404 
00405   AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( alg_id );
00406 
00407   alg->setParameters( params );
00408 
00409   _algs.push_back( alg );
00410 
00411   _norms.push_back( alg->getNormalizer() );
00412 
00413   return true;
00414 }
00415 
00416 /***************/
00417 /*** iterate ***/
00418 int
00419 ConsensusAlgorithm::iterate()
00420 {
00421   _done = true;
00422 
00423   for ( int j=0; j < _num_algs; j++ ) {
00424 
00425     if ( ! _algs[j]->done() ) {
00426 
00427       _done = false;
00428 
00429       if ( ! _algs[j]->iterate() ) {
00430 
00431         return 0;
00432       }
00433     }
00434   }
00435 
00436   // get LPT
00437   if ( _done ) {
00438 
00439     OccurrencesPtr presences = _samp->getPresences();
00440 
00441     OccurrencesImpl::const_iterator p_iterator;
00442     OccurrencesImpl::const_iterator p_end;
00443 
00444     Scalar val;
00445 
00446     while ( p_iterator != p_end ) {
00447 
00448       Sample env = (*p_iterator)->environment();
00449 
00450       for ( int j=0; j < _num_algs; j++ ) {
00451 
00452         if ( _norms[j] ) {
00453 
00454           Sample mysamp = Sample( env ); // deep copy
00455           _norms[j]->normalize( &mysamp );
00456           val = _algs[j]->getValue( mysamp );
00457         }
00458         else {
00459 
00460           val = _algs[j]->getValue( env );
00461   }
00462 
00463         if ( val < _thresholds[j] && val > 0.0 ) {
00464 
00465           _thresholds[j] = val;
00466         }
00467       }
00468 
00469       ++p_iterator;
00470     }
00471   }
00472 
00473   return 1;
00474 }
00475 
00476 /********************/
00477 /*** get Progress ***/
00478 float ConsensusAlgorithm::getProgress() const
00479 {
00480   float progress = 0.0;
00481 
00482   for ( int j=0; j < _num_algs; j++ ) {
00483 
00484     progress += _algs[j]->getProgress();
00485   }
00486 
00487   return progress/(float)_num_algs;
00488 }
00489 
00490 
00491 /************/
00492 /*** done ***/
00493 int
00494 ConsensusAlgorithm::done() const
00495 {
00496   return _done;
00497 }
00498 
00499 /*****************/
00500 /*** get Value ***/
00501 Scalar
00502 ConsensusAlgorithm::getValue( const Sample& x ) const
00503 {
00504   Scalar prob = 0.0;
00505   Scalar v;
00506   int agree = 0;
00507 
00508   for ( int i=0; i < _num_algs; i++ ) {
00509 
00510     if ( _norms[i] ) {
00511 
00512       Sample y( x );
00513       _norms[i]->normalize( &y );
00514       v = _algs[i]->getValue( y );
00515     }
00516     else {
00517 
00518       v = _algs[i]->getValue( x );
00519     }
00520 
00521     if ( v >= _thresholds[i] ) {
00522 
00523       prob +=  1.0 * _weights[i];
00524       agree++;
00525     }
00526   }
00527 
00528   if ( agree < _agreement ) {
00529 
00530     return 0.0;
00531   }
00532 
00533   return prob/_sum_weights;
00534 }
00535 
00536 /***********************/
00537 /*** get Convergence ***/
00538 int
00539 ConsensusAlgorithm::getConvergence( Scalar * const val ) const
00540 {
00541   *val = 1.0;
00542   return 1;
00543 }
00544 
00545 /****************************************************************/
00546 /****************** configuration *******************************/
00547 void
00548 ConsensusAlgorithm::_getConfiguration( ConfigurationPtr& config ) const
00549 {
00550   if ( ! _done )
00551     return;
00552 
00553   ConfigurationPtr model_config( new ConfigurationImpl("Consensus") );
00554   config->addSubsection( model_config );
00555 
00556   model_config->addNameValue( "Thresholds", _thresholds );
00557 
00558   ConfigurationPtr algs_config( new ConfigurationImpl("Algorithms") );
00559   model_config->addSubsection( algs_config );
00560 
00561   for ( int i=0; i < _num_algs; i++ ) {
00562 
00563     ConfigurationPtr alg_config = _algs[i]->getConfiguration();
00564     algs_config->addSubsection( alg_config );
00565   }
00566 }
00567 
00568 void
00569 ConsensusAlgorithm::_setConfiguration( const ConstConfigurationPtr& config )
00570 {
00571   ConstConfigurationPtr model_config = config->getSubsection( "Consensus", false );
00572 
00573   if ( ! model_config )
00574     return;
00575 
00576   if ( ! getParameter("Agreement", &_agreement) ) {
00577 
00578     Log::instance()->error("Parameter 'Agreement' was not found in serialized model.\n");
00579     return;
00580   }
00581   else {
00582 
00583     if ( _agreement < 1 || _agreement > MAX_ALGORITHMS ) {
00584 
00585       _agreement = 2;
00586     }
00587   }
00588 
00589   _weights.resize(MAX_ALGORITHMS);
00590 
00591   std::string weights_param;
00592 
00593   int nw = 0;
00594 
00595   _sum_weights = 0.0;
00596 
00597   if ( ! getParameter( "Weights", &weights_param ) ) {
00598 
00599     Log::instance()->error("Parameter 'Weights' was not found in serialized model.\n");
00600     return;
00601   }
00602   else {
00603 
00604     stringstream ss(weights_param);
00605     string weight;
00606     double weight_val;
00607     while ( getline(ss, weight, ' ') ) {
00608 
00609       weight_val = 1.0;
00610       sscanf( weight.c_str(), "%lf", &weight_val );
00611       _weights[nw] = weight_val;
00612       _sum_weights += weight_val;
00613       ++nw;
00614 
00615       if ( nw == MAX_ALGORITHMS ) {
00616         break;
00617       }
00618     }
00619   }
00620 
00621   for ( int i=nw; i < MAX_ALGORITHMS; ++i ) {
00622 
00623     _weights[i] = 1.0;
00624     _sum_weights += 1.0;
00625   }
00626 
00627   _thresholds = model_config->getAttributeAsSample( "Thresholds" );
00628 
00629   ConstConfigurationPtr algs_config = model_config->getSubsection( "Algorithms", false );
00630 
00631   if ( ! algs_config ) {
00632 
00633     Log::instance()->error( CONSENSUS_LOG_PREFIX "No algorithms could be deserialized.\n" );
00634     return;
00635   }
00636 
00637   Configuration::subsection_list subelements = algs_config->getAllSubsections();
00638 
00639   Configuration::subsection_list::const_iterator end = subelements.end();
00640   Configuration::subsection_list::const_iterator it = subelements.begin();
00641   for ( ; it != end; ++it ) {
00642 
00643     ConstConfigurationPtr subelement = *it;
00644 
00645     if ( subelement->getName() == "Algorithm" ) {
00646 
00647       AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( subelement );
00648 
00649       _algs.push_back( alg );
00650 
00651       _norms.push_back( alg->getNormalizer() );
00652     }
00653   }
00654 
00655   _num_algs = (int)_algs.size();
00656 
00657   _initialized = true;
00658 
00659   _done = true;
00660 }