openModeller  Version 1.5.0
consensus.cpp
Go to the documentation of this file.
1 
#include "consensus.hh"

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <sstream>
#include <string>
#include <vector>
32 
33 using namespace std;
34 
35 /****************************************************************/
36 /********************** Algorithm's Metadata ********************/
37 
38 #define NUM_PARAM 7
39 #define MAX_ALGORITHMS 5
40 
41 #define CONSENSUS_LOG_PREFIX "Consensus: "
42 
43 /******************************/
44 /*** Algorithm's parameters ***/
45 
47 
48  // Algorithm 1
49  {
50  "Alg1", // Id.
51  "Algorithm1", // Name.
52  String, // Type.
53  "Algorithm 1", // Overview
54  "First algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface.", // Description.
55  0, // Not zero if the parameter has lower limit.
56  0, // Parameter's lower limit.
57  0, // Not zero if the parameter has upper limit.
58  0, // Parameter's upper limit.
59  "RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=1)" // Parameter's typical (default) value.
60  },
61  // Algorithm 2
62  {
63  "Alg2", // Id.
64  "Algorithm2", // Name.
65  String, // Type.
66  "Algorithm 2", // Overview
67  "Second algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
68  0, // Not zero if the parameter has lower limit.
69  0, // Parameter's lower limit.
70  0, // Not zero if the parameter has upper limit.
71  0, // Parameter's upper limit.
72  "" // Parameter's typical (default) value.
73  },
74  // Algorithm 3
75  {
76  "Alg3", // Id.
77  "Algorithm3", // Name.
78  String, // Type.
79  "Algorithm 3", // Overview
80  "Third algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
81  0, // Not zero if the parameter has lower limit.
82  0, // Parameter's lower limit.
83  0, // Not zero if the parameter has upper limit.
84  0, // Parameter's upper limit.
85  "" // Parameter's typical (default) value.
86  },
87  // Algorithm 4
88  {
89  "Alg4", // Id.
90  "Algorithm4", // Name.
91  String, // Type.
92  "Algorithm 4", // Overview
93  "Fourth algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
94  0, // Not zero if the parameter has lower limit.
95  0, // Parameter's lower limit.
96  0, // Not zero if the parameter has upper limit.
97  0, // Parameter's upper limit.
98  "" // Parameter's typical (default) value.
99  },
100  // Algorithm 5
101  {
102  "Alg5", // Id.
103  "Algorithm5", // Name.
104  String, // Type.
105  "Algorithm 5", // Overview
106  "Fifth algorithm to be used in the consensus. It must be specified by its id followed by a sequence of parameter_name=parameter_value separated by comma and enclosed by a parentheses, such as: RF(NumTrees=10,VarsPerTree=0,ForceUnsupervisedLearning=0). Existing algorithm ids and parameter names can be found in the end of the om_console request file that comes with the openModeller command line interface. Leave empty if you don't want to use any further algorithms", // Description.
107  0, // Not zero if the parameter has lower limit.
108  0, // Parameter's lower limit.
109  0, // Not zero if the parameter has upper limit.
110  0, // Parameter's upper limit.
111  "" // Parameter's typical (default) value.
112  },
113  // Weigths
114  {
115  "Weights", // Id.
116  "Weights", // Name.
117  String, // Type.
118  "Weights", // Overview
119  "Sequence of weights, each one related to the corresponding algorithm, separated by space. This can be used to give more importance to certain algorithms. Use dot as decimal separator.", // Description.
120  0, // Not zero if the parameter has lower limit.
121  0, // Parameter's lower limit.
122  0, // Not zero if the parameter has upper limit.
123  0, // Parameter's upper limit.
124  "1.0 0.0 0.0 0.0 0.0" // Parameter's typical (default) value.
125  },
126  // Minimum level of agreement
127  {
128  "Agreement", // Id.
129  "Agreement", // Name.
130  Integer, // Type.
131  "Minimum level of agreement", // Overview
132  "Minimum level of agreement between the algorithms. Only predictions that are agreed between the specified number of algorithms will be returned as a positive value.", // Description.
133  1, // Not zero if the parameter has lower limit.
134  1, // Parameter's lower limit.
135  1, // Not zero if the parameter has upper limit.
136  5, // Parameter's upper limit.
137  "1" // Parameter's typical (default) value.
138  },
139 };
140 
141 /************************************/
142 /*** Algorithm's general metadata ***/
143 
145 
146  "CONSENSUS", // Id.
147  "Consensus", // Name.
148  "0.2", // Version.
149 
150  // Overview
151  "Builds a consensus model with the specified algorithms",
152 
153  // Description.
154  "This is a kind of meta algorithm that receives other algorithms as parameters so that it can generate the individual models and then merge the results into an aggregated model. The maximum number of algorithms is limited to 5. Leave the algorithm parameter blank if you want to use fewer algorithms. IMPORTANT: To specify an algorithm you need to know the algorithm id and its parameters names in openModeller (you can do this by inspecting the request.txt file that comes as an exemple in the command-line interface). Before merging the models, each individual model is transformed into a binary model using the lowest presence threshold. You can assign different weights to each algorithm and also specify the minimum level of agreement between the algorithms. A minimum level of 3 when 5 algorithms are used means that, when less than 3 algorithms agree on a prediction, the result will be zero, so the final model only shows areas where the specified number of algorithms agree on the prediction.",
155 
156  "Renato De Giovanni", // Algorithm author.
157  "", // Bibliography.
158 
159  "Renato De Giovanni", // Code author.
160  "renato [at] cria . org . br", // Code author's contact.
161 
162  0, // Does not accept categorical data.
163  0, // Does not need (pseudo)absence points.
164 
165  NUM_PARAM, // Algorithm's parameters.
166  parameters
167 };
168 
169 /****************************************************************/
170 /****************** Algorithm's factory function ****************/
171 
172 OM_ALG_DLL_EXPORT
175 {
176  return new ConsensusAlgorithm();
177 }
178 
179 OM_ALG_DLL_EXPORT
180 AlgMetadata const *
182 {
183  return &metadata;
184 }
185 
186 
187 /*********************************************/
188 /*********** Consensus algorithm ************/
189 
190 /*******************/
191 /*** constructor ***/
192 
195  _done( false ),
196  _initialized( false ),
197  _num_algs( 0 ),
198  _agreement( 1 )
199 {
200 }
201 
202 
203 /******************/
204 /*** destructor ***/
205 
207 {
208  for ( int i=0; i < (int)_algs.size(); i++ ) {
209 
210  if ( _norms[i] ) {
211 
212  delete _norms[i];
213  }
214  }
215 }
216 
217 /**************************/
218 /*** need Normalization ***/
220 {
221  return 0;
222 }
223 
224 /******************/
225 /*** initialize ***/
226 int
228 {
229  std::string alg;
230 
231  if ( getParameter( "Alg1", &alg ) ) {
232 
233  if ( !_setAlgorithm( alg ) ) return 0;
234  }
235 
236  if ( getParameter( "Alg2", &alg ) ) {
237 
238  if ( !_setAlgorithm( alg ) ) return 0;
239  }
240 
241  if ( getParameter( "Alg3", &alg ) ) {
242 
243  if ( !_setAlgorithm( alg ) ) return 0;
244  }
245 
246  if ( getParameter( "Alg4", &alg ) ) {
247 
248  if ( !_setAlgorithm( alg ) ) return 0;
249  }
250 
251  if ( getParameter( "Alg5", &alg ) ) {
252 
253  if ( !_setAlgorithm( alg ) ) return 0;
254  }
255 
256  _num_algs = (int)_algs.size();
257 
258  if ( _num_algs == 0 ) {
259 
260  Log::instance()->error( CONSENSUS_LOG_PREFIX "Consensus needs at least one algorithm. No algorithm could be instantiated based on the parameters.\n" );
261  return 0;
262  }
263 
264  if ( ! getParameter( "Agreement", &_agreement ) ) {
265 
266  _agreement = _num_algs; // default value
267  }
268  else {
269 
270  if ( _agreement < 1 || _agreement > _num_algs ) {
271 
273  }
274  }
275 
276  _thresholds = Sample(MAX_ALGORITHMS, 1.0); // start with maximum threshold
277 
279 
280  std::string weights_param;
281 
282  int nw = 0;
283 
284  _sum_weights = 0.0;
285 
286  if ( getParameter( "Weights", &weights_param ) ) {
287 
288  stringstream ss(weights_param);
289  string weight;
290  double weight_val;
291  while ( getline(ss, weight, ' ') ) {
292 
293  weight_val = 1.0;
294  sscanf( weight.c_str(), "%lf", &weight_val );
295  _weights[nw] = weight_val;
296  _sum_weights += weight_val;
297  ++nw;
298 
299  if ( nw == MAX_ALGORITHMS ) {
300  break;
301  }
302  }
303  }
304 
305  for ( int i=nw; i < MAX_ALGORITHMS; ++i ) {
306 
307  _weights[i] = 1.0;
308  _sum_weights += 1.0;
309  }
310 
311  for ( int j=0; j < _num_algs; j++ ) {
312 
313  SamplerPtr fresh_sampler = cloneSampler(_samp);
314 
315  if ( _algs[j]->needNormalization() ) {
316 
317  fresh_sampler->normalize( _algs[j]->getNormalizer() );
318  }
319 
320  _algs[j]->setSampler( fresh_sampler );
321  _algs[j]->initialize();
322  }
323 
324  return 1;
325 }
326 
327 /*********************/
328 /*** set Algorithm ***/
329 bool
330 ConsensusAlgorithm::_setAlgorithm( std::string alg_str )
331 {
332  // Remove spaces
333  alg_str.erase( std::remove_if( alg_str.begin(), alg_str.end(), ::isspace ), alg_str.end() );
334 
335  if ( alg_str.size() == 0 ) {
336 
337  // Empty alg. Do nothing.
338  return true;
339  }
340 
341  size_t ini_p = alg_str.find( "(" );
342 
343  // No parentheses
344  if ( ini_p == string::npos ) {
345 
346  // means no parameters, so just instantiate the algorithm
348 
349  _algs.push_back( alg );
350 
351  _norms.push_back( alg->getNormalizer() );
352 
353  return true;
354  }
355 
356  // There are parentheses
357 
358  // extract ID
359  std::string alg_id = alg_str.substr(0, ini_p);
360 
361  // get parameters
362  size_t end_p = alg_str.find( ")" );
363 
364  if ( end_p == string::npos ) {
365 
366  Log::instance()->error( CONSENSUS_LOG_PREFIX "Missing parenthesis in algorithm parameters.\n" );
367  return false;
368  }
369  else if ( end_p < ini_p ) {
370 
371  Log::instance()->error( CONSENSUS_LOG_PREFIX "Mismatching parenthesis in algoroithm parameters.\n" );
372  return false;
373  }
374 
375  std::string alg_params = alg_str.substr(ini_p + 1, end_p - ini_p -1);
376 
377  vector<string> pairs;
378  stringstream ss(alg_params);
379  string pair;
380  int nparam = 0;
381  while ( getline(ss, pair, ',') ) {
382 
383  pairs.push_back(pair);
384  ++nparam;
385  }
386 
387  ParamSetType params;
388 
389  for ( int i = 0; i < nparam; i++) {
390 
391  size_t eq = pairs[i].find( "=" );
392 
393  if ( eq == string::npos || eq == 0 ) {
394 
395  Log::instance()->error( CONSENSUS_LOG_PREFIX "Algorithm parameter failed to match key=value pair format.\n" );
396  return false;
397  }
398 
399  std::string param_id = pairs[i].substr(0, eq);
400  std::string param_val = pairs[i].substr(eq+1);
401 
402  params.insert( std::pair<icstring,std::string>(param_id, param_val) );
403  }
404 
406 
407  alg->setParameters( params );
408 
409  _algs.push_back( alg );
410 
411  _norms.push_back( alg->getNormalizer() );
412 
413  return true;
414 }
415 
416 /***************/
417 /*** iterate ***/
418 int
420 {
421  _done = true;
422 
423  for ( int j=0; j < _num_algs; j++ ) {
424 
425  if ( ! _algs[j]->done() ) {
426 
427  _done = false;
428 
429  if ( ! _algs[j]->iterate() ) {
430 
431  return 0;
432  }
433  }
434  }
435 
436  // get LPT
437  if ( _done ) {
438 
439  OccurrencesPtr presences = _samp->getPresences();
440 
443 
444  Scalar val;
445 
446  while ( p_iterator != p_end ) {
447 
448  Sample env = (*p_iterator)->environment();
449 
450  for ( int j=0; j < _num_algs; j++ ) {
451 
452  if ( _norms[j] ) {
453 
454  Sample mysamp = Sample( env ); // deep copy
455  _norms[j]->normalize( &mysamp );
456  val = _algs[j]->getValue( mysamp );
457  }
458  else {
459 
460  val = _algs[j]->getValue( env );
461  }
462 
463  if ( val < _thresholds[j] && val > 0.0 ) {
464 
465  _thresholds[j] = val;
466  }
467  }
468 
469  ++p_iterator;
470  }
471  }
472 
473  return 1;
474 }
475 
476 /********************/
477 /*** get Progress ***/
479 {
480  float progress = 0.0;
481 
482  for ( int j=0; j < _num_algs; j++ ) {
483 
484  progress += _algs[j]->getProgress();
485  }
486 
487  return progress/(float)_num_algs;
488 }
489 
490 
491 /************/
492 /*** done ***/
493 int
495 {
496  return _done;
497 }
498 
499 /*****************/
500 /*** get Value ***/
501 Scalar
503 {
504  Scalar prob = 0.0;
505  Scalar v;
506  int agree = 0;
507 
508  for ( int i=0; i < _num_algs; i++ ) {
509 
510  if ( _norms[i] ) {
511 
512  Sample y( x );
513  _norms[i]->normalize( &y );
514  v = _algs[i]->getValue( y );
515  }
516  else {
517 
518  v = _algs[i]->getValue( x );
519  }
520 
521  if ( v >= _thresholds[i] ) {
522 
523  prob += 1.0 * _weights[i];
524  agree++;
525  }
526  }
527 
528  if ( agree < _agreement ) {
529 
530  return 0.0;
531  }
532 
533  return prob/_sum_weights;
534 }
535 
536 /***********************/
537 /*** get Convergence ***/
538 int
540 {
541  *val = 1.0;
542  return 1;
543 }
544 
545 /****************************************************************/
546 /****************** configuration *******************************/
547 void
549 {
550  if ( ! _done )
551  return;
552 
553  ConfigurationPtr model_config( new ConfigurationImpl("Consensus") );
554  config->addSubsection( model_config );
555 
556  model_config->addNameValue( "Thresholds", _thresholds );
557 
558  ConfigurationPtr algs_config( new ConfigurationImpl("Algorithms") );
559  model_config->addSubsection( algs_config );
560 
561  for ( int i=0; i < _num_algs; i++ ) {
562 
563  ConfigurationPtr alg_config = _algs[i]->getConfiguration();
564  algs_config->addSubsection( alg_config );
565  }
566 }
567 
568 void
570 {
571  ConstConfigurationPtr model_config = config->getSubsection( "Consensus", false );
572 
573  if ( ! model_config )
574  return;
575 
576  if ( ! getParameter("Agreement", &_agreement) ) {
577 
578  Log::instance()->error("Parameter 'Agreement' was not found in serialized model.\n");
579  return;
580  }
581  else {
582 
583  if ( _agreement < 1 || _agreement > MAX_ALGORITHMS ) {
584 
585  _agreement = 2;
586  }
587  }
588 
590 
591  std::string weights_param;
592 
593  int nw = 0;
594 
595  _sum_weights = 0.0;
596 
597  if ( ! getParameter( "Weights", &weights_param ) ) {
598 
599  Log::instance()->error("Parameter 'Weights' was not found in serialized model.\n");
600  return;
601  }
602  else {
603 
604  stringstream ss(weights_param);
605  string weight;
606  double weight_val;
607  while ( getline(ss, weight, ' ') ) {
608 
609  weight_val = 1.0;
610  sscanf( weight.c_str(), "%lf", &weight_val );
611  _weights[nw] = weight_val;
612  _sum_weights += weight_val;
613  ++nw;
614 
615  if ( nw == MAX_ALGORITHMS ) {
616  break;
617  }
618  }
619  }
620 
621  for ( int i=nw; i < MAX_ALGORITHMS; ++i ) {
622 
623  _weights[i] = 1.0;
624  _sum_weights += 1.0;
625  }
626 
627  _thresholds = model_config->getAttributeAsSample( "Thresholds" );
628 
629  ConstConfigurationPtr algs_config = model_config->getSubsection( "Algorithms", false );
630 
631  if ( ! algs_config ) {
632 
633  Log::instance()->error( CONSENSUS_LOG_PREFIX "No algorithms could be deserialized.\n" );
634  return;
635  }
636 
637  Configuration::subsection_list subelements = algs_config->getAllSubsections();
638 
639  Configuration::subsection_list::const_iterator end = subelements.end();
640  Configuration::subsection_list::const_iterator it = subelements.begin();
641  for ( ; it != end; ++it ) {
642 
643  ConstConfigurationPtr subelement = *it;
644 
645  if ( subelement->getName() == "Algorithm" ) {
646 
647  AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( subelement );
648 
649  _algs.push_back( alg );
650 
651  _norms.push_back( alg->getNormalizer() );
652  }
653  }
654 
655  _num_algs = (int)_algs.size();
656 
657  _initialized = true;
658 
659  _done = true;
660 }
Normalizer * getNormalizer() const
Definition: Algorithm.cpp:338
vector< AlgorithmPtr > _algs
Definition: consensus.hh:74
float getProgress() const
Definition: consensus.cpp:478
OM_ALG_DLL_EXPORT AlgMetadata const * algorithmMetadata()
Definition: consensus.cpp:181
std::vector< ConfigurationPtr > subsection_list
static AlgorithmPtr newAlgorithm(std::string const id)
int done() const
Definition: consensus.cpp:494
double Scalar
Type of map values.
Definition: om_defs.hh:39
vector< Normalizer * > _norms
Definition: consensus.hh:80
SamplerPtr cloneSampler(const SamplerPtr &orig)
Definition: Sampler.cpp:1219
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
int getConvergence(Scalar *const val) const
Definition: consensus.cpp:539
void error(const char *format,...)
'Error' level.
Definition: Log.cpp:290
int getParameter(std::string const &name, std::string *value)
void resize(std::size_t size)
Definition: Sample.cpp:153
OM_ALG_DLL_EXPORT AlgorithmImpl * algorithmFactory()
Definition: consensus.cpp:174
std::map< icstring, std::string > ParamSetType
Definition: Algorithm.hh:84
void _getConfiguration(ConfigurationPtr &) const
Definition: consensus.cpp:548
static AlgParamMetadata parameters[NUM_PARAM]
Definition: consensus.cpp:46
void _setConfiguration(const ConstConfigurationPtr &)
Definition: consensus.cpp:569
#define NUM_PARAM
Definition: consensus.cpp:38
bool _setAlgorithm(std::string alg_str)
Definition: consensus.cpp:330
#define MAX_ALGORITHMS
Definition: consensus.cpp:39
SamplerPtr _samp
Definition: Algorithm.hh:245
std::vector< OccurrencePtr >::const_iterator const_iterator
Definition: Occurrences.hh:85
Scalar getValue(const Sample &x) const
Definition: consensus.cpp:502
static AlgMetadata metadata
Definition: consensus.cpp:144
#define CONSENSUS_LOG_PREFIX
Definition: consensus.cpp:41
Definition: Sample.hh:25