openModeller  Version 1.5.0
OpenModeller.cpp
Go to the documentation of this file.
1 
29 
30 #include <openmodeller/om_defs.hh>
31 #include <openmodeller/Log.hh>
34 #include <openmodeller/Sampler.hh>
42 #include <openmodeller/Model.hh>
44 
47 
49 
50 #include <string>
51 using std::string;
52 
53 /*** Callback "setters" ***/
54 
56 
58 }
59 
61 
63 }
64 
66 
68 }
69 
70 /****************************************************************/
71 /************************* Open Modeller ************************/
72 
73 /********************/
74 /*** constructors ***/
75 
77  _confusion_matrix(),
78  _roc_curve()
79 {
80  _error[0] = '\0';
81 
84 }
85 
86 
87 /******************/
88 /*** destructor ***/
89 
91 {
92  delete _actualAreaStats;
93  delete _estimatedAreaStats;
94 }
95 
96 
97 /*********************/
98 /*** set log level ***/
99 void
101 {
102  Log::instance()->setLevel( level );
103 }
104 
105 
106 /*******************/
107 /*** get Version ***/
108 std::string
110 {
111  return std::string( OM_VERSION );
112 }
113 
114 /****************************/
115 /*** available Algorithms ***/
116 AlgMetadata const **
118 {
120 }
121 
122 
123 /**************************/
124 /*** algorithm Metadata ***/
125 AlgMetadata const*
126 OpenModeller::algorithmMetadata( char const *algorithm_id )
127 {
128  return AlgorithmFactory::algorithmMetadata( algorithm_id );
129 }
130 
131 
132 /********************************/
133 /*** num Available Algorithms ***/
134 int
136 {
138 }
139 
140 bool
142 {
143  return ( _env || ( (_presence)? _presence->hasEnvironment() : false ) );
144 }
145 
146 /***********************/
147 /*** set Occurrences ***/
148 int
150  const OccurrencesPtr& absence )
151 {
152  if ( ( ! presence || presence->numOccurrences() == 0 ) &&
153  ( ! absence || absence->numOccurrences() == 0 ) ) {
154 
155  Log::instance()->error( "Occurrences must not be empty\n" );
156  return 0;
157  }
158  _presence = presence;
159  _absence = absence;
160 
161  return 1;
162 }
163 
164 /***********************/
165 /*** set Environment ***/
166 void
167 OpenModeller::setEnvironment( std::vector<std::string> categ_map,
168  std::vector<std::string> continuous_map,
169  const std::string& mask )
170 {
171  _env = createEnvironment( categ_map, continuous_map, mask);
172 }
173 
174 /*******************/
175 /*** set Sampler ***/
176 void
178 {
179  _samp = sampler;
180 }
181 
182 /*********************/
183 /*** set Algorithm ***/
184 int
185 OpenModeller::setAlgorithm( std::string const id, int nparam,
186  AlgParameter const *param )
187 {
188  if ( nparam && ! param ) {
189 
190  Log::instance()->error( "Wrong parameters when setting algorithm\n" );
191  return 0;
192  }
193 
194  if ( ! _samp ) {
195 
196  // Create a default sampler if none was previously provided
197  if ( ! hasEnvironment() ) {
198 
199  Log::instance()->warn( "Sampler could not be initialized. Environment not set.\n" );
200  return 0;
201  }
202  else if ( ( ! _presence ) && ( ! _absence ) ) {
203 
204  Log::instance()->warn( "Sampler could not be initialized. Occurrences not set.\n" );
205  return 0;
206  }
207  else {
208 
209  // _env and (_presence or _absence) are set
211  }
212  }
213 
215 
216  if ( ! _alg ) {
217 
218  Log::instance()->error( _error, "Could not find (%s) algorithm.", id.c_str() );
219  return 0;
220  }
221 
222  _alg->setSampler( _samp );
223  _alg->setParameters( nparam, param );
224 
225  return 1;
226 }
227 
228 /********************/
229 /*** create Model ***/
230 int
232 {
234  _roc_curve.reset();
235 
236  Log::instance()->info( "Creating model\n" );
237 
238  // Sampler
239  if ( ! _samp ) {
240 
241  Log::instance()->error( "Sampler not specified for model creation.\n" );
242  return 0;
243  }
244 
245  // Algorithm.
246  if ( ! _alg ) {
247 
248  Log::instance()->error( "Algorithm not specified for model creation.\n" );
249  return 0;
250  }
251 
252  _alg->createModel( _samp, &_callback_wrapper );
253 
254  // note: Leave the 2 spaces in the end of the message to cover the
255  // previous model creation progress log.
256  Log::instance()->info( "Finished creating model \n" );
257 
258  return 1;
259 }
260 
261 
262 /******************/
263 /*** create Map ***/
264 int
265 OpenModeller::createMap( const EnvironmentPtr & env, char const *output_file, MapFormat& output_format )
266 {
267  Log::instance()->info( "Projecting model\n" );
268 
269  if ( ! env ) {
270 
271  Log::instance()->error( "Projection environment not specified\n" );
272  return 0;
273  }
274 
275  _projEnv = env;
276 
277  if ( ! _alg ) {
278 
279  Log::instance()->error( "Algorithm not specified\n" );
280  return 0;
281  }
282 
283  Model model( _alg->getModel() );
284 
285  Map *mask = _projEnv->getMask();
286 
287  // If mask is undefined, use first layer as a mask
288  if ( ! mask ) {
289 
290  mask = _projEnv->getLayer(0);
291  }
292 
293  // Store output format in property
294  _format = output_format;
295 
296  // copy mask settings to the output format ONLY when they are undefined
297  _format.copyDefaults( *mask );
298 
299  // try to infer the output file format (default is 64 bit floating tiff).
300  string fname( output_file );
301 
302  int pos = fname.length() - 4;
303 
304  if ( pos > 0 ) {
305 
306  if ( fname.compare( pos, 4, ".bmp" ) == 0 ) {
307 
309  Log::instance()->warn ( "Using greyscale bmp as output format based on extension\n" );
310  }
311  }
312 
313  // Create map on disc.
314 
315 #ifdef MPI_FOUND
316  Map map( RasterFactory::instance().create( output_file, output_file, _format ) );
317 #else
318  Map map( RasterFactory::instance().create( output_file, _format ) );
319 #endif
320 
321  bool finished = Projector::createMap( model, _projEnv, &map, _actualAreaStats, &_callback_wrapper );
322 
323  if ( ! finished ) {
324 
325  Log::instance()->info( "Error during model projection\n" );
326  return 0;
327  }
328 
329  Log::instance()->info( "Finished projecting model\n" );
330 
331  return 1;
332 }
333 
334 int
335 OpenModeller::createMap( const EnvironmentPtr & env, char const *output_file )
336 {
337  return createMap( env, output_file, _format );
338 }
339 
340 /******************/
341 /*** create Map ***/
342 int
343 OpenModeller::createMap( char const *output_file )
344 {
345  if ( ! _projEnv ) {
346 
347  _projEnv = _env;
348  }
349 
350  return createMap( _projEnv, output_file, _format );
351 }
352 
353 /******************/
354 /*** create Map ***/
355 int
356 OpenModeller::createMap( char const *output_file, MapFormat& output_format )
357 {
358  if ( ! _projEnv ) {
359 
360  _projEnv = _env;
361  }
362 
363  return createMap( _projEnv, output_file, output_format );
364 }
365 
366 /**********************************/
367 /******* get Value ****************/
368 Scalar
370 {
371  if ( ! _env ) {
372 
373  return -1.0;
374  }
375 
376  // FIXME: enable geotransformation
377  const Sample& sample = env->get( x, y );
378 
379  if ( sample.size() == 0 ) {
380 
381  return -1.0;
382  }
383 
384  Scalar val = _alg->getValue( sample );
385  if ( val < 0.0 ) val = 0.0;
386  if ( val > 1.0 ) val = 1.0;
387 
388  return val;
389 }
390 
391 
392 /**********************************/
393 /******* get Value ****************/
394 Scalar
395 OpenModeller::getValue( Scalar const *environment_values )
396 {
397  Sample tmp( _env->numLayers() ,environment_values );
398  return _alg->getValue( tmp );
399 }
400 
401 
402 /************************************/
403 /******* get Actual AreaStats *******/
404 AreaStats *
406 {
407  return new AreaStats( _actualAreaStats );
408 }
409 
410 
411 /***************************************/
412 /******* get Estimated AreaStats *******/
413 AreaStats * OpenModeller::getEstimatedAreaStats(double proportionAreaToSample)
414 {
415  return getEstimatedAreaStats( _env, proportionAreaToSample );
416 }
417 
418 
419 /***************************************/
420 /******* get Estimated AreaStats *******/
422  double proportionAreaToSample)
423 {
424  int i, sampleSize, numCells, xdim, ydim;
425 
426  if ( !env ) {
427 
428  // this method does not work without _env properly set
429  return NULL;
430  }
431 
432  if ( !_estimatedAreaStats ) {
433 
435  }
436  else {
437 
439  }
440 
441  // get number of cells to sample
442  // note that the total area does not take the mask into account
443  // thus all cells (masked or unmasked) are counted
444  env->getMask()->getDim(&xdim, &ydim);
445  numCells = xdim * ydim;
446 
447  sampleSize = (int) (numCells * proportionAreaToSample);
448 
449  for (i = 0; i < sampleSize; i++) {
450 
451  const Sample& sample = env->getRandom();
452 
453  _estimatedAreaStats->addPrediction(_alg->getValue(sample));
454  }
455 
456  return _estimatedAreaStats;
457 }
458 
459 
460 /************************************/
461 /******* get Confusion Matrix *******/
463 {
464  if ( _confusion_matrix.ready() ) {
465 
466  return &_confusion_matrix;
467  }
468 
470 
471  return &_confusion_matrix;
472 }
473 
474 /*****************************/
475 /******* get Roc Curve *******/
477 {
478  if ( _roc_curve.ready() ) {
479 
480  return &_roc_curve;
481  }
482 
484 
485  return &_roc_curve;
486 }
487 
488 
489 /***************************************/
490 /******* get Model Configuration *******/
493 {
494  ConfigurationPtr config( new ConfigurationImpl("SerializedModel"));
495 
496  ConfigurationPtr sampler_config( _samp->getConfiguration() );
497 
498  config->addSubsection( sampler_config );
499 
500  ConfigurationPtr alg_config( _alg->getConfiguration() );
501 
502  config->addSubsection( alg_config );
503 
504  ConfigurationPtr stats_config( new ConfigurationImpl("Statistics") );
505 
506  if ( _confusion_matrix.ready() ) {
507 
509 
510  stats_config->addSubsection( cm_config );
511  }
512 
513  if ( _roc_curve.ready() ) {
514 
516 
517  stats_config->addSubsection( roc_config );
518  }
519 
520  config->addSubsection( stats_config );
521 
522  return config;
523 }
524 
525 
526 /***************************************/
527 /******* set Model Configuration *******/
528 void
530 {
531  Log::instance()->debug( "Setting model configuration\n" );
532 
534  _roc_curve.reset();
535 
536  Log::instance()->debug( "Creating sampler\n" );
537 
538  _samp = createSampler( config->getSubsection( "Sampler" ) );
539 
540  Log::instance()->debug( "Getting sampler attributes\n" );
541 
542  _env = _samp->getEnvironment();
543 
544  _presence = _samp->getPresences();
545 
546  _absence = _samp->getAbsences();
547 
548  Log::instance()->debug( "Getting algorithm from algorithm factory\n" );
549 
550  _alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) );
551 
552  // Model creation options
553  if ( ConstConfigurationPtr options_config = config->getSubsection( "Options", false ) ) {
554 
555  ConstConfigurationPtr occ_filter_config = options_config->getSubsection( "OccurrencesFilter", false );
556 
557  if ( occ_filter_config ) {
558 
559  ConstConfigurationPtr su_config = occ_filter_config->getSubsection( "SpatiallyUnique", false );
560 
561  if ( su_config ) {
562 
563  _samp->spatiallyUnique();
564  }
565 
566  ConstConfigurationPtr eu_config = occ_filter_config->getSubsection( "EnvironmentallyUnique", false );
567 
568  if ( eu_config ) {
569 
570  _samp->environmentallyUnique();
571  }
572  }
573  }
574 
575  Log::instance()->debug( "Assigning sampler to algorithm\n" );
576 
577  _alg->setSampler( _samp );
578 }
579 
580 
581 /********************************************/
582 /******* set Projection Configuration *******/
583 void
585 {
586  Log::instance()->debug( "Setting projection configuration\n" );
587 
588  try {
589 
590  _alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) );
591 
592  _projEnv = createEnvironment( config->getSubsection( "Environment" ) );
593 
594  ConstConfigurationPtr output_param_config = config->getSubsection( "OutputParameters" );
595 
596  ConstConfigurationPtr template_layer_config = output_param_config->getSubsection( "TemplateLayer" );
597 
598  string formatId = template_layer_config->getAttribute( "Id" );
599 
600  _format = MapFormat( formatId.c_str() );
601 
602  try {
603 
604  string fileType;
605 
606  fileType = output_param_config->getAttribute( "FileType" );
607 
608  if ( ! fileType.empty() ) {
609 
610  Log::instance()->debug( "Setting output file type to: %s\n", fileType.c_str() );
611 
612  _format.setFormat( fileType );
613  }
614  }
615  catch ( AttributeNotFound& e ) {
616 
617  // FileType attribute is optional
618  UNUSED(e);
619  }
620 
621  try {
622 
623  ConstConfigurationPtr stats_param_config = config->getSubsection( "Statistics" );
624  ConstConfigurationPtr areastats_param_config = stats_param_config->getSubsection( "AreaStatistics" );
625 
626  double threshold = areastats_param_config->getAttributeAsDouble( "PredictionThreshold", 0.5 );
627 
628  _actualAreaStats->reset( threshold );
629  }
630  catch ( SubsectionNotFound& e ) {
631 
632  // Statistics element is optional and AreaStatistics subelement is optional
633  UNUSED(e);
634  }
635  }
636  catch( ConfigurationException& e ) {
637 
638  Log::instance()->error( "Projection deserialization exception: %s\n", e.what() );
639 
640  throw e;
641  }
642 }
643 
644 
645 /********************************************/
646 /******* set Statistics Configuration *******/
647 void
649 {
650  Log::instance()->debug( "Setting statistics configuration\n" );
651 
653  _roc_curve.reset();
654 
655  bool calc_matrix = false;
656  double threshold = CONF_MATRIX_DEFAULT_THRESHOLD;
657  int ignore_absences_int = 0;
658 
659  bool calc_roc = false;
660  int resolution = -1;
661  int num_background = -1;
662  double max_omission = 1.0;
663  int use_absences_as_background_int = 0;
664 
665  try {
666 
667  ConfigurationPtr statistics_param = config->getSubsection( "Statistics" );
668 
669  try {
670 
671  ConfigurationPtr matrix_param = statistics_param->getSubsection( "ConfusionMatrix" );
672 
673  calc_matrix = true;
674 
675  std::string threshold_str = matrix_param->getAttribute( "Threshold", "" );
676 
677  if ( threshold_str.compare( "lpt" ) == 0 ) {
678 
679  threshold = -1.0;
680  }
681  else {
682 
683  threshold = matrix_param->getAttributeAsDouble( "Threshold", CONF_MATRIX_DEFAULT_THRESHOLD );
684  }
685 
686  ignore_absences_int = matrix_param->getAttributeAsInt( "IgnoreAbsences", 0 );
687 
688  if ( threshold < 0.0 ) {
689 
690  if ( _samp && _alg ) {
691 
693 
694  threshold = _confusion_matrix.getThreshold();
695  }
696  else {
697 
698  threshold = CONF_MATRIX_DEFAULT_THRESHOLD;
699 
700  Log::instance()->error( "Cannot determine lowest training threshold without a Model and a Sampler. The default confusion matrix threshold will be used.\n" );
701  }
702  }
703  }
704  catch( SubsectionNotFound& e ) {
705 
706  Log::instance()->info( "Confusion matrix not calculated\n" );
707  UNUSED(e);
708  }
709 
710  try {
711 
712  ConfigurationPtr roc_param = statistics_param->getSubsection( "RocCurve" );
713 
714  calc_roc = true;
715 
716  resolution = roc_param->getAttributeAsInt( "Resolution", -1 );
717 
718  num_background = roc_param->getAttributeAsInt( "BackgroundPoints", -1 );
719 
720  max_omission = roc_param->getAttributeAsDouble( "MaxOmission", 1.0 );
721 
722  use_absences_as_background_int = roc_param->getAttributeAsInt( "UseAbsencesAsBackground", 0 );
723  }
724  catch( SubsectionNotFound& e ) {
725 
726  Log::instance()->info( "ROC curve not calculated\n" );
727  UNUSED(e);
728  }
729  }
730  catch( SubsectionNotFound& e ) {
731 
732  // For backwards compatibility, calculate matrix and ROC if
733  // <Statistics> is not present. To avoid this, use an empty <Statistics/> element
734  calc_matrix = true;
735  calc_roc = true;
736  UNUSED(e);
737  }
738 
739  if ( calc_matrix || calc_roc )
740  {
741  if ( ! _samp ) {
742 
743  Log::instance()->error( "Sampler not specified for calculating statistics.\n" );
744  return;
745  }
746 
747  if ( ! _alg ) {
748 
749  Log::instance()->error( "Model not specified for calculating statistics.\n" );
750  return;
751  }
752  }
753 
754  int num_presences = _samp->numPresence();
755  int num_absences = _samp->numAbsence();
756 
757  // Confusion matrix can only be calculated with presence and/or absence points
758  if ( calc_matrix && ( num_presences || num_absences ) ) {
759 
760  bool ignore_absences = false;
761 
762  if ( ignore_absences_int > 0 ) {
763 
764  ignore_absences = true;
765  }
766 
767  _confusion_matrix.reset( threshold, ignore_absences );
769  }
770 
771  // ROC curve can only be calculated with presence points
772  // No absence points will force background points to be generated
773  if ( calc_roc && num_presences ) {
774 
775  bool use_absences_as_background = false;
776 
777  resolution = (resolution <= 0) ? ROC_DEFAULT_RESOLUTION : resolution;
778 
779  if ( use_absences_as_background_int > 0 ) {
780 
781  use_absences_as_background = true;
782 
783  _roc_curve.initialize( resolution, use_absences_as_background );
784  }
785  else {
786 
787  if ( num_background > 0 ) {
788 
789  _roc_curve.initialize( resolution, num_background );
790  }
791  else {
792 
793  _roc_curve.initialize( resolution );
794  }
795  }
796 
798 
799  _roc_curve.getTotalArea(); // call method to force serialization
800 
801  if ( max_omission < 1.0 ) {
802 
803  _roc_curve.getPartialAreaRatio( max_omission ); // call method to force serialization
804  }
805  }
806 }
void reset(Scalar predictionThreshold=CONF_MATRIX_DEFAULT_THRESHOLD, bool ignoreAbsences=false)
void setAbortionCallback(AbortionCallback func, void *param)
void initialize(int resolution=ROC_DEFAULT_RESOLUTION)
Definition: RocCurve.cpp:70
void setProjectionConfiguration(const ConstConfigurationPtr &)
void warn(const char *format,...)
'Warn' level.
Definition: Log.cpp:273
void setModelProjectionCallback(ModelProjectionCallback func, void *param)
ConfusionMatrix _confusion_matrix
int numAvailableAlgorithms()
static AlgorithmPtr newAlgorithm(std::string const id)
static AlgMetadata const ** availableAlgorithms()
void addPrediction(Scalar predictionValue)
Definition: AreaStats.cpp:57
double Scalar
Type of map values.
Definition: om_defs.hh:39
AreaStats * getActualAreaStats()
void setModelCallback(ModelCreationCallback func, void *param=0)
void(* ModelCreationCallback)(float progress, void *extra_param)
Definition: om_defs.hh:49
void calculateModelStatistics(const ConstConfigurationPtr &)
const ConfusionMatrix *const getConfusionMatrix()
Model getModel() const
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
void setMapCallback(ModelProjectionCallback func, void *param=0)
void setLogLevel(Log::Level level)
ConfigurationPtr getModelConfiguration() const
Level
Definition: Log.hh:54
OccurrencesPtr _absence
void setLevel(Level level)
Definition: Log.hh:107
void reset(Scalar predictionThreshold=0.5)
Definition: AreaStats.cpp:50
AreaStats * _actualAreaStats
EnvironmentPtr createEnvironment(const std::vector< std::string > &categs, const std::vector< std::string > &maps, const std::string &mask_file)
Definition: Environment.cpp:55
AlgorithmPtr _alg
SamplerPtr createSampler(const EnvironmentPtr &env, const OccurrencesPtr &presence, const OccurrencesPtr &absence)
Definition: Sampler.cpp:52
int createMap(const EnvironmentPtr &env, char const *output_file, MapFormat &format)
void error(const char *format,...)
'Error' level.
Definition: Log.cpp:290
bool hasEnvironment()
AreaStats * _estimatedAreaStats
char _error[256]
void calculate(const EnvironmentPtr &env, const Model &model, const OccurrencesPtr &presences, const OccurrencesPtr &absences=OccurrencesPtr())
Scalar getValue(const ConstEnvironmentPtr &env, Coord x, Coord y)
#define UNUSED(symbol)
Definition: os_specific.hh:55
bool ready() const
Definition: RocCurve.hh:144
RocCurve _roc_curve
static bool createMap(const Model &model, const EnvironmentPtr &env, Map *map, AreaStats *areaStats=0, CallbackWrapper *callbackWrapper=0)
Definition: Projector.cpp:59
int setOccurrences(const OccurrencesPtr &presence, const OccurrencesPtr &absence=OccurrencesPtr())
AlgMetadata const * algorithmMetadata(char const *algorithm_id)
AlgMetadata const ** availableAlgorithms()
EnvironmentPtr _env
void setAbortionCallback(AbortionCallback func, void *param=0)
EnvironmentPtr _projEnv
void reset()
Definition: RocCurve.cpp:100
Definition: Map.hh:49
static int numAvailableAlgorithms()
std::size_t size() const
Definition: Sample.hh:70
static RasterFactory & instance()
void setLowestTrainingThreshold(const Model &model, const SamplerPtr &sampler)
int setAlgorithm(std::string const id, int nparam, AlgParameter const *param)
std::string getVersion()
bool ready() const
SamplerPtr _samp
static AlgMetadata const * algorithmMetadata(std::string const algorithm_id)
void setSampler(const SamplerPtr &sampler)
double getThreshold() const
bool(* AbortionCallback)(void *extra_param)
Definition: om_defs.hh:64
OccurrencesPtr _presence
#define ROC_DEFAULT_RESOLUTION
Definition: RocCurve.hh:40
void info(const char *format,...)
'Info' level.
Definition: Log.cpp:256
void setEnvironment(std::vector< std::string > categ_map, std::vector< std::string > continuous_map, const std::string &mask)
void setModelCreationCallback(ModelCreationCallback func, void *param)
void setFormat(int format)
Definition: MapFormat.cpp:151
const SamplerPtr & getSampler() const
void copyDefaults(const Map &map)
Definition: MapFormat.cpp:101
void calculate(const Model &model, const SamplerPtr &sampler)
Definition: RocCurve.cpp:131
ConfigurationPtr getConfiguration() const
Definition: RocCurve.cpp:739
AreaStats * getEstimatedAreaStats(double proportionAreaToSample=0.01)
double Coord
Type of map coordinates.
Definition: om_defs.hh:38
MapFormat _format
RocCurve *const getRocCurve()
ConfigurationPtr getConfiguration() const
void(* ModelProjectionCallback)(float progress, void *extra_param)
Definition: om_defs.hh:58
#define CONF_MATRIX_DEFAULT_THRESHOLD
double getTotalArea()
Definition: RocCurve.cpp:602
CallbackWrapper _callback_wrapper
void debug(const char *format,...)
'Debug' level.
Definition: Log.cpp:237
double getPartialAreaRatio(double e=1.0)
Definition: RocCurve.cpp:615
void setModelConfiguration(const ConstConfigurationPtr &)
Definition: Sample.hh:25