openModeller
Version 1.4.0
|
00001 00028 #include <openmodeller/OpenModeller.hh> 00029 00030 #include <openmodeller/om_defs.hh> 00031 #include <openmodeller/Log.hh> 00032 #include <openmodeller/Algorithm.hh> 00033 #include <openmodeller/AlgParameter.hh> 00034 #include <openmodeller/Sampler.hh> 00035 #include <openmodeller/Occurrences.hh> 00036 #include <openmodeller/AreaStats.hh> 00037 #include <openmodeller/Occurrence.hh> 00038 #include <openmodeller/MapFormat.hh> 00039 #include <openmodeller/AlgorithmFactory.hh> 00040 #include <openmodeller/Environment.hh> 00041 #include <openmodeller/Configuration.hh> 00042 #include <openmodeller/Model.hh> 00043 #include <openmodeller/CallbackWrapper.hh> 00044 00045 #include <openmodeller/env_io/Map.hh> 00046 #include <openmodeller/env_io/RasterFactory.hh> 00047 00048 #include <openmodeller/Exceptions.hh> 00049 00050 #include <string> 00051 using std::string; 00052 00053 /*** Callback "setters" ***/ 00054 00055 void OpenModeller::setModelCallback( ModelCreationCallback func, void *param ) { 00056 00057 _callback_wrapper.setModelCreationCallback( func, param ); 00058 } 00059 00060 void OpenModeller::setMapCallback( ModelProjectionCallback func, void *param ) { 00061 00062 _callback_wrapper.setModelProjectionCallback( func, param ); 00063 } 00064 00065 void OpenModeller::setAbortionCallback( AbortionCallback func, void *param ) { 00066 00067 _callback_wrapper.setAbortionCallback( func, param ); 00068 } 00069 00070 /****************************************************************/ 00071 /************************* Open Modeller ************************/ 00072 00073 /********************/ 00074 /*** constructors ***/ 00075 00076 OpenModeller::OpenModeller(): 00077 _confusion_matrix(), 00078 _roc_curve() 00079 { 00080 _error[0] = '\0'; 00081 00082 _actualAreaStats = new AreaStats(); 00083 _estimatedAreaStats = new AreaStats(); 00084 } 00085 00086 00087 /******************/ 00088 /*** destructor ***/ 00089 00090 OpenModeller::~OpenModeller() 00091 { 00092 delete _actualAreaStats; 00093 delete _estimatedAreaStats; 00094 } 00095 00096 00097 /*********************/ 00098 /*** get log level ***/ 00099 void 00100 OpenModeller::setLogLevel( Log::Level level ) 00101 { 00102 Log::instance()->setLevel( level ); 00103 } 00104 00105 00106 /*******************/ 00107 /*** get Version ***/ 00108 std::string 00109 OpenModeller::getVersion() 00110 { 00111 return std::string( OM_VERSION ); 00112 } 00113 00114 /****************************/ 00115 /*** available Algorithms ***/ 00116 AlgMetadata const ** 00117 OpenModeller::availableAlgorithms() 00118 { 00119 return AlgorithmFactory::availableAlgorithms(); 00120 } 00121 00122 00123 /**************************/ 00124 /*** algorithm Metadata ***/ 00125 AlgMetadata const* 00126 OpenModeller::algorithmMetadata( char const *algorithm_id ) 00127 { 00128 return AlgorithmFactory::algorithmMetadata( algorithm_id ); 00129 } 00130 00131 00132 /********************************/ 00133 /*** num Available Algorithms ***/ 00134 int 00135 OpenModeller::numAvailableAlgorithms() 00136 { 00137 return AlgorithmFactory::numAvailableAlgorithms(); 00138 } 00139 00140 bool 00141 OpenModeller::hasEnvironment() 00142 { 00143 return ( _env || ( (_presence)? _presence->hasEnvironment() : false ) ); 00144 } 00145 00146 /***********************/ 00147 /*** set Occurrences ***/ 00148 int 00149 OpenModeller::setOccurrences( const OccurrencesPtr& presence, 00150 const OccurrencesPtr& absence ) 00151 { 00152 if ( ( ! presence || presence->numOccurrences() == 0 ) && 00153 ( ! absence || absence->numOccurrences() == 0 ) ) { 00154 00155 Log::instance()->error( "Occurrences must not be empty\n" ); 00156 return 0; 00157 } 00158 _presence = presence; 00159 _absence = absence; 00160 00161 return 1; 00162 } 00163 00164 /***********************/ 00165 /*** set Environment ***/ 00166 void 00167 OpenModeller::setEnvironment( std::vector<std::string> categ_map, 00168 std::vector<std::string> continuous_map, 00169 const std::string& mask ) 00170 { 00171 _env = createEnvironment( categ_map, continuous_map, mask); 00172 } 00173 00174 /*******************/ 00175 /*** set Sampler ***/ 00176 void 00177 OpenModeller::setSampler( const SamplerPtr& sampler ) 00178 { 00179 _samp = sampler; 00180 } 00181 00182 /*********************/ 00183 /*** set Algorithm ***/ 00184 int 00185 OpenModeller::setAlgorithm( std::string const id, int nparam, 00186 AlgParameter const *param ) 00187 { 00188 if ( nparam && ! param ) { 00189 00190 Log::instance()->error( "Wrong parameters when setting algorithm\n" ); 00191 return 0; 00192 } 00193 00194 if ( ! _samp ) { 00195 00196 // Create a default sampler if none was previously provided 00197 if ( ! hasEnvironment() ) { 00198 00199 Log::instance()->warn( "Sampler could not be initialized. Environment not set.\n" ); 00200 return 0; 00201 } 00202 else if ( ( ! _presence ) && ( ! _absence ) ) { 00203 00204 Log::instance()->warn( "Sampler could not be initialized. Occurrences not set.\n" ); 00205 return 0; 00206 } 00207 else { 00208 00209 // _env and (_presence or _absence) are set 00210 setSampler( createSampler( _env, _presence, _absence ) ); 00211 } 00212 } 00213 00214 _alg = AlgorithmFactory::newAlgorithm( id ); 00215 00216 if ( ! _alg ) { 00217 00218 Log::instance()->error( _error, "Could not find (%s) algorithm.", id.c_str() ); 00219 return 0; 00220 } 00221 00222 _alg->setSampler( _samp ); 00223 _alg->setParameters( nparam, param ); 00224 00225 return 1; 00226 } 00227 00228 /********************/ 00229 /*** create Model ***/ 00230 int 00231 OpenModeller::createModel() 00232 { 00233 _confusion_matrix.reset(); 00234 _roc_curve.reset(); 00235 00236 Log::instance()->info( "Creating model\n" ); 00237 00238 // Sampler 00239 if ( ! _samp ) { 00240 00241 Log::instance()->error( "Sampler not specified for model creation.\n" ); 00242 return 0; 00243 } 00244 00245 // Algorithm. 00246 if ( ! _alg ) { 00247 00248 Log::instance()->error( "Algorithm not specified for model creation.\n" ); 00249 return 0; 00250 } 00251 00252 _alg->createModel( _samp, &_callback_wrapper ); 00253 00254 // note: Leave the 2 spaces in the end of the message to cover the 00255 // previous model creation progress log. 00256 Log::instance()->info( "Finished creating model \n" ); 00257 00258 return 1; 00259 } 00260 00261 00262 /******************/ 00263 /*** create Map ***/ 00264 int 00265 OpenModeller::createMap( const EnvironmentPtr & env, char const *output_file, MapFormat& output_format ) 00266 { 00267 Log::instance()->info( "Projecting model\n" ); 00268 00269 if ( ! env ) { 00270 00271 Log::instance()->error( "Projection environment not specified\n" ); 00272 return 0; 00273 } 00274 00275 _projEnv = env; 00276 00277 if ( ! _alg ) { 00278 00279 Log::instance()->error( "Algorithm not specified\n" ); 00280 return 0; 00281 } 00282 00283 Model model( _alg->getModel() ); 00284 00285 Map *mask = _projEnv->getMask(); 00286 00287 // If mask is undefined, use first layer as a mask 00288 if ( ! mask ) { 00289 00290 mask = _projEnv->getLayer(0); 00291 } 00292 00293 // Store output format in property 00294 _format = output_format; 00295 00296 // copy mask settings to the output format ONLY when they are undefined 00297 _format.copyDefaults( *mask ); 00298 00299 // try to infer the output file format (default is 64 bit floating tiff). 00300 string fname( output_file ); 00301 00302 int pos = fname.length() - 4; 00303 00304 if ( pos > 0 ) { 00305 00306 if ( fname.compare( pos, 4, ".bmp" ) == 0 ) { 00307 00308 _format.setFormat( MapFormat::GreyBMP ); 00309 Log::instance()->warn ( "Using greyscale bmp as output format based on extension\n" ); 00310 } 00311 } 00312 00313 // Create map on disc. 00314 00315 #ifdef MPI_FOUND 00316 Map map( RasterFactory::instance().create( output_file, output_file, _format ) ); 00317 #else 00318 Map map( RasterFactory::instance().create( output_file, _format ) ); 00319 #endif 00320 00321 bool finished = Projector::createMap( model, _projEnv, &map, _actualAreaStats, &_callback_wrapper ); 00322 00323 if ( ! finished ) { 00324 00325 Log::instance()->info( "Error during model projection\n" ); 00326 return 0; 00327 } 00328 00329 Log::instance()->info( "Finished projecting model\n" ); 00330 00331 return 1; 00332 } 00333 00334 int 00335 OpenModeller::createMap( const EnvironmentPtr & env, char const *output_file ) 00336 { 00337 return createMap( env, output_file, _format ); 00338 } 00339 00340 /******************/ 00341 /*** create Map ***/ 00342 int 00343 OpenModeller::createMap( char const *output_file ) 00344 { 00345 if ( ! _projEnv ) { 00346 00347 _projEnv = _env; 00348 } 00349 00350 return createMap( _projEnv, output_file, _format ); 00351 } 00352 00353 /******************/ 00354 /*** create Map ***/ 00355 int 00356 OpenModeller::createMap( char const *output_file, MapFormat& output_format ) 00357 { 00358 if ( ! _projEnv ) { 00359 00360 _projEnv = _env; 00361 } 00362 00363 return createMap( _projEnv, output_file, output_format ); 00364 } 00365 00366 /**********************************/ 00367 /******* get Value ****************/ 00368 Scalar 00369 OpenModeller::getValue(const ConstEnvironmentPtr& env, Coord x, Coord y) 00370 { 00371 if ( ! _env ) { 00372 00373 return -1.0; 00374 } 00375 00376 // FIXME: enable geotransformation 00377 const Sample& sample = env->get( x, y ); 00378 00379 if ( sample.size() == 0 ) { 00380 00381 return -1.0; 00382 } 00383 00384 Scalar val = _alg->getValue( sample ); 00385 if ( val < 0.0 ) val = 0.0; 00386 if ( val > 1.0 ) val = 1.0; 00387 00388 return val; 00389 } 00390 00391 00392 /**********************************/ 00393 /******* get Value ****************/ 00394 Scalar 00395 OpenModeller::getValue( Scalar const *environment_values ) 00396 { 00397 Sample tmp( _env->numLayers() ,environment_values ); 00398 return _alg->getValue( tmp ); 00399 } 00400 00401 00402 /************************************/ 00403 /******* get Actual AreaStats *******/ 00404 AreaStats * 00405 OpenModeller::getActualAreaStats() 00406 { 00407 return new AreaStats( _actualAreaStats ); 00408 } 00409 00410 00411 /***************************************/ 00412 /******* get Estimated AreaStats *******/ 00413 AreaStats * OpenModeller::getEstimatedAreaStats(double proportionAreaToSample) 00414 { 00415 return getEstimatedAreaStats( _env, proportionAreaToSample ); 00416 } 00417 00418 00419 /***************************************/ 00420 /******* get Estimated AreaStats *******/ 00421 AreaStats * OpenModeller::getEstimatedAreaStats(const ConstEnvironmentPtr& env, 00422 double proportionAreaToSample) 00423 { 00424 int i, sampleSize, numCells, xdim, ydim; 00425 00426 if ( !env ) { 00427 00428 // this method does not work without _env properly set 00429 return NULL; 00430 } 00431 00432 if ( !_estimatedAreaStats ) { 00433 00434 _estimatedAreaStats = new AreaStats; 00435 } 00436 else { 00437 00438 _estimatedAreaStats->reset(); 00439 } 00440 00441 // get number of cells to sample 00442 // note that the total area does not take the mask into account 00443 // thus all cells (masked or unmasked) are counted 00444 env->getMask()->getDim(&xdim, &ydim); 00445 numCells = xdim * ydim; 00446 00447 sampleSize = (int) (numCells * proportionAreaToSample); 00448 00449 for (i = 0; i < sampleSize; i++) { 00450 00451 const Sample& sample = env->getRandom(); 00452 00453 _estimatedAreaStats->addPrediction(_alg->getValue(sample)); 00454 } 00455 00456 return _estimatedAreaStats; 00457 } 00458 00459 00460 /************************************/ 00461 /******* get Confusion Matrix *******/ 00462 const ConfusionMatrix * const OpenModeller::getConfusionMatrix() 00463 { 00464 if ( _confusion_matrix.ready() ) { 00465 00466 return &_confusion_matrix; 00467 } 00468 00469 _confusion_matrix.calculate( getModel(), getSampler() ); 00470 00471 return &_confusion_matrix; 00472 } 00473 00474 /*****************************/ 00475 /******* get Roc Curve *******/ 00476 RocCurve * const OpenModeller::getRocCurve() 00477 { 00478 if ( _roc_curve.ready() ) { 00479 00480 return &_roc_curve; 00481 } 00482 00483 _roc_curve.calculate( getModel(), getSampler() ); 00484 00485 return &_roc_curve; 00486 } 00487 00488 00489 /***************************************/ 00490 /******* get Model Configuration *******/ 00491 ConfigurationPtr 00492 OpenModeller::getModelConfiguration() const 00493 { 00494 ConfigurationPtr config( new ConfigurationImpl("SerializedModel")); 00495 00496 ConfigurationPtr sampler_config( _samp->getConfiguration() ); 00497 00498 config->addSubsection( sampler_config ); 00499 00500 ConfigurationPtr alg_config( _alg->getConfiguration() ); 00501 00502 config->addSubsection( alg_config ); 00503 00504 ConfigurationPtr stats_config( new ConfigurationImpl("Statistics") ); 00505 00506 if ( _confusion_matrix.ready() ) { 00507 00508 ConfigurationPtr cm_config( _confusion_matrix.getConfiguration() ); 00509 00510 stats_config->addSubsection( cm_config ); 00511 } 00512 00513 if ( _roc_curve.ready() ) { 00514 00515 ConfigurationPtr roc_config( _roc_curve.getConfiguration() ); 00516 00517 stats_config->addSubsection( roc_config ); 00518 } 00519 00520 config->addSubsection( stats_config ); 00521 00522 return config; 00523 } 00524 00525 00526 /***************************************/ 00527 /******* set Model Configuration *******/ 00528 void 00529 OpenModeller::setModelConfiguration( const ConstConfigurationPtr & config ) 00530 { 00531 Log::instance()->debug( "Setting model configuration\n" ); 00532 00533 _confusion_matrix.reset(); 00534 _roc_curve.reset(); 00535 00536 Log::instance()->debug( "Creating sampler\n" ); 00537 00538 _samp = createSampler( config->getSubsection( "Sampler" ) ); 00539 00540 Log::instance()->debug( "Getting sampler attributes\n" ); 00541 00542 _env = _samp->getEnvironment(); 00543 00544 _presence = _samp->getPresences(); 00545 00546 _absence = _samp->getAbsences(); 00547 00548 Log::instance()->debug( "Getting algorithm from algorithm factory\n" ); 00549 00550 _alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) ); 00551 00552 // Model creation options 00553 if ( ConstConfigurationPtr options_config = config->getSubsection( "Options", false ) ) { 00554 00555 ConstConfigurationPtr occ_filter_config = options_config->getSubsection( "OccurrencesFilter", false ); 00556 00557 if ( occ_filter_config ) { 00558 00559 ConstConfigurationPtr su_config = occ_filter_config->getSubsection( "SpatiallyUnique", false ); 00560 00561 if ( su_config ) { 00562 00563 _samp->spatiallyUnique(); 00564 } 00565 00566 ConstConfigurationPtr eu_config = occ_filter_config->getSubsection( "EnvironmentallyUnique", false ); 00567 00568 if ( eu_config ) { 00569 00570 _samp->environmentallyUnique(); 00571 } 00572 } 00573 } 00574 00575 Log::instance()->debug( "Assigning sampler to algorithm\n" ); 00576 00577 _alg->setSampler( _samp ); 00578 } 00579 00580 00581 /********************************************/ 00582 /******* set Projection Configuration *******/ 00583 void 00584 OpenModeller::setProjectionConfiguration( const ConstConfigurationPtr & config ) 00585 { 00586 Log::instance()->debug( "Setting projection configuration\n" ); 00587 00588 try { 00589 00590 _alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) ); 00591 00592 _projEnv = createEnvironment( config->getSubsection( "Environment" ) ); 00593 00594 ConstConfigurationPtr output_param_config = config->getSubsection( "OutputParameters" ); 00595 00596 ConstConfigurationPtr template_layer_config = output_param_config->getSubsection( "TemplateLayer" ); 00597 00598 string formatId = template_layer_config->getAttribute( "Id" ); 00599 00600 _format = MapFormat( formatId.c_str() ); 00601 00602 try { 00603 00604 string fileType; 00605 00606 fileType = output_param_config->getAttribute( "FileType" ); 00607 00608 if ( ! fileType.empty() ) { 00609 00610 Log::instance()->debug( "Setting output file type to: %s\n", fileType.c_str() ); 00611 00612 _format.setFormat( fileType ); 00613 } 00614 } 00615 catch ( AttributeNotFound& e ) { 00616 00617 // FileType attribute is optional 00618 UNUSED(e); 00619 } 00620 00621 try { 00622 00623 ConstConfigurationPtr stats_param_config = config->getSubsection( "Statistics" ); 00624 ConstConfigurationPtr areastats_param_config = stats_param_config->getSubsection( "AreaStatistics" ); 00625 00626 double threshold = areastats_param_config->getAttributeAsDouble( "PredictionThreshold", 0.5 ); 00627 00628 _actualAreaStats->reset( threshold ); 00629 } 00630 catch ( SubsectionNotFound& e ) { 00631 00632 // Statistics element is optional and AreaStatistics subelement is optional 00633 UNUSED(e); 00634 } 00635 } 00636 catch( ConfigurationException& e ) { 00637 00638 Log::instance()->error( "Projection deserialization exception: %s\n", e.what() ); 00639 00640 throw e; 00641 } 00642 } 00643 00644 00645 /********************************************/ 00646 /******* set Statistics Configuration *******/ 00647 void 00648 OpenModeller::calculateModelStatistics( const ConstConfigurationPtr & config ) 00649 { 00650 Log::instance()->debug( "Setting statistics configuration\n" ); 00651 00652 _confusion_matrix.reset(); 00653 _roc_curve.reset(); 00654 00655 bool calc_matrix = false; 00656 double threshold = CONF_MATRIX_DEFAULT_THRESHOLD; 00657 int ignore_absences_int = 0; 00658 00659 bool calc_roc = false; 00660 int resolution = -1; 00661 int num_background = -1; 00662 double max_omission = 1.0; 00663 int use_absences_as_background_int = 0; 00664 00665 try { 00666 00667 ConfigurationPtr statistics_param = config->getSubsection( "Statistics" ); 00668 00669 try { 00670 00671 ConfigurationPtr matrix_param = statistics_param->getSubsection( "ConfusionMatrix" ); 00672 00673 calc_matrix = true; 00674 00675 std::string threshold_str = matrix_param->getAttribute( "Threshold", "" ); 00676 00677 if ( threshold_str.compare( "lpt" ) == 0 ) { 00678 00679 threshold = -1.0; 00680 } 00681 else { 00682 00683 threshold = matrix_param->getAttributeAsDouble( "Threshold", CONF_MATRIX_DEFAULT_THRESHOLD ); 00684 } 00685 00686 ignore_absences_int = matrix_param->getAttributeAsInt( "IgnoreAbsences", 0 ); 00687 00688 if ( threshold < 0.0 ) { 00689 00690 if ( _samp && _alg ) { 00691 00692 _confusion_matrix.setLowestTrainingThreshold( getModel(), getSampler() ); 00693 00694 threshold = _confusion_matrix.getThreshold(); 00695 } 00696 else { 00697 00698 threshold = CONF_MATRIX_DEFAULT_THRESHOLD; 00699 00700 Log::instance()->error( "Cannot determine lowest training threshold without a Model and a Sampler. The default confusion matrix threshold will be used.\n" ); 00701 } 00702 } 00703 } 00704 catch( SubsectionNotFound& e ) { 00705 00706 Log::instance()->info( "Confusion matrix not calculated\n" ); 00707 UNUSED(e); 00708 } 00709 00710 try { 00711 00712 ConfigurationPtr roc_param = statistics_param->getSubsection( "RocCurve" ); 00713 00714 calc_roc = true; 00715 00716 resolution = roc_param->getAttributeAsInt( "Resolution", -1 ); 00717 00718 num_background = roc_param->getAttributeAsInt( "BackgroundPoints", -1 ); 00719 00720 max_omission = roc_param->getAttributeAsDouble( "MaxOmission", 1.0 ); 00721 00722 use_absences_as_background_int = roc_param->getAttributeAsInt( "UseAbsencesAsBackground", 0 ); 00723 } 00724 catch( SubsectionNotFound& e ) { 00725 00726 Log::instance()->info( "ROC curve not calculated\n" ); 00727 UNUSED(e); 00728 } 00729 } 00730 catch( SubsectionNotFound& e ) { 00731 00732 // For backwards compatibility, calculate matrix and ROC if 00733 // <Statistics> is not present. To avoid this, use an empty <Statistics/> element 00734 calc_matrix = true; 00735 calc_roc = true; 00736 UNUSED(e); 00737 } 00738 00739 if ( calc_matrix || calc_roc ) 00740 { 00741 if ( ! _samp ) { 00742 00743 Log::instance()->error( "Sampler not specified for calculating statistics.\n" ); 00744 return; 00745 } 00746 00747 if ( ! _alg ) { 00748 00749 Log::instance()->error( "Model not specified for calculating statistics.\n" ); 00750 return; 00751 } 00752 } 00753 00754 int num_presences = _samp->numPresence(); 00755 int num_absences = _samp->numAbsence(); 00756 00757 // Confusion matrix can only be calculated with presence and/or absence points 00758 if ( calc_matrix && ( num_presences || num_absences ) ) { 00759 00760 bool ignore_absences = false; 00761 00762 if ( ignore_absences_int > 0 ) { 00763 00764 ignore_absences = true; 00765 } 00766 00767 _confusion_matrix.reset( threshold, ignore_absences ); 00768 _confusion_matrix.calculate( getModel(), getSampler() ); 00769 } 00770 00771 // ROC curve can only be calculated with presence points 00772 // No absence points will force background points to be generated 00773 if ( calc_roc && num_presences ) { 00774 00775 bool use_absences_as_background = false; 00776 00777 resolution = (resolution <= 0) ? ROC_DEFAULT_RESOLUTION : resolution; 00778 00779 if ( use_absences_as_background_int > 0 ) { 00780 00781 use_absences_as_background = true; 00782 00783 _roc_curve.initialize( resolution, use_absences_as_background ); 00784 } 00785 else { 00786 00787 if ( num_background > 0 ) { 00788 00789 _roc_curve.initialize( resolution, num_background ); 00790 } 00791 else { 00792 00793 _roc_curve.initialize( resolution ); 00794 } 00795 } 00796 00797 _roc_curve.calculate( getModel(), getSampler() ); 00798 00799 _roc_curve.getTotalArea(); // call method to force serialization 00800 00801 if ( max_omission < 1.0 ) { 00802 00803 _roc_curve.getPartialAreaRatio( max_omission ); // call method to force serialization 00804 } 00805 } 00806 }