openModeller  Version 1.4.0
OpenModeller.cpp
Go to the documentation of this file.
00001 
00028 #include <openmodeller/OpenModeller.hh>
00029 
00030 #include <openmodeller/om_defs.hh>
00031 #include <openmodeller/Log.hh>
00032 #include <openmodeller/Algorithm.hh>
00033 #include <openmodeller/AlgParameter.hh>
00034 #include <openmodeller/Sampler.hh>
00035 #include <openmodeller/Occurrences.hh>
00036 #include <openmodeller/AreaStats.hh>
00037 #include <openmodeller/Occurrence.hh>
00038 #include <openmodeller/MapFormat.hh>
00039 #include <openmodeller/AlgorithmFactory.hh>
00040 #include <openmodeller/Environment.hh>
00041 #include <openmodeller/Configuration.hh>
00042 #include <openmodeller/Model.hh>
00043 #include <openmodeller/CallbackWrapper.hh>
00044 
00045 #include <openmodeller/env_io/Map.hh>
00046 #include <openmodeller/env_io/RasterFactory.hh>
00047 
00048 #include <openmodeller/Exceptions.hh>
00049 
00050 #include <string>
00051 using std::string;
00052 
00053 /*** Callback "setters" ***/
00054 
00055 void OpenModeller::setModelCallback( ModelCreationCallback func, void *param ) {
00056 
00057   _callback_wrapper.setModelCreationCallback( func, param );
00058 }
00059 
00060 void OpenModeller::setMapCallback( ModelProjectionCallback func, void *param ) {
00061 
00062   _callback_wrapper.setModelProjectionCallback( func, param );
00063 }
00064 
00065 void OpenModeller::setAbortionCallback( AbortionCallback func, void *param ) {
00066 
00067   _callback_wrapper.setAbortionCallback( func, param );
00068 }
00069 
00070 /****************************************************************/
00071 /************************* Open Modeller ************************/
00072 
00073 /********************/
00074 /*** constructors ***/
00075 
00076 OpenModeller::OpenModeller():
00077   _confusion_matrix(),
00078   _roc_curve()
00079 {
00080   _error[0] = '\0';
00081 
00082   _actualAreaStats = new AreaStats();
00083   _estimatedAreaStats = new AreaStats();
00084 }
00085 
00086 
00087 /******************/
00088 /*** destructor ***/
00089 
00090 OpenModeller::~OpenModeller()
00091 {
00092   delete _actualAreaStats;
00093   delete _estimatedAreaStats;
00094 }
00095 
00096 
00097 /*********************/
00098 /*** get log level ***/
00099 void
00100 OpenModeller::setLogLevel( Log::Level level )
00101 {
00102   Log::instance()->setLevel( level );
00103 }
00104 
00105 
00106 /*******************/
00107 /*** get Version ***/
00108 std::string
00109 OpenModeller::getVersion()
00110 {
00111   return std::string( OM_VERSION );
00112 }
00113 
00114 /****************************/
00115 /*** available Algorithms ***/
00116 AlgMetadata const **
00117 OpenModeller::availableAlgorithms()
00118 {
00119   return AlgorithmFactory::availableAlgorithms();
00120 }
00121 
00122 
00123 /**************************/
00124 /*** algorithm Metadata ***/
00125 AlgMetadata const*
00126 OpenModeller::algorithmMetadata( char const *algorithm_id )
00127 {
00128   return AlgorithmFactory::algorithmMetadata( algorithm_id );
00129 }
00130 
00131 
00132 /********************************/
00133 /*** num Available Algorithms ***/
00134 int
00135 OpenModeller::numAvailableAlgorithms()
00136 {
00137   return AlgorithmFactory::numAvailableAlgorithms();
00138 }
00139 
00140 bool
00141 OpenModeller::hasEnvironment()
00142 {
00143   return ( _env || ( (_presence)? _presence->hasEnvironment() : false ) );
00144 }
00145 
00146 /***********************/
00147 /*** set Occurrences ***/
00148 int
00149 OpenModeller::setOccurrences( const OccurrencesPtr& presence,
00150                               const OccurrencesPtr& absence )
00151 {
00152   if ( ( ! presence || presence->numOccurrences() == 0 ) &&
00153        ( ! absence  || absence->numOccurrences() == 0 ) ) {
00154 
00155     Log::instance()->error( "Occurrences must not be empty\n" );
00156     return 0;
00157   }
00158   _presence = presence;
00159   _absence  = absence;
00160 
00161   return 1;
00162 }
00163 
00164 /***********************/
00165 /*** set Environment ***/
00166 void
00167 OpenModeller::setEnvironment( std::vector<std::string> categ_map,
00168             std::vector<std::string> continuous_map,
00169             const std::string& mask )
00170 {
00171   _env = createEnvironment( categ_map, continuous_map, mask);
00172 }
00173 
00174 /*******************/
00175 /*** set Sampler ***/
00176 void
00177 OpenModeller::setSampler( const SamplerPtr& sampler )
00178 {
00179   _samp = sampler;
00180 }
00181 
00182 /*********************/
00183 /*** set Algorithm ***/
00184 int
00185 OpenModeller::setAlgorithm( std::string const id, int nparam,
00186                             AlgParameter const *param )
00187 {
00188   if ( nparam && ! param ) {
00189 
00190     Log::instance()->error( "Wrong parameters when setting algorithm\n" );
00191     return 0;
00192   }
00193 
00194   if ( ! _samp ) {
00195 
00196     //  Create a default sampler if none was previously provided
00197     if ( ! hasEnvironment() ) {
00198 
00199       Log::instance()->warn( "Sampler could not be initialized. Environment not set.\n" );
00200       return 0;
00201     }
00202     else if ( ( ! _presence ) && ( ! _absence ) ) {
00203 
00204       Log::instance()->warn( "Sampler could not be initialized. Occurrences not set.\n" );
00205       return 0;
00206     }
00207     else {
00208 
00209       // _env and (_presence or _absence) are set
00210       setSampler( createSampler( _env, _presence, _absence ) );
00211     }
00212   }
00213 
00214   _alg = AlgorithmFactory::newAlgorithm( id );
00215 
00216   if ( ! _alg ) {
00217 
00218     Log::instance()->error( _error, "Could not find (%s) algorithm.", id.c_str() );
00219     return 0;
00220   }
00221 
00222   _alg->setSampler( _samp );
00223   _alg->setParameters( nparam, param );
00224 
00225   return 1;
00226 }
00227 
00228 /********************/
00229 /*** create Model ***/
00230 int
00231 OpenModeller::createModel()
00232 {
00233   _confusion_matrix.reset();
00234   _roc_curve.reset();
00235 
00236   Log::instance()->info( "Creating model\n" );
00237 
00238   // Sampler
00239   if ( ! _samp ) {
00240 
00241     Log::instance()->error( "Sampler not specified for model creation.\n" );
00242     return 0;
00243   }
00244 
00245   // Algorithm.
00246   if ( ! _alg ) {
00247 
00248     Log::instance()->error( "Algorithm not specified for model creation.\n" );
00249     return 0;
00250   }
00251 
00252   _alg->createModel( _samp, &_callback_wrapper );
00253 
00254   // note: Leave the 2 spaces in the end of the message to cover the  
00255   //       previous model creation progress log.
00256   Log::instance()->info( "Finished creating model  \n" );
00257 
00258   return 1;
00259 }
00260 
00261 
00262 /******************/
00263 /*** create Map ***/
00264 int
00265 OpenModeller::createMap( const EnvironmentPtr & env, char const *output_file, MapFormat& output_format )
00266 {
00267   Log::instance()->info( "Projecting model\n" );
00268 
00269   if ( ! env ) {
00270 
00271     Log::instance()->error( "Projection environment not specified\n" );
00272     return 0;
00273   }
00274 
00275   _projEnv = env;
00276 
00277   if ( ! _alg ) {
00278 
00279     Log::instance()->error( "Algorithm not specified\n" );
00280     return 0;
00281   }
00282 
00283   Model model( _alg->getModel() );
00284 
00285   Map *mask = _projEnv->getMask();
00286 
00287   // If mask is undefined, use first layer as a mask
00288   if ( ! mask ) {
00289 
00290     mask = _projEnv->getLayer(0);
00291   }
00292 
00293   // Store output format in property 
00294   _format = output_format;
00295 
00296   // copy mask settings to the output format ONLY when they are undefined 
00297   _format.copyDefaults( *mask );
00298 
00299   // try to infer the output file format (default is 64 bit floating tiff).
00300   string fname( output_file );
00301 
00302   int pos = fname.length() - 4;
00303 
00304   if ( pos > 0 ) {
00305 
00306     if ( fname.compare( pos, 4, ".bmp" ) == 0 ) {
00307 
00308       _format.setFormat( MapFormat::GreyBMP );
00309       Log::instance()->warn ( "Using greyscale bmp as output format based on extension\n" );
00310     }
00311   }
00312 
00313   // Create map on disc.
00314 
00315 #ifdef MPI_FOUND
00316   Map map( RasterFactory::instance().create( output_file, output_file, _format ) );
00317 #else
00318   Map map( RasterFactory::instance().create( output_file, _format ) );
00319 #endif
00320 
00321   bool finished = Projector::createMap( model, _projEnv, &map, _actualAreaStats, &_callback_wrapper );
00322 
00323   if ( ! finished ) {
00324 
00325     Log::instance()->info( "Error during model projection\n" );
00326     return 0;
00327   }
00328 
00329   Log::instance()->info( "Finished projecting model\n" );
00330 
00331   return 1;
00332 }
00333 
00334 int
00335 OpenModeller::createMap( const EnvironmentPtr & env, char const *output_file )
00336 {
00337   return createMap( env, output_file, _format );
00338 }
00339 
00340 /******************/
00341 /*** create Map ***/
00342 int
00343 OpenModeller::createMap( char const *output_file )
00344 {
00345   if ( ! _projEnv ) {
00346 
00347     _projEnv = _env;
00348   }
00349 
00350   return createMap( _projEnv, output_file, _format );
00351 }
00352 
00353 /******************/
00354 /*** create Map ***/
00355 int
00356 OpenModeller::createMap( char const *output_file, MapFormat& output_format )
00357 {
00358   if ( ! _projEnv ) {
00359 
00360     _projEnv = _env;
00361   }
00362 
00363   return createMap( _projEnv, output_file, output_format );
00364 }
00365 
00366 /**********************************/
00367 /******* get Value ****************/
00368 Scalar
00369 OpenModeller::getValue(const ConstEnvironmentPtr& env, Coord x, Coord y)
00370 {
00371   if ( ! _env ) {
00372 
00373     return -1.0;
00374   }
00375 
00376   // FIXME: enable geotransformation
00377   const Sample& sample = env->get( x, y );
00378 
00379   if ( sample.size() == 0 )  {
00380 
00381     return -1.0;
00382   }
00383 
00384   Scalar val = _alg->getValue( sample );
00385   if ( val < 0.0 ) val = 0.0;
00386   if ( val > 1.0 ) val = 1.0;
00387 
00388   return val;
00389 }
00390 
00391 
00392 /**********************************/
00393 /******* get Value ****************/
00394 Scalar
00395 OpenModeller::getValue( Scalar const *environment_values )
00396 {
00397   Sample tmp( _env->numLayers() ,environment_values );
00398   return _alg->getValue( tmp );
00399 }
00400 
00401 
00402 /************************************/
00403 /******* get Actual AreaStats *******/
00404 AreaStats *
00405 OpenModeller::getActualAreaStats()
00406 {
00407   return new AreaStats( _actualAreaStats );
00408 }
00409 
00410 
00411 /***************************************/
00412 /******* get Estimated AreaStats *******/
00413 AreaStats * OpenModeller::getEstimatedAreaStats(double proportionAreaToSample)
00414 {
00415   return getEstimatedAreaStats( _env, proportionAreaToSample );
00416 }
00417 
00418 
00419 /***************************************/
00420 /******* get Estimated AreaStats *******/
00421 AreaStats * OpenModeller::getEstimatedAreaStats(const ConstEnvironmentPtr& env,
00422             double proportionAreaToSample)
00423 {
00424   int i, sampleSize, numCells, xdim, ydim;
00425 
00426   if ( !env ) {
00427 
00428     // this method does not work without _env properly set
00429     return NULL;
00430   }
00431 
00432   if ( !_estimatedAreaStats ) { 
00433 
00434     _estimatedAreaStats = new AreaStats; 
00435   }
00436   else { 
00437 
00438     _estimatedAreaStats->reset(); 
00439   }
00440 
00441   // get number of cells to sample
00442   // note that the total area does not take the mask into account
00443   // thus all cells (masked or unmasked) are counted
00444   env->getMask()->getDim(&xdim, &ydim);
00445   numCells = xdim * ydim; 
00446 
00447   sampleSize = (int) (numCells * proportionAreaToSample);
00448 
00449   for (i = 0; i < sampleSize; i++) { 
00450 
00451     const Sample& sample = env->getRandom();
00452 
00453     _estimatedAreaStats->addPrediction(_alg->getValue(sample)); 
00454   }
00455 
00456   return _estimatedAreaStats;
00457 }
00458 
00459 
00460 /************************************/
00461 /******* get Confusion Matrix *******/
00462 const ConfusionMatrix * const OpenModeller::getConfusionMatrix()
00463 {
00464   if ( _confusion_matrix.ready() ) {
00465 
00466     return &_confusion_matrix;
00467   }
00468 
00469   _confusion_matrix.calculate( getModel(), getSampler() );
00470 
00471   return &_confusion_matrix;
00472 }
00473 
00474 /*****************************/
00475 /******* get Roc Curve *******/
00476 RocCurve * const OpenModeller::getRocCurve()
00477 {
00478   if ( _roc_curve.ready() ) {
00479 
00480     return &_roc_curve;
00481   }
00482 
00483   _roc_curve.calculate( getModel(), getSampler() );
00484 
00485   return &_roc_curve;
00486 }
00487 
00488 
00489 /***************************************/
00490 /******* get Model Configuration *******/
00491 ConfigurationPtr
00492 OpenModeller::getModelConfiguration() const
00493 {
00494   ConfigurationPtr config( new ConfigurationImpl("SerializedModel"));
00495 
00496   ConfigurationPtr sampler_config( _samp->getConfiguration() );
00497 
00498   config->addSubsection( sampler_config );
00499 
00500   ConfigurationPtr alg_config( _alg->getConfiguration() );
00501 
00502   config->addSubsection( alg_config );
00503 
00504   ConfigurationPtr stats_config( new ConfigurationImpl("Statistics") );
00505 
00506   if ( _confusion_matrix.ready() ) {
00507 
00508     ConfigurationPtr cm_config( _confusion_matrix.getConfiguration() );
00509 
00510     stats_config->addSubsection( cm_config );
00511   }
00512 
00513   if ( _roc_curve.ready() ) {
00514 
00515     ConfigurationPtr roc_config( _roc_curve.getConfiguration() );
00516 
00517     stats_config->addSubsection( roc_config );
00518   }
00519 
00520   config->addSubsection( stats_config );
00521 
00522   return config;
00523 }
00524 
00525 
00526 /***************************************/
00527 /******* set Model Configuration *******/
00528 void
00529 OpenModeller::setModelConfiguration( const ConstConfigurationPtr & config )
00530 {
00531   Log::instance()->debug( "Setting model configuration\n" );
00532 
00533   _confusion_matrix.reset();
00534   _roc_curve.reset();
00535 
00536   Log::instance()->debug( "Creating sampler\n" );
00537 
00538   _samp = createSampler( config->getSubsection( "Sampler" ) );
00539 
00540   Log::instance()->debug( "Getting sampler attributes\n" );
00541 
00542   _env = _samp->getEnvironment();
00543 
00544   _presence = _samp->getPresences();
00545 
00546   _absence = _samp->getAbsences();
00547 
00548   Log::instance()->debug( "Getting algorithm from algorithm factory\n" );
00549 
00550   _alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) );
00551 
00552   // Model creation options
00553   if ( ConstConfigurationPtr options_config = config->getSubsection( "Options", false ) ) {
00554 
00555     ConstConfigurationPtr occ_filter_config = options_config->getSubsection( "OccurrencesFilter", false );
00556 
00557     if ( occ_filter_config ) {
00558 
00559       ConstConfigurationPtr su_config = occ_filter_config->getSubsection( "SpatiallyUnique", false );
00560 
00561       if ( su_config ) {
00562 
00563         _samp->spatiallyUnique();
00564       }
00565 
00566       ConstConfigurationPtr eu_config = occ_filter_config->getSubsection( "EnvironmentallyUnique", false );
00567 
00568       if ( eu_config ) {
00569 
00570         _samp->environmentallyUnique();
00571       }
00572     }
00573   }
00574 
00575   Log::instance()->debug( "Assigning sampler to algorithm\n" );
00576 
00577   _alg->setSampler( _samp );
00578 }
00579 
00580 
00581 /********************************************/
00582 /******* set Projection Configuration *******/
00583 void
00584 OpenModeller::setProjectionConfiguration( const ConstConfigurationPtr & config )
00585 {
00586   Log::instance()->debug( "Setting projection configuration\n" );
00587 
00588   try { 
00589 
00590     _alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) );
00591 
00592     _projEnv = createEnvironment( config->getSubsection( "Environment" ) );
00593 
00594     ConstConfigurationPtr output_param_config = config->getSubsection( "OutputParameters" );
00595 
00596     ConstConfigurationPtr template_layer_config = output_param_config->getSubsection( "TemplateLayer" );
00597 
00598     string formatId = template_layer_config->getAttribute( "Id" );
00599 
00600     _format = MapFormat( formatId.c_str() );
00601 
00602     try {
00603 
00604       string fileType;
00605 
00606       fileType = output_param_config->getAttribute( "FileType" );
00607 
00608       if ( ! fileType.empty() ) {
00609 
00610         Log::instance()->debug( "Setting output file type to: %s\n", fileType.c_str() );
00611 
00612         _format.setFormat( fileType );
00613       }
00614     }
00615     catch ( AttributeNotFound& e ) { 
00616 
00617       // FileType attribute is optional
00618       UNUSED(e);
00619     }
00620 
00621     try {
00622 
00623       ConstConfigurationPtr stats_param_config = config->getSubsection( "Statistics" );
00624       ConstConfigurationPtr areastats_param_config = stats_param_config->getSubsection( "AreaStatistics" );
00625 
00626       double threshold = areastats_param_config->getAttributeAsDouble( "PredictionThreshold", 0.5 );
00627 
00628       _actualAreaStats->reset( threshold );
00629     }
00630     catch ( SubsectionNotFound& e ) { 
00631 
00632       // Statistics element is optional and AreaStatistics subelement is optional 
00633       UNUSED(e);
00634     }
00635   }
00636   catch( ConfigurationException& e ) { 
00637 
00638     Log::instance()->error( "Projection deserialization exception: %s\n", e.what() );
00639 
00640     throw e;
00641   }
00642 }
00643 
00644 
00645 /********************************************/
00646 /******* set Statistics Configuration *******/
00647 void
00648 OpenModeller::calculateModelStatistics( const ConstConfigurationPtr & config )
00649 {
00650   Log::instance()->debug( "Setting statistics configuration\n" );
00651 
00652   _confusion_matrix.reset();
00653   _roc_curve.reset();
00654 
00655   bool calc_matrix = false;
00656   double threshold = CONF_MATRIX_DEFAULT_THRESHOLD;
00657   int ignore_absences_int = 0;
00658 
00659   bool calc_roc = false;
00660   int resolution = -1;
00661   int num_background = -1;
00662   double max_omission = 1.0;
00663   int use_absences_as_background_int = 0;
00664 
00665   try {
00666 
00667     ConfigurationPtr statistics_param = config->getSubsection( "Statistics" );
00668 
00669     try {
00670 
00671       ConfigurationPtr matrix_param = statistics_param->getSubsection( "ConfusionMatrix" );
00672 
00673       calc_matrix = true;
00674 
00675       std::string threshold_str = matrix_param->getAttribute( "Threshold", "" );
00676 
00677       if ( threshold_str.compare( "lpt" ) == 0 ) {
00678 
00679         threshold = -1.0; 
00680       }
00681       else {
00682 
00683         threshold = matrix_param->getAttributeAsDouble( "Threshold", CONF_MATRIX_DEFAULT_THRESHOLD );
00684       }
00685 
00686       ignore_absences_int = matrix_param->getAttributeAsInt( "IgnoreAbsences", 0 );
00687 
00688       if ( threshold < 0.0 ) {
00689 
00690         if ( _samp && _alg ) {
00691 
00692           _confusion_matrix.setLowestTrainingThreshold( getModel(), getSampler() );
00693 
00694           threshold = _confusion_matrix.getThreshold();
00695         }
00696         else {
00697 
00698           threshold = CONF_MATRIX_DEFAULT_THRESHOLD;
00699 
00700           Log::instance()->error( "Cannot determine lowest training threshold without a Model and a Sampler. The default confusion matrix threshold will be used.\n" );
00701         }
00702       }
00703     }
00704     catch( SubsectionNotFound& e ) {
00705 
00706       Log::instance()->info( "Confusion matrix not calculated\n" );
00707       UNUSED(e);
00708     }
00709 
00710     try {
00711 
00712       ConfigurationPtr roc_param = statistics_param->getSubsection( "RocCurve" );
00713 
00714       calc_roc = true;
00715 
00716       resolution = roc_param->getAttributeAsInt( "Resolution", -1 );
00717 
00718       num_background = roc_param->getAttributeAsInt( "BackgroundPoints", -1 );
00719 
00720       max_omission = roc_param->getAttributeAsDouble( "MaxOmission", 1.0 );
00721 
00722       use_absences_as_background_int = roc_param->getAttributeAsInt( "UseAbsencesAsBackground", 0 );
00723     }
00724     catch( SubsectionNotFound& e ) {
00725 
00726       Log::instance()->info( "ROC curve not calculated\n" );
00727       UNUSED(e);
00728     }
00729   }
00730   catch( SubsectionNotFound& e ) {
00731 
00732     // For backwards compatibility, calculate matrix and ROC if 
00733     // <Statistics> is not present. To avoid this, use an empty <Statistics/> element
00734     calc_matrix = true;
00735     calc_roc = true;
00736     UNUSED(e);
00737   }
00738 
00739   if ( calc_matrix || calc_roc )
00740   {
00741     if ( ! _samp ) {
00742 
00743       Log::instance()->error( "Sampler not specified for calculating statistics.\n" );
00744       return;
00745     }
00746 
00747     if ( ! _alg ) {
00748 
00749       Log::instance()->error( "Model not specified for calculating statistics.\n" );
00750       return;
00751     }
00752   }
00753 
00754   int num_presences = _samp->numPresence();
00755   int num_absences = _samp->numAbsence();
00756 
00757   // Confusion matrix can only be calculated with presence and/or absence points
00758   if ( calc_matrix && ( num_presences || num_absences ) ) {
00759 
00760     bool ignore_absences = false;
00761 
00762     if ( ignore_absences_int > 0 ) {
00763 
00764       ignore_absences = true;
00765     }
00766 
00767     _confusion_matrix.reset( threshold, ignore_absences );
00768     _confusion_matrix.calculate( getModel(), getSampler() );
00769   }
00770 
00771   // ROC curve can only be calculated with presence points
00772   // No absence points will force background points to be generated
00773   if ( calc_roc && num_presences ) {
00774 
00775     bool use_absences_as_background = false;
00776 
00777     resolution = (resolution <= 0) ? ROC_DEFAULT_RESOLUTION : resolution;
00778 
00779     if ( use_absences_as_background_int > 0 ) {
00780 
00781       use_absences_as_background = true;
00782 
00783       _roc_curve.initialize( resolution, use_absences_as_background );
00784     }
00785     else {
00786 
00787       if ( num_background > 0 ) {
00788 
00789         _roc_curve.initialize( resolution, num_background );
00790       }   
00791       else {
00792 
00793         _roc_curve.initialize( resolution );
00794       }
00795     }
00796 
00797     _roc_curve.calculate( getModel(), getSampler() );
00798 
00799     _roc_curve.getTotalArea(); // call method to force serialization
00800 
00801     if ( max_omission < 1.0 ) {
00802 
00803       _roc_curve.getPartialAreaRatio( max_omission ); // call method to force serialization
00804     }
00805   }
00806 }