openModeller  Version 1.4.0
bioclim.cpp
Go to the documentation of this file.
00001 
00028 #include "bioclim.hh"
00029 
00030 #include <openmodeller/Configuration.hh>
00031 
00032 #include <openmodeller/Exceptions.hh>
00033 
00034 #include <string.h>
00035 #include <stdio.h>
00036 #include <math.h>
00037 
00038 /****************************************************************/
00039 /********************** Algorithm's Metadata ********************/
00040 
00041 #define NUM_PARAM 1
00042 
00043 #define CUTOFF_ID "StandardDeviationCutoff"
00044 
00045 
00046 /*************************************/
00047 /*** Algorithm parameters metadata ***/
00048 
00049 static AlgParamMetadata parameters[NUM_PARAM] = {
00050 
00051   // Metadata of the first parameter.
00052   {
00053     CUTOFF_ID,                   // Id
00054     "Standard deviation cutoff", // Name.
00055     Real,                        // Type.
00056 
00057     // Overview
00058     "The envelope is determined by multiplying this parameter and the\
00059  standard deviation.",
00060 
00061     // Description.
00062     "Standard deviation cutoff for all bioclimatic envelopes.\n\
00063  Examples of (fraction of inclusion, parameter value) are:\n\
00064  (50.0%, 0.674); (68.3%, 1.000); (90.0%, 1.645); (95.0%, 1.960);\
00065  (99.7%, 3.000)",
00066 
00067     1,         // Not zero if the parameter has lower limit.
00068     0.0,       // Parameter's lower limit.
00069     0,         // Not zero if the parameter has upper limit.
00070     0.0,       // Parameter's upper limit.
00071     "0.674"    // Parameter's typical (default) value.
00072   },
00073 };
00074 
00075 
00076 /************************************/
00077 /*** Algorithm's general metadata ***/
00078 
00079 static AlgMetadata metadata = {
00080 
00081   "BIOCLIM",   // Id.
00082   "Bioclim",   // Name.
00083   "0.2",       // Version.
00084 
00085   // Overview
00086   "Uses mean and standard deviation for each environmental\
00087  variable separately to calculate bioclimatic envelopes.\
00088  Level of fitness between the environmental values on a point\
00089  and the respective envelopes classifies points as\
00090  Suitable, Marginal, or Unsuitable for presence.",
00091 
00092   // Description.
00093   "Implements the Bioclimatic Envelope Algorithm.\
00094  For each given environmental variable the algorithm finds the mean\
00095  and standard deviation (assuming normal distribution) associated\
00096  to the occurrence points. Each variable has its own envelope\
00097  represented by the interval [m - c*s, m + c*s], where 'm' is the\
00098  mean; 'c' is the cutoff input parameter; and 's' is the standard\
00099  deviation. Besides the envelope, each environmental variable has\
00100  additional upper and lower limits taken from the maximum and\
00101  minimum values related to the set of occurrence points.\nIn this\
00102  model, any point can be classified as:\n\
00103  Suitable: if all associated environmental values fall within\
00104  the calculated envelopes;\n\
00105  Marginal: if one or more associated environmental value falls\
00106  outside the calculated envelope, but still within the upper and\
00107  lower limits.\n\
00108  Unsuitable: if one or more associated enviromental value falls\
00109  outside the upper and lower limits.\n\
00110 Bioclim's categorical output is mapped to probabilities\
00111  of 1.0, 0.5 and 0.0 respectively.",
00112 
00113   "Nix, H. A.",  // Author
00114 
00115   // Bibliography.
00116   "Nix, H.A. (1986) A biogeographic analysis of Australian elapid\
00117  snakes. In: Atlas of Elapid Snakes of Australia. (Ed.) R. Longmore,\
00118  pp. 4-15. Australian Flora and Fauna Series Number 7. Australian\
00119  Government Publishing Service: Canberra.",
00120 
00121   "Mauro Muñoz",             // Code author.
00122   "mesmunoz [at] gmail.com", // Code author's contact.
00123 
00124   0,  // Does not accept categorical data.
00125   0,  // Does not need (pseudo)absence points.
00126 
00127   NUM_PARAM,   // Algorithm's parameters.
00128   parameters
00129 };
00130 
00131 
00132 
00133 /****************************************************************/
00134 /****************** Algorithm's factory function ****************/
00135 
00136 OM_ALG_DLL_EXPORT
00137 AlgorithmImpl *
00138 algorithmFactory()
00139 {
00140   return new Bioclim();
00141 }
00142 
00143 OM_ALG_DLL_EXPORT
00144 AlgMetadata const *
00145 algorithmMetadata()
00146 {
00147   return &metadata;
00148 }
00149 
00150 
00151 /****************************************************************/
00152 /**************************** Bioclim ***************************/
00153 
00154 /*******************/
00155 /*** constructor ***/
00156 
00157 Bioclim::Bioclim() :
00158   AlgorithmImpl( &metadata ),
00159   _done( false ),
00160   _minimum(),
00161   _maximum(),
00162   _mean(),
00163   _std_dev()
00164 { }
00165 
00166 
00167 /******************/
00168 /*** destructor ***/
00169 
00170 Bioclim::~Bioclim()
00171 {
00172 }
00173 
00174 
00175 /******************/
00176 /*** initialize ***/
00177 int
00178 Bioclim::initialize()
00179 {
00180   Scalar cutoff = 0.0;
00181   // Read and check the standard deviation cutoff parameter.
00182   if ( ! getParameter( CUTOFF_ID, &cutoff ) ) {
00183     Log::instance()->error( "Parameter " CUTOFF_ID " not set properly.\n" );
00184     return 0;
00185   }
00186 
00187   if ( cutoff <= 0 ) {
00188     Log::instance()->warn( "Bioclim - parameter out of range: %f\n", cutoff );
00189     return 0;
00190   }
00191   
00192   // Number of independent variables.
00193   int dim = _samp->numIndependent();
00194   Log::instance()->info( "Reading %d-dimensional occurrence points.\n", dim );
00195 
00196   // Check the number of sampled points.
00197   int npnt = _samp->numPresence();
00198   if (  npnt < 2 ) {
00199     Log::instance()->error( "Bioclim needs at least 2 points inside the mask!\n" );
00200     return 0;
00201   }
00202 
00203   Log::instance()->info( "Using %d points to find the bioclimatic envelope.\n", npnt );
00204 
00205   computeStats( _samp->getPresences() );
00206 
00207   _std_dev *= cutoff;
00208 
00209   _done = true;
00210 
00211   return 1;
00212 }
00213 
00214 
00215 /***************/
00216 /*** iterate ***/
00217 int
00218 Bioclim::iterate()
00219 {
00220   return 1;
00221 }
00222 
00223 
00224 /************/
00225 /*** done ***/
00226 int
00227 Bioclim::done() const
00228 {
00229   // This is not an iterative algorithm.
00230   return _done;
00231 }
00232 
00233 
00234 /*****************/
00235 /*** get Value ***/
00236 Scalar
00237 Bioclim::getValue( const Sample& x ) const
00238 {
00239   // Zero if some point valuble is outside its respective envelope.
00240   Scalar outside_envelope = 0;
00241 
00242   // Finds the distance from each variable mean to the respective
00243   // point value.
00244   Sample dif = x;
00245   dif -= _mean;
00246 
00247   for( unsigned int i=0; i<x.size(); i++) {
00248 
00249     if ( x[i] < _minimum[i] || x[i] > _maximum[i] ) {
00250       return 0.0;
00251     }
00252 
00253     if ( ! outside_envelope ) {
00254 
00255       Scalar cutoff = _std_dev[i];
00256 
00257       // If some x[i] is out of its bioclimatic envelope, predicts
00258       // no occurrence.
00259       if ( dif[i] > cutoff || dif[i] < -cutoff ) {
00260         outside_envelope = 1;
00261       }
00262     }
00263 
00264   }
00265 
00266   // If all point values are within the envelope, returns probability
00267   // 1.0. Else, if some point is outside the envelope but inside
00268   // the upper and lower ranges, returns 0.5 of probability.
00269   return outside_envelope ? 0.5 : 1.0;
00270 }
00271 
00272 
00273 /***********************/
00274 /*** get Convergence ***/
00275 int
00276 Bioclim::getConvergence( Scalar * const val ) const
00277 {
00278   *val = 1.0;
00279   return 1;
00280 }
00281 
00282 
00283 /*******************/
00284 /*** get Minimum ***/
00285 void
00286 Bioclim::computeStats( const OccurrencesPtr& occs )
00287 {
00288 
00289   // Compute min, max, and mean
00290   {
00291     OccurrencesImpl::const_iterator oc = occs->begin();
00292     OccurrencesImpl::const_iterator end = occs->end();
00293 
00294     // Intialize _minimum, _maximum, and _mean
00295     // to the values of the first point, and increment
00296     // to get it out of the loop.
00297     Sample const & sample = (*oc)->environment();
00298     _minimum = sample;
00299     _maximum = sample;
00300     _mean = sample;
00301     
00302     ++oc;
00303     
00304     // For each Occurrence, update the
00305     // statistics for _minimum, _maximum, and _mean
00306     
00307     while ( oc != end ) {
00308       
00309       Sample const& sample = (*oc)->environment();
00310       
00311       _mean += sample;
00312       _minimum &= sample;
00313       _maximum |= sample;
00314 
00315       ++oc;
00316     }
00317 
00318     // Divide for the mean.
00319     _mean /= Scalar( occs->numOccurrences() );
00320 
00321   }
00322 
00323   // Now compute the std deviation by first computing the variance.
00324   {
00325 
00326     _std_dev.resize( _mean.size() );
00327     OccurrencesImpl::const_iterator oc = occs->begin();
00328     OccurrencesImpl::const_iterator end = occs->end();
00329 
00330     // Now we compute the variance.
00331     while ( oc != end ) {
00332       Sample tmp( (*oc)->environment() );
00333       tmp -= _mean;
00334       tmp *= tmp;
00335       _std_dev += tmp;
00336       ++oc;
00337     }
00338 
00339     // In variance, we divide by (npnt - 1), not npnt!
00340     Scalar npts = Scalar( occs->numOccurrences() - 1 );
00341 
00342     // Now divide and root to get deviation.
00343     _std_dev /= npts;
00344     _std_dev.sqrt();
00345   }
00346 
00347 }
00348 
00349 /****************************************************************/
00350 /****************** configuration *******************************/
00351 void
00352 Bioclim::_getConfiguration( ConfigurationPtr& config ) const
00353 {
00354   if ( !_done )
00355     return;
00356 
00357   ConfigurationPtr model_config( new ConfigurationImpl("Bioclim") );
00358   config->addSubsection( model_config );
00359 
00360   model_config->addNameValue( "Mean", _mean );
00361   model_config->addNameValue( "StdDev", _std_dev );
00362   model_config->addNameValue( "Minimum", _minimum );
00363   model_config->addNameValue( "Maximum", _maximum );
00364 
00365 }
00366 
00367 void
00368 Bioclim::_setConfiguration( const ConstConfigurationPtr& config )
00369 {
00370   ConstConfigurationPtr model_config = config->getSubsection( "Bioclim" );
00371 
00372   if (!model_config)
00373     return;
00374 
00375   _done = true;
00376 
00377   _mean = model_config->getAttributeAsSample( "Mean" );
00378   _std_dev = model_config->getAttributeAsSample( "StdDev" );
00379   _minimum = model_config->getAttributeAsSample( "Minimum" );
00380   _maximum = model_config->getAttributeAsSample( "Maximum" );
00381 
00382   return;
00383 }
00384 
00385 /********************/
00386 /*** log Envelope ***/
00387 void
00388 Bioclim::logEnvelope()
00389 {
00390   Log::instance()->info( "Envelope with %d dimensions (variables).\n\n", _mean.size() );
00391 
00392   for ( unsigned int i = 0; i < _mean.size(); i++ )
00393     {
00394       Log::instance()->info( "Variable %02d:", i );
00395       Log::instance()->info( " Mean     : %f\n", _mean[i] );
00396       Log::instance()->info( " Deviation: %f\n", _std_dev[i] );
00397       Log::instance()->info( " Minimum  : %f\n", _minimum[i] );
00398       Log::instance()->info( " Maximum  : %f\n", _maximum[i] );
00399       Log::instance()->info( "\n" );
00400     }
00401 }