openModeller  Version 1.4.0
Sampler.cpp
Go to the documentation of this file.
00001 
00028 #include <openmodeller/Environment.hh>
00029 #include <openmodeller/Occurrence.hh>
00030 #include <openmodeller/Configuration.hh>
00031 
00032 #include <openmodeller/Sampler.hh>
00033 #include <openmodeller/Occurrences.hh>
00034 #include <openmodeller/Log.hh>
00035 #include <openmodeller/Random.hh>
00036 #include <openmodeller/env_io/Map.hh>
00037 #include <openmodeller/Model.hh>
00038 
00039 #include <stdio.h>
00040 #include <string.h>
00041 #include <sstream>
00042 
00043 #include <openmodeller/Exceptions.hh>
00044 
00045 using std::string;
00046 
00047 #undef DEBUG_MEMORY
00048 
00049 /****************************************************************/
00050 /*************************** Sampler ****************************/
00051 
00052 SamplerPtr createSampler( const EnvironmentPtr& env,
00053         const OccurrencesPtr& presence,
00054         const OccurrencesPtr& absence )
00055 {
00056   return SamplerPtr( new SamplerImpl( env, presence, absence ) );
00057 }
00058 
00059 SamplerPtr createSampler( const ConstConfigurationPtr& config )
00060 {
00061   SamplerPtr samp( new SamplerImpl() );
00062   samp->setConfiguration( config );
00063   return samp;
00064 }
00065 
00066 /*******************/
00067 /*** constructor ***/
00068 
00069 SamplerImpl::SamplerImpl() :
00070   ReferenceCountedObject(),
00071   _presence(),
00072   _absence(),
00073   _env(),
00074   _normalized( false )
00075 {
00076 #ifdef DEBUG_MEMORY
00077   Log::instance()->debug("SamplerImpl::SamplerImpl() at %x\n",this);
00078 #endif
00079 }
00080 
00081 SamplerImpl::SamplerImpl( const EnvironmentPtr& env, 
00082         const OccurrencesPtr& presence,
00083         const OccurrencesPtr& absence,
00084         bool  isNormalized ) :
00085   ReferenceCountedObject(),
00086   _presence( presence ),
00087   _absence( absence ),
00088   _env( env ),
00089   _normalized( isNormalized )
00090 {
00091 #ifdef DEBUG_MEMORY
00092   Log::instance()->debug("SamplerImpl::SamplerImpl( args ) at %x\n",this);
00093 #endif
00094   setEnvironmentInOccurrences();
00095 }
00096 
00097 /******************/
00098 /*** destructor ***/
00099 
00100 SamplerImpl::~SamplerImpl()
00101 {
00102 #ifdef DEBUG_MEMORY
00103   Log::instance()->debug("SamplerImpl::~SamplerImpl() at %x\n",this);
00104 #endif
00105 }
00106 
00107 void
00108 SamplerImpl::setEnvironmentInOccurrences()
00109 {
00110   // Copy data from environment into the presence and absence points.
00111 
00112   if ( _presence && ! _presence->hasEnvironment() ) {
00113 
00114     _presence->setEnvironment( _env, "Presence" );
00115   }
00116 
00117   if ( _absence && ! _absence->hasEnvironment() ) {
00118 
00119     _absence->setEnvironment( _env, "Absence" );
00120   }
00121 }
00122 
00123 /*********************/
00124 /*** configuration ***/
00125 
00126 ConfigurationPtr
00127 SamplerImpl::getConfiguration( ) const
00128 {
00129 
00130   ConfigurationPtr config( new ConfigurationImpl( "Sampler" ) );
00131 
00132   if ( _env ) {
00133 
00134     config->addSubsection( _env->getConfiguration() );
00135   }
00136 
00137   // Even if there are zero presences, include the element if there no absences (the XML Schema mandates at least one of <Presence> or <Absence>)
00138   if ( _presence && ( _presence->numOccurrences() > 0 || ! _absence || _absence->numOccurrences() == 0) ) {
00139 
00140     ConfigurationPtr cfg( _presence->getConfiguration() );
00141     cfg->setName( "Presence" );
00142     config->addSubsection( cfg );
00143   }
00144 
00145   if ( _absence && _absence->numOccurrences() > 0 ) {
00146 
00147     ConfigurationPtr cfg( _absence->getConfiguration() );
00148     cfg->setName( "Absence" );
00149     config->addSubsection( cfg );
00150   }
00151 
00152   return config;
00153 }
00154 
00155 void
00156 SamplerImpl::setConfiguration( const ConstConfigurationPtr& config )
00157 {
00158   EnvironmentPtr env;
00159   if ( ConstConfigurationPtr env_config = config->getSubsection( "Environment", false ) ) {
00160 
00161     env = createEnvironment();
00162     env->setConfiguration( env_config );
00163   }
00164 
00165   // As of now, the configuration for occurrences do not set/get
00166   // abundance values. It's hard-coded here, 1 for presences, 0 for absences.
00167   Log::instance()->debug( "Getting presence\n" );
00168   OccurrencesPtr presence( new OccurrencesImpl(1.0) );
00169   presence->setConfiguration( config->getSubsection( "Presence" ) );
00170 
00171   Log::instance()->debug( "Getting absence\n" );
00172   OccurrencesPtr absence;
00173 
00174   if ( ConstConfigurationPtr absence_config = config->getSubsection( "Absence", false ) ) {
00175 
00176     absence = new OccurrencesImpl(0.0);
00177     absence->setConfiguration( absence_config );
00178   }
00179 
00180   if ( ! presence->numOccurrences() ) { 
00181 
00182     std::string msg = "No presence points available.\n";
00183 
00184     Log::instance()->error( msg.c_str() );
00185 
00186     throw SamplerException( msg );
00187   }
00188 
00189   Log::instance()->debug( "Loaded %u presence(s)\n", presence->numOccurrences() );
00190 
00191   int num_absences = 0;
00192 
00193   if ( absence ) {
00194 
00195     num_absences = absence->numOccurrences();
00196   }
00197 
00198   Log::instance()->debug( "Loaded %u absence(s)\n", num_absences );
00199 
00200   _env = env;
00201   _presence = presence;
00202   _absence  = absence;
00203 
00204   setEnvironmentInOccurrences();
00205 }
00206 
00207 /******************/
00208 /*** get MinMax ***/
00209 void SamplerImpl::getMinMax( Sample * min, Sample * max ) const
00210 {
00211   // normalize samples in occs objects
00212   // first get all occurrence objects in the same container
00213   OccurrencesPtr allOccs;
00214 
00215   if ( _presence ) {
00216 
00217     allOccs = new OccurrencesImpl( _presence->label(), _presence->coordSystem() );
00218   }
00219   else {
00220 
00221     allOccs = new OccurrencesImpl( _absence->label(), _absence->coordSystem() );
00222   }
00223 
00224   allOccs->appendFrom( _presence );
00225   allOccs->appendFrom( _absence );
00226 
00227   // now compute normalization parameters
00228   allOccs->getMinMax( min, max );
00229 }
00230 
00231 /*****************/
00232 /*** normalize ***/
00233 void SamplerImpl::normalize( Normalizer * normalizerPtr )
00234 {
00235   // Avoid renormalizing the sampler
00236   if ( _normalized ) {
00237 
00238     return;
00239   }
00240 
00241   if ( ! _env ) {
00242 
00243     std::string msg = "Cannot normalize sampler without an Environment.\n";
00244 
00245     Log::instance()->error( msg.c_str() );
00246 
00247     throw SamplerException( msg );
00248   }
00249 
00250   // set env in all occurrences before normalizing env so that
00251   // occurrences get the unnormalized values
00252   setEnvironmentInOccurrences();
00253   _env->normalize( normalizerPtr );
00254 
00255   // need to normalize presences and absences even if _env is present
00256   // because environment in occurrences was set with unnormalized values
00257   // if _env doesn't exist, then normalize occurrences anyway
00258   if ( _presence && _presence->numOccurrences() ) {
00259 
00260     _presence->normalize( normalizerPtr, _env->numCategoricalLayers() );
00261   }
00262 
00263   if ( _absence && _absence->numOccurrences() ) {
00264 
00265     _absence->normalize( normalizerPtr, _env->numCategoricalLayers() );
00266   }
00267 
00268   _normalized = true;
00269 }
00270 
00271 /*****************/
00272 /*** reset normalization ***/
00273 void SamplerImpl::resetNormalization()
00274 {
00275   if ( _normalized ) {
00276 
00277     _env->resetNormalization();
00278 
00279     if ( _presence && _presence->numOccurrences() ) {
00280 
00281       _presence->resetNormalization();
00282     }
00283 
00284     if ( _absence && _absence->numOccurrences() ) {
00285 
00286       _absence->resetNormalization();
00287     }
00288 
00289     _normalized = false;
00290   }
00291 }
00292 
00293 /***********************/
00294 /*** num Independent ***/
00295 int
00296 SamplerImpl::numIndependent() const
00297 {
00298   if ( _env ) {
00299     // get number of dimensions from environment object if it exists
00300     return _env->numLayers();
00301   }
00302   else if ( _presence && _presence->hasEnvironment() ) {
00303     // otherwise try to get it from presences
00304     return _presence->dimension();
00305   }
00306   else if ( _absence && _absence->hasEnvironment() ) {
00307     // otherwise try to get it from absences
00308     return _absence->dimension();
00309   }
00310 
00311   // neither object has dimensions defined
00312   return 0;
00313 }
00314 
00315 
00316 /*********************/
00317 /*** num Dependent ***/
00318 int
00319 SamplerImpl::numDependent() const
00320 {
00321   return _presence ? _presence->numAttributes() : _absence->numAttributes();
00322 }
00323 
00324 
00325 /********************/
00326 /*** num Presence ***/
00327 int
00328 SamplerImpl::numPresence() const
00329 {
00330   return _presence ? _presence->numOccurrences() : 0;
00331 }
00332 
00333 
00334 /*******************/
00335 /*** num Absence ***/
00336 int
00337 SamplerImpl::numAbsence() const
00338 {
00339   return _absence ? _absence->numOccurrences() : 0;
00340 }
00341 
00342 
00343 /**********************/
00344 /*** get One Sample ***/
00345 ConstOccurrencePtr
00346 SamplerImpl::getOneSample( ) const
00347 {
00348   Random rnd;
00349 
00350   if ( ! _presence ) { 
00351 
00352     std::string msg = "No presence points available for sampling.\n";
00353 
00354     Log::instance()->error( msg.c_str() );
00355 
00356     throw SamplerException( msg );
00357   }
00358 
00359   if ( ! _presence->numOccurrences() ) { 
00360 
00361     std::string msg = "Cannot use zero presence points for sampling.\n";
00362 
00363     Log::instance()->error( msg.c_str() );
00364 
00365     throw SamplerException( msg );
00366   }
00367 
00368   // Probability of 0.5 of get a presence point.
00369   if ( rnd() < 0.5 ) {
00370 
00371     return getPresence();
00372   }
00373 
00374   // Probability of 0.5 of get an absence point.
00375   // (if there are real absence points...)
00376   if ( _absence && _absence->numOccurrences() ) {
00377 
00378     return getAbsence();
00379   }
00380 
00381   return getPseudoAbsence();
00382 }
00383 
00384 /******************************/
00385 /*** generate Random Sample ***/
00386 OccurrencePtr
00387 SamplerImpl::generateRandomSample(Scalar abundance) const 
00388 {
00389   if ( ! _env ) {
00390 
00391     std::string msg = "Cannot generate random samples without an Environment object.\n";
00392 
00393     Log::instance()->error( msg.c_str() );
00394 
00395     throw SamplerException( msg );
00396   }
00397 
00398   // Generate a random sample
00399   static const Sample mysample( numDependent() );
00400   Coord x,y;
00401 
00402   Sample env( _env->getRandom( &x, &y ) );
00403 
00404   OccurrencePtr oc = new OccurrenceImpl( "?", x, y, 0.0, abundance, mysample, env );
00405 
00406   return oc;
00407 }
00408 
00409 /**************************/
00410 /*** get Pseudo Absence ***/
00411 OccurrencePtr
00412 SamplerImpl::getPseudoAbsence() const 
00413 {
00414   return generateRandomSample(0.0);
00415 }
00416 
00417 /**************************/
00418 /*** get Pseudo Presence ***/
00419 OccurrencePtr
00420 SamplerImpl::getPseudoPresence() const 
00421 {
00422   return generateRandomSample(1.0);
00423 }
00424 
00425 /**************************/
00426 /*** get Pseudo Absence ***/
00427 OccurrencePtr 
00428 SamplerImpl::getPseudoAbsence( const Model& model, const Scalar threshold ) const
00429 {
00430    double prob = 0.0;
00431 
00432    OccurrencePtr occ;
00433 
00434    int max_loop = 5000;
00435 
00436    int loop = 0;
00437 
00438    do {
00439 
00440      occ = getPseudoAbsence();
00441 
00442      if ( model ) {
00443 
00444        prob = model->getValue( occ->environment() );
00445      }
00446 
00447      loop++;
00448 
00449    } while ( ( prob >= threshold ) && ( loop < max_loop ) );
00450 
00451    if ( loop == max_loop ) {
00452 
00453      std::string msg = "Exceeded maximum number of attempts to generate point outside model.\n";
00454 
00455      Log::instance()->error( msg.c_str() );
00456 
00457      throw SamplerException( msg );
00458    }
00459 
00460    return occ;
00461 }
00462 
00463 /**************************/
00464 /*** get Pseudo Presence ***/
00465 OccurrencePtr 
00466 SamplerImpl::getPseudoPresence( const Model& model, const Scalar threshold ) const
00467 {
00468    double prob = 0.0;
00469 
00470    OccurrencePtr occ;
00471 
00472    int max_loop = 5000;
00473 
00474    int loop = 0;
00475 
00476    do {
00477 
00478      occ = getPseudoPresence();
00479 
00480      if ( model ) {
00481 
00482        prob = model->getValue( occ->environment() );
00483      }
00484 
00485      loop++;
00486 
00487    } while ( ( prob < threshold ) && ( loop < max_loop ) );
00488 
00489    if ( loop == max_loop ) {
00490 
00491      std::string msg = "Exceeded maximum number of attempts to generate point inside model.\n";
00492 
00493      Log::instance()->error( msg.c_str() );
00494 
00495      throw SamplerException( msg );
00496    }
00497 
00498    return occ;
00499 }
00500 
00501 /**************************/
00502 /*** get Pseudo Absence ***/
00503 OccurrencePtr 
00504 SamplerImpl::getPseudoAbsenceOutsideInterval( const Sample * minimum, const Sample * maximum ) const
00505 {
00506    bool not_found = true;
00507 
00508    OccurrencePtr occ;
00509    Sample x;
00510 
00511    int max_loop = 5000;
00512 
00513    int loop = 0;
00514 
00515    do {
00516 
00517      occ = getPseudoAbsence();
00518      x = occ->environment();
00519 
00520      for ( unsigned int i = 0; i < x.size(); i++ ) {
00521 
00522        if ( x.size() == 0 ) 
00523          break;
00524 
00525        if ( x[i] < (*minimum)[i] || x[i] > (*maximum)[i] ) {
00526 
00527          not_found = false;
00528          break;
00529        }
00530      }
00531 
00532      loop++;
00533 
00534    } while ( ( not_found ) && ( loop < max_loop ) );
00535 
00536    if ( loop == max_loop ) {
00537 
00538      std::string msg = "Exceeded maximum number of attempts to generate point outside interval.\n";
00539 
00540      Log::instance()->error( msg.c_str() );
00541 
00542      throw SamplerException( msg );
00543    }
00544 
00545    return occ;
00546 }
00547 
00548 /**************************/
00549 /*** get Pseudo Presence ***/
00550 OccurrencePtr 
00551 SamplerImpl::getPseudoPresenceInsideInterval( const Sample * minimum, const Sample * maximum ) const
00552 {
00553    bool not_found = true;
00554 
00555    OccurrencePtr occ;
00556    Sample x;
00557 
00558    int max_loop = 5000;
00559 
00560    int loop = 0;
00561 
00562    do {
00563 
00564      occ = getPseudoPresence();
00565      x = occ->environment();
00566 
00567      for ( unsigned int i = 0; i < x.size(); i++ ) {
00568 
00569        if ( x.size() == 0 ) 
00570          break;
00571 
00572        if ( x[i] >= (*minimum)[i] && x[i] <= (*maximum)[i] ) {
00573 
00574          not_found = false;
00575          break;
00576        }
00577      }
00578 
00579      loop++;
00580 
00581    } while ( ( not_found ) && ( loop < max_loop ) );
00582 
00583    if ( loop == max_loop ) {
00584 
00585      std::string msg = "Exceeded maximum number of attempts to generate point inside interval.\n";
00586 
00587      Log::instance()->error( msg.c_str() );
00588 
00589      throw SamplerException( msg );
00590    }
00591 
00592    return occ;
00593 }
00594 
00595 /***************************/
00596 /*** get Pseudo Absences ***/
00597 OccurrencesPtr 
00598 SamplerImpl::getPseudoAbsences( const int& numPoints, const Model& model, const Scalar threshold, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 
00599 {
00600    int i = 0;
00601 
00602    OccurrencesPtr occurrences( new OccurrencesImpl(0.0) );
00603 
00604    do
00605    {
00606      OccurrencePtr point;
00607 
00608      if ( model ) {
00609 
00610        point = getPseudoAbsence( model, threshold );
00611      }
00612      else {
00613 
00614        point = getPseudoAbsence();
00615      }
00616 
00617      if ( geoUnique ) {
00618 
00619        if ( envUnique ) {
00620 
00621          if ( isEnvironmentallyUnique( occurrences, point ) && 
00622               isEnvironmentallyUnique( _presence, point ) && 
00623               isEnvironmentallyUnique( _absence, point ) ) {
00624 
00625            std::ostringstream oss;
00626            oss << idSequenceStart+i;
00627            point->setId( oss.str() );
00628            occurrences->insert( point );
00629            i++;
00630          }
00631        }
00632        else {
00633 
00634          if ( isSpatiallyUnique( occurrences, point ) && 
00635               isSpatiallyUnique( _presence, point ) && 
00636               isSpatiallyUnique( _absence, point ) ) {
00637 
00638            std::ostringstream oss;
00639            oss << idSequenceStart+i;
00640            point->setId( oss.str() );
00641            occurrences->insert( point );
00642            i++;
00643          }
00644        }
00645      }
00646      else {
00647 
00648        if ( envUnique ) {
00649 
00650          if ( isEnvironmentallyUnique( occurrences, point ) && 
00651               isEnvironmentallyUnique( _presence, point ) && 
00652               isEnvironmentallyUnique( _absence, point ) ) {
00653 
00654            std::ostringstream oss;
00655            oss << idSequenceStart+i;
00656            point->setId( oss.str() );
00657            occurrences->insert( point );
00658            i++;
00659          }
00660        }
00661        else {
00662 
00663          std::ostringstream oss;
00664          oss << idSequenceStart+i;
00665          point->setId( oss.str() );
00666          occurrences->insert( point );
00667          i++;
00668        }
00669      }
00670 
00671    } while ( i < numPoints );
00672 
00673    return occurrences;
00674 }
00675 
00676 /***************************/
00677 /*** get Pseudo Presences ***/
00678 OccurrencesPtr 
00679 SamplerImpl::getPseudoPresences( const int& numPoints, const Model& model, const Scalar threshold, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 
00680 {
00681    int i = 0;
00682 
00683    OccurrencesPtr occurrences( new OccurrencesImpl(1.0) );
00684 
00685    do
00686    {
00687      OccurrencePtr point;
00688 
00689      if ( model ) {
00690 
00691        point = getPseudoPresence( model, threshold );
00692      }
00693      else {
00694 
00695        point = getPseudoPresence();
00696      }
00697 
00698      if ( geoUnique ) {
00699 
00700        if ( envUnique ) {
00701 
00702          if ( isEnvironmentallyUnique( occurrences, point ) && 
00703               isEnvironmentallyUnique( _presence, point ) && 
00704               isEnvironmentallyUnique( _absence, point ) ) {
00705 
00706            std::ostringstream oss;
00707            oss << idSequenceStart+i;
00708            point->setId( oss.str() );
00709            occurrences->insert( point );
00710            i++;
00711          }
00712        }
00713        else {
00714 
00715          if ( isSpatiallyUnique( occurrences, point ) && 
00716               isSpatiallyUnique( _presence, point ) && 
00717               isSpatiallyUnique( _absence, point ) ) {
00718 
00719            std::ostringstream oss;
00720            oss << idSequenceStart+i;
00721            point->setId( oss.str() );
00722            occurrences->insert( point );
00723            i++;
00724          }
00725        }
00726      }
00727      else {
00728 
00729        if ( envUnique ) {
00730 
00731          if ( isEnvironmentallyUnique( occurrences, point ) && 
00732               isEnvironmentallyUnique( _presence, point ) && 
00733               isEnvironmentallyUnique( _absence, point ) ) {
00734 
00735            std::ostringstream oss;
00736            oss << idSequenceStart+i;
00737            point->setId( oss.str() );
00738            occurrences->insert( point );
00739            i++;
00740          }
00741        }
00742        else {
00743 
00744          std::ostringstream oss;
00745          oss << idSequenceStart+i;
00746          point->setId( oss.str() );
00747          occurrences->insert( point );
00748          i++;
00749        }
00750      }
00751 
00752    } while ( i < numPoints );
00753 
00754    return occurrences;
00755 }
00756 
00757 /***************************/
00758 /*** get Pseudo Absences ***/
00759 OccurrencesPtr 
00760 SamplerImpl::getPseudoAbsences( const int& numPoints, const Sample * minimum, const Sample * maximum, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 
00761 {
00762    int i = 0;
00763 
00764    OccurrencesPtr occurrences( new OccurrencesImpl(0.0) );
00765 
00766    do
00767    {
00768      OccurrencePtr point;
00769 
00770      point = getPseudoAbsenceOutsideInterval( minimum, maximum );
00771 
00772      if ( geoUnique ) {
00773 
00774        if ( envUnique ) {
00775 
00776          if ( isEnvironmentallyUnique( occurrences, point ) && 
00777               isEnvironmentallyUnique( _presence, point ) && 
00778               isEnvironmentallyUnique( _absence, point ) ) {
00779 
00780            std::ostringstream oss;
00781            oss << idSequenceStart+i;
00782            point->setId( oss.str() );
00783            occurrences->insert( point );
00784            i++;
00785          }
00786        }
00787        else {
00788 
00789          if ( isSpatiallyUnique( occurrences, point ) && 
00790               isSpatiallyUnique( _presence, point ) && 
00791               isSpatiallyUnique( _absence, point ) ) {
00792 
00793            std::ostringstream oss;
00794            oss << idSequenceStart+i;
00795            point->setId( oss.str() );
00796            occurrences->insert( point );
00797            i++;
00798          }
00799        }
00800      }
00801      else {
00802 
00803        if ( envUnique ) {
00804 
00805          if ( isEnvironmentallyUnique( occurrences, point ) && 
00806               isEnvironmentallyUnique( _presence, point ) && 
00807               isEnvironmentallyUnique( _absence, point ) ) {
00808 
00809            std::ostringstream oss;
00810            oss << idSequenceStart+i;
00811            point->setId( oss.str() );
00812            occurrences->insert( point );
00813            i++;
00814          }
00815        }
00816        else {
00817 
00818          std::ostringstream oss;
00819          oss << idSequenceStart+i;
00820          point->setId( oss.str() );
00821          occurrences->insert( point );
00822          i++;
00823        }
00824      }
00825 
00826    } while ( i < numPoints );
00827 
00828    return occurrences;
00829 }
00830 
00831 /***************************/
00832 /*** get Pseudo Presences ***/
00833 OccurrencesPtr 
00834 SamplerImpl::getPseudoPresences( const int& numPoints, const Sample * minimum, const Sample * maximum, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 
00835 {
00836    int i = 0;
00837 
00838    OccurrencesPtr occurrences( new OccurrencesImpl(1.0) );
00839 
00840    do
00841    {
00842      OccurrencePtr point;
00843 
00844      point = getPseudoPresenceInsideInterval( minimum, maximum );
00845 
00846      if ( geoUnique ) {
00847 
00848        if ( envUnique ) {
00849 
00850          if ( isEnvironmentallyUnique( occurrences, point ) && 
00851               isEnvironmentallyUnique( _presence, point ) && 
00852               isEnvironmentallyUnique( _absence, point ) ) {
00853 
00854            std::ostringstream oss;
00855            oss << idSequenceStart+i;
00856            point->setId( oss.str() );
00857            occurrences->insert( point );
00858            i++;
00859          }
00860        }
00861        else {
00862 
00863          if ( isSpatiallyUnique( occurrences, point ) && 
00864               isSpatiallyUnique( _presence, point ) && 
00865               isSpatiallyUnique( _absence, point ) ) {
00866 
00867            std::ostringstream oss;
00868            oss << idSequenceStart+i;
00869            point->setId( oss.str() );
00870            occurrences->insert( point );
00871            i++;
00872          }
00873        }
00874      }
00875      else {
00876 
00877        if ( envUnique ) {
00878 
00879          if ( isEnvironmentallyUnique( occurrences, point ) && 
00880               isEnvironmentallyUnique( _presence, point ) && 
00881               isEnvironmentallyUnique( _absence, point ) ) {
00882 
00883            std::ostringstream oss;
00884            oss << idSequenceStart+i;
00885            point->setId( oss.str() );
00886            occurrences->insert( point );
00887            i++;
00888          }
00889        }
00890        else {
00891 
00892          std::ostringstream oss;
00893          oss << idSequenceStart+i;
00894          point->setId( oss.str() );
00895          occurrences->insert( point );
00896          i++;
00897        }
00898      }
00899 
00900    } while ( i < numPoints );
00901 
00902    return occurrences;
00903 }
00904 
00905 /**********************/
00906 /*** is Categorical ***/
00907 int
00908 SamplerImpl::isCategorical( int i )
00909 {
00910   if ( _env ) {
00911 
00912     return _env->isCategorical( i );
00913   }
00914   else {
00915     // if there is no environment obj, assumes all variables are continuous
00916     // right now there is no mechanism to define whether a variable is 
00917     // continuous or categorical when occurrences already come with their
00918     // samples populated
00919     return false;
00920   }
00921 }
00922 
00923 
00924 /******************************/
00925 /*** environmentally unique ***/
00926 void 
00927 SamplerImpl::environmentallyUnique( )
00928 {
00929   Log::instance()->info( "Applying filter: enviromentally unique\n" );
00930 
00931   // Presences
00932   this->environmentallyUnique( _presence, "Presence" );
00933 
00934   // Absences
00935   this->environmentallyUnique( _absence, "Absence" );
00936 }
00937 
00938 
00939 /******************************/
00940 /*** environmentally unique ***/
00941 void 
00942 SamplerImpl::environmentallyUnique( OccurrencesPtr& occurrencesPtr, const char *type )
00943 {
00944   if ( ! ( occurrencesPtr && occurrencesPtr->numOccurrences() ) ) {
00945 
00946     return;
00947   }
00948 
00949   OccurrencesImpl::iterator it   = occurrencesPtr->begin();
00950   OccurrencesImpl::iterator last = occurrencesPtr->end();
00951 
00952   while ( it != last ) {
00953 
00954     Sample sample = _env->getUnnormalized( (*it)->x(), (*it)->y() );
00955 
00956     OccurrencesImpl::iterator next = it + 1;
00957 
00958     while ( next != last ) {
00959 
00960       Sample nextSample = _env->getUnnormalized( (*next)->x(), (*next)->y() );
00961 
00962       if ( sample.equals( nextSample ) ) {
00963 
00964         Log::instance()->info( "%s Point \"%s\" at (%f,%f) has no unique environment. It will be discarded.\n", type, ((*next)->id()).c_str(), (*next)->x(), (*next)->y() );
00965 
00966         // Remove duplicates
00967         next = occurrencesPtr->erase( next );
00968         last = occurrencesPtr->end();
00969 
00970         // Increase abundance in original occurence
00971         (*it)->setAbundance( (*it)->abundance() + 1 );
00972 
00973         // No need to increment "next" because "erase" actually swaps 
00974         // the last element with the one that was just erased!
00975       }
00976       else {
00977 
00978         ++next;
00979       }
00980     }
00981 
00982     ++it;
00983   }
00984 }
00985 
00986 
00987 /************************/
00988 /*** spatially unique ***/
00989 void 
00990 SamplerImpl::spatiallyUnique( )
00991 {
00992   Log::instance()->info( "Applying filter: spatially unique\n" );
00993 
00994   // Presences
00995   this->spatiallyUnique( _presence, "Presence" );
00996 
00997   // Absences
00998   this->spatiallyUnique( _absence, "Absence" );
00999 }
01000 
01001 
01002 /************************/
01003 /*** spatially unique ***/
01004 void 
01005 SamplerImpl::spatiallyUnique( OccurrencesPtr& occurrencesPtr, const char *type )
01006 {
01007   if ( ! ( occurrencesPtr && occurrencesPtr->numOccurrences() ) ) {
01008 
01009     return;
01010   }
01011 
01012   Map *mask = _env->getMask();
01013 
01014   // If mask is undefined, use first layer as a mask
01015   if ( ! mask ) {
01016 
01017     mask = _env->getLayer( 0 );
01018   }
01019 
01020   OccurrencesImpl::iterator it   = occurrencesPtr->begin();
01021   OccurrencesImpl::iterator last = occurrencesPtr->end();
01022 
01023   while ( it != last ) {
01024 
01025     int row, col;
01026 
01027     mask->getRowColumn( (*it)->x(), (*it)->y(), &row, &col );
01028 
01029     OccurrencesImpl::iterator next = it + 1;
01030 
01031     while ( next != last ) {
01032 
01033       int nextRow, nextCol;
01034 
01035       mask->getRowColumn( (*next)->x(), (*next)->y(), &nextRow, &nextCol );
01036 
01037       if ( row == nextRow && col == nextCol ) {
01038 
01039         Log::instance()->info( "%s Point \"%s\" at (%f,%f) has no unique geography. It will be discarded.\n", type, ((*next)->id()).c_str(), (*next)->x(), (*next)->y() );
01040 
01041         // Remove duplicates
01042         next = occurrencesPtr->erase( next );
01043         last = occurrencesPtr->end();
01044 
01045         // Increase abundance in original occurence
01046         (*it)->setAbundance( (*it)->abundance() + 1 );
01047 
01048         // No need to increment "next" because "erase" actually swaps 
01049         // the last element with the one that was just erased!
01050       }
01051       else {
01052 
01053         ++next;
01054       }
01055     }
01056 
01057     ++it;
01058   }
01059 }
01060 
01061 
01062 /*********************************/
01063 /*** is Environmentally Unique ***/
01064 bool 
01065 SamplerImpl::isEnvironmentallyUnique( const OccurrencesPtr& occurrences, const OccurrencePtr& point )const
01066 {
01067   if ( ! ( occurrences && occurrences->numOccurrences() ) ) {
01068 
01069     return true;
01070   }
01071 
01072   OccurrencesImpl::iterator it   = occurrences->begin();
01073   OccurrencesImpl::iterator last = occurrences->end();
01074 
01075   Sample sample = _env->getUnnormalized( point->x(), point->y() );
01076 
01077   while ( it != last ) {
01078 
01079     Sample nextSample = _env->getUnnormalized( (*it)->x(), (*it)->y() );
01080 
01081     if ( sample.equals( nextSample ) ) {
01082 
01083       return false;
01084     }
01085     else {
01086 
01087       ++it;
01088     }
01089   }
01090 
01091   return true;
01092 }
01093 
01094 /***************************/
01095 /*** is Spatially Unique ***/
01096 bool 
01097 SamplerImpl::isSpatiallyUnique( const OccurrencesPtr& occurrences, const OccurrencePtr& point ) const
01098 {
01099   if ( ! ( occurrences && occurrences->numOccurrences() ) ) {
01100 
01101     return true;
01102   }
01103 
01104   Map *mask = _env->getMask();
01105 
01106   // If mask is undefined, use first layer as a mask
01107   if ( ! mask ) {
01108 
01109     mask = _env->getLayer( 0 );
01110   }
01111 
01112   OccurrencesImpl::iterator it   = occurrences->begin();
01113   OccurrencesImpl::iterator last = occurrences->end();
01114 
01115   int row, col;
01116   mask->getRowColumn( point->x(), point->y(), &row, &col );
01117 
01118   while ( it != last ) {
01119 
01120     int next_row, next_col;
01121 
01122     mask->getRowColumn( (*it)->x(), (*it)->y(), &next_row, &next_col );
01123 
01124     if ( row == next_row && col == next_col ) {
01125 
01126       return false;
01127     }
01128     else {
01129 
01130       ++it;
01131     }
01132   }
01133 
01134   return true;
01135 }
01136 
01137 
01138 /*****************************/
01139 /*** get Random Occurrence ***/
01140 ConstOccurrencePtr
01141 SamplerImpl::getRandomOccurrence( const OccurrencesPtr& occur ) const
01142 {
01143   // This has been rewritten to eliminate the
01144   // possibly endless loop.
01145   // It assumes that by the time this routine is called,
01146   // all occurrences have valid environment data.
01147   
01148   return occur->getRandom();
01149 
01150 }
01151 
01152 
01153 /************/
01154 /*** dump ***/
01155 void
01156 SamplerImpl::dump() const
01157 {
01158   if ( _presence ) {
01159 
01160     _presence->dump( "Presences" );
01161   }
01162   if ( _absence ) {
01163 
01164     _absence->dump( "Absences" );
01165   }
01166 }
01167 
01168 
01169 /***********************/
01170 /**** split Sampler ****/
01171 void splitSampler(const SamplerPtr& orig, 
01172       SamplerPtr * train,
01173       SamplerPtr * test,
01174       double propTrain)
01175 {
01176   // split presences
01177   OccurrencesPtr presence = orig->getPresences();
01178 
01179   OccurrencesPtr test_presence;
01180 
01181   OccurrencesPtr train_presence;
01182 
01183   if ( presence ) {
01184 
01185     test_presence = new OccurrencesImpl( presence->label(), presence->coordSystem() );
01186 
01187     train_presence = new OccurrencesImpl( presence->label(), presence->coordSystem() );
01188 
01189     splitOccurrences( presence, train_presence, test_presence, propTrain );
01190   }
01191 
01192   // split absences
01193   OccurrencesPtr train_absence;
01194   OccurrencesPtr test_absence;
01195 
01196   OccurrencesPtr absence = orig->getAbsences();
01197 
01198   if ( absence ) { 
01199 
01200     test_absence = new OccurrencesImpl( absence->label(), absence->coordSystem() );
01201 
01202     train_absence = new OccurrencesImpl( absence->label(), absence->coordSystem() );
01203 
01204     splitOccurrences( absence, train_absence, test_absence, propTrain );
01205   }
01206 
01207   *train = new SamplerImpl( orig->getEnvironment(), 
01208                             train_presence, train_absence, 
01209                             orig->isNormalized() );
01210 
01211   *test = new SamplerImpl( orig->getEnvironment(),
01212                            test_presence, test_absence,
01213                            orig->isNormalized() );
01214 }
01215 
01216 /*************/
01217 /*** clone ***/
01218 SamplerPtr
01219 cloneSampler(const SamplerPtr& orig)
01220 {
01221   OccurrencesPtr presences;
01222   OccurrencesPtr absences;
01223 
01224   if ( orig->numPresence() ) {
01225 
01226     presences = orig->getPresences()->clone();
01227   }
01228 
01229   if ( orig->numAbsence() ) {
01230 
01231     absences = orig->getAbsences()->clone();
01232   }
01233 
01234   EnvironmentPtr environment = orig->getEnvironment()->clone();
01235 
01236   SamplerPtr fresh_sampler = createSampler( environment, presences, absences );
01237 
01238   return fresh_sampler;
01239 }
01240