openModeller
Version 1.4.0
|
00001 00028 #include <openmodeller/Environment.hh> 00029 #include <openmodeller/Occurrence.hh> 00030 #include <openmodeller/Configuration.hh> 00031 00032 #include <openmodeller/Sampler.hh> 00033 #include <openmodeller/Occurrences.hh> 00034 #include <openmodeller/Log.hh> 00035 #include <openmodeller/Random.hh> 00036 #include <openmodeller/env_io/Map.hh> 00037 #include <openmodeller/Model.hh> 00038 00039 #include <stdio.h> 00040 #include <string.h> 00041 #include <sstream> 00042 00043 #include <openmodeller/Exceptions.hh> 00044 00045 using std::string; 00046 00047 #undef DEBUG_MEMORY 00048 00049 /****************************************************************/ 00050 /*************************** Sampler ****************************/ 00051 00052 SamplerPtr createSampler( const EnvironmentPtr& env, 00053 const OccurrencesPtr& presence, 00054 const OccurrencesPtr& absence ) 00055 { 00056 return SamplerPtr( new SamplerImpl( env, presence, absence ) ); 00057 } 00058 00059 SamplerPtr createSampler( const ConstConfigurationPtr& config ) 00060 { 00061 SamplerPtr samp( new SamplerImpl() ); 00062 samp->setConfiguration( config ); 00063 return samp; 00064 } 00065 00066 /*******************/ 00067 /*** constructor ***/ 00068 00069 SamplerImpl::SamplerImpl() : 00070 ReferenceCountedObject(), 00071 _presence(), 00072 _absence(), 00073 _env(), 00074 _normalized( false ) 00075 { 00076 #ifdef DEBUG_MEMORY 00077 Log::instance()->debug("SamplerImpl::SamplerImpl() at %x\n",this); 00078 #endif 00079 } 00080 00081 SamplerImpl::SamplerImpl( const EnvironmentPtr& env, 00082 const OccurrencesPtr& presence, 00083 const OccurrencesPtr& absence, 00084 bool isNormalized ) : 00085 ReferenceCountedObject(), 00086 _presence( presence ), 00087 _absence( absence ), 00088 _env( env ), 00089 _normalized( isNormalized ) 00090 { 00091 #ifdef DEBUG_MEMORY 00092 Log::instance()->debug("SamplerImpl::SamplerImpl( args ) at %x\n",this); 00093 #endif 00094 setEnvironmentInOccurrences(); 00095 } 00096 00097 /******************/ 00098 /*** destructor ***/ 00099 00100 SamplerImpl::~SamplerImpl() 00101 { 00102 #ifdef DEBUG_MEMORY 00103 Log::instance()->debug("SamplerImpl::~SamplerImpl() at %x\n",this); 00104 #endif 00105 } 00106 00107 void 00108 SamplerImpl::setEnvironmentInOccurrences() 00109 { 00110 // Copy data from environment into the presence and absence points. 00111 00112 if ( _presence && ! _presence->hasEnvironment() ) { 00113 00114 _presence->setEnvironment( _env, "Presence" ); 00115 } 00116 00117 if ( _absence && ! _absence->hasEnvironment() ) { 00118 00119 _absence->setEnvironment( _env, "Absence" ); 00120 } 00121 } 00122 00123 /*********************/ 00124 /*** configuration ***/ 00125 00126 ConfigurationPtr 00127 SamplerImpl::getConfiguration( ) const 00128 { 00129 00130 ConfigurationPtr config( new ConfigurationImpl( "Sampler" ) ); 00131 00132 if ( _env ) { 00133 00134 config->addSubsection( _env->getConfiguration() ); 00135 } 00136 00137 // Even if there are zero presences, include the element if there no absences (the XML Schema mandates at least one of <Presence> or <Absence>) 00138 if ( _presence && ( _presence->numOccurrences() > 0 || ! _absence || _absence->numOccurrences() == 0) ) { 00139 00140 ConfigurationPtr cfg( _presence->getConfiguration() ); 00141 cfg->setName( "Presence" ); 00142 config->addSubsection( cfg ); 00143 } 00144 00145 if ( _absence && _absence->numOccurrences() > 0 ) { 00146 00147 ConfigurationPtr cfg( _absence->getConfiguration() ); 00148 cfg->setName( "Absence" ); 00149 config->addSubsection( cfg ); 00150 } 00151 00152 return config; 00153 } 00154 00155 void 00156 SamplerImpl::setConfiguration( const ConstConfigurationPtr& config ) 00157 { 00158 EnvironmentPtr env; 00159 if ( ConstConfigurationPtr env_config = config->getSubsection( "Environment", false ) ) { 00160 00161 env = createEnvironment(); 00162 env->setConfiguration( env_config ); 00163 } 00164 00165 // As of now, the configuration for occurrences do not set/get 00166 // abundance values. It's hard-coded here, 1 for presences, 0 for absences. 00167 Log::instance()->debug( "Getting presence\n" ); 00168 OccurrencesPtr presence( new OccurrencesImpl(1.0) ); 00169 presence->setConfiguration( config->getSubsection( "Presence" ) ); 00170 00171 Log::instance()->debug( "Getting absence\n" ); 00172 OccurrencesPtr absence; 00173 00174 if ( ConstConfigurationPtr absence_config = config->getSubsection( "Absence", false ) ) { 00175 00176 absence = new OccurrencesImpl(0.0); 00177 absence->setConfiguration( absence_config ); 00178 } 00179 00180 if ( ! presence->numOccurrences() ) { 00181 00182 std::string msg = "No presence points available.\n"; 00183 00184 Log::instance()->error( msg.c_str() ); 00185 00186 throw SamplerException( msg ); 00187 } 00188 00189 Log::instance()->debug( "Loaded %u presence(s)\n", presence->numOccurrences() ); 00190 00191 int num_absences = 0; 00192 00193 if ( absence ) { 00194 00195 num_absences = absence->numOccurrences(); 00196 } 00197 00198 Log::instance()->debug( "Loaded %u absence(s)\n", num_absences ); 00199 00200 _env = env; 00201 _presence = presence; 00202 _absence = absence; 00203 00204 setEnvironmentInOccurrences(); 00205 } 00206 00207 /******************/ 00208 /*** get MinMax ***/ 00209 void SamplerImpl::getMinMax( Sample * min, Sample * max ) const 00210 { 00211 // normalize samples in occs objects 00212 // first get all occurrence objects in the same container 00213 OccurrencesPtr allOccs; 00214 00215 if ( _presence ) { 00216 00217 allOccs = new OccurrencesImpl( _presence->label(), _presence->coordSystem() ); 00218 } 00219 else { 00220 00221 allOccs = new OccurrencesImpl( _absence->label(), _absence->coordSystem() ); 00222 } 00223 00224 allOccs->appendFrom( _presence ); 00225 allOccs->appendFrom( _absence ); 00226 00227 // now compute normalization parameters 00228 allOccs->getMinMax( min, max ); 00229 } 00230 00231 /*****************/ 00232 /*** normalize ***/ 00233 void SamplerImpl::normalize( Normalizer * normalizerPtr ) 00234 { 00235 // Avoid renormalizing the sampler 00236 if ( _normalized ) { 00237 00238 return; 00239 } 00240 00241 if ( ! _env ) { 00242 00243 std::string msg = "Cannot normalize sampler without an Environment.\n"; 00244 00245 Log::instance()->error( msg.c_str() ); 00246 00247 throw SamplerException( msg ); 00248 } 00249 00250 // set env in all occurrences before normalizing env so that 00251 // occurrences get the unnormalized values 00252 setEnvironmentInOccurrences(); 00253 _env->normalize( normalizerPtr ); 00254 00255 // need to normalize presences and absences even if _env is present 00256 // because environment in occurrences was set with unnormalized values 00257 // if _env doesn't exist, then normalize occurrences anyway 00258 if ( _presence && _presence->numOccurrences() ) { 00259 00260 _presence->normalize( normalizerPtr, _env->numCategoricalLayers() ); 00261 } 00262 00263 if ( _absence && _absence->numOccurrences() ) { 00264 00265 _absence->normalize( normalizerPtr, _env->numCategoricalLayers() ); 00266 } 00267 00268 _normalized = true; 00269 } 00270 00271 /*****************/ 00272 /*** normalize ***/ 00273 void SamplerImpl::resetNormalization() 00274 { 00275 if ( _normalized ) { 00276 00277 _env->resetNormalization(); 00278 00279 if ( _presence && _presence->numOccurrences() ) { 00280 00281 _presence->resetNormalization(); 00282 } 00283 00284 if ( _absence && _absence->numOccurrences() ) { 00285 00286 _absence->resetNormalization(); 00287 } 00288 00289 _normalized = false; 00290 } 00291 } 00292 00293 /***********************/ 00294 /*** num Independent ***/ 00295 int 00296 SamplerImpl::numIndependent() const 00297 { 00298 if ( _env ) { 00299 // get number of dimensions from environment object if it exists 00300 return _env->numLayers(); 00301 } 00302 else if ( _presence && _presence->hasEnvironment() ) { 00303 // otherwise try to get it from presences 00304 return _presence->dimension(); 00305 } 00306 else if ( _absence && _absence->hasEnvironment() ) { 00307 // otherwise try to get it from absences 00308 return _absence->dimension(); 00309 } 00310 00311 // neither object has dimensions defined 00312 return 0; 00313 } 00314 00315 00316 /*********************/ 00317 /*** num Dependent ***/ 00318 int 00319 SamplerImpl::numDependent() const 00320 { 00321 return _presence ? _presence->numAttributes() : _absence->numAttributes(); 00322 } 00323 00324 00325 /********************/ 00326 /*** num Presence ***/ 00327 int 00328 SamplerImpl::numPresence() const 00329 { 00330 return _presence ? _presence->numOccurrences() : 0; 00331 } 00332 00333 00334 /*******************/ 00335 /*** num Absence ***/ 00336 int 00337 SamplerImpl::numAbsence() const 00338 { 00339 return _absence ? _absence->numOccurrences() : 0; 00340 } 00341 00342 00343 /**********************/ 00344 /*** get One Sample ***/ 00345 ConstOccurrencePtr 00346 SamplerImpl::getOneSample( ) const 00347 { 00348 Random rnd; 00349 00350 if ( ! _presence ) { 00351 00352 std::string msg = "No presence points available for sampling.\n"; 00353 00354 Log::instance()->error( msg.c_str() ); 00355 00356 throw SamplerException( msg ); 00357 } 00358 00359 if ( ! _presence->numOccurrences() ) { 00360 00361 std::string msg = "Cannot use zero presence points for sampling.\n"; 00362 00363 Log::instance()->error( msg.c_str() ); 00364 00365 throw SamplerException( msg ); 00366 } 00367 00368 // Probability of 0.5 of get a presence point. 00369 if ( rnd() < 0.5 ) { 00370 00371 return getPresence(); 00372 } 00373 00374 // Probability of 0.5 of get an absence point. 00375 // (if there are real absence points...) 00376 if ( _absence && _absence->numOccurrences() ) { 00377 00378 return getAbsence(); 00379 } 00380 00381 return getPseudoAbsence(); 00382 } 00383 00384 /******************************/ 00385 /*** generate Random Sample ***/ 00386 OccurrencePtr 00387 SamplerImpl::generateRandomSample(Scalar abundance) const 00388 { 00389 if ( ! _env ) { 00390 00391 std::string msg = "Cannot generate random samples without an Environment object.\n"; 00392 00393 Log::instance()->error( msg.c_str() ); 00394 00395 throw SamplerException( msg ); 00396 } 00397 00398 // Generate a random sample 00399 static const Sample mysample( numDependent() ); 00400 Coord x,y; 00401 00402 Sample env( _env->getRandom( &x, &y ) ); 00403 00404 OccurrencePtr oc = new OccurrenceImpl( "?", x, y, 0.0, abundance, mysample, env ); 00405 00406 return oc; 00407 } 00408 00409 /**************************/ 00410 /*** get Pseudo Absence ***/ 00411 OccurrencePtr 00412 SamplerImpl::getPseudoAbsence() const 00413 { 00414 return generateRandomSample(0.0); 00415 } 00416 00417 /**************************/ 00418 /*** get Pseudo Presence ***/ 00419 OccurrencePtr 00420 SamplerImpl::getPseudoPresence() const 00421 { 00422 return generateRandomSample(1.0); 00423 } 00424 00425 /**************************/ 00426 /*** get Pseudo Absence ***/ 00427 OccurrencePtr 00428 SamplerImpl::getPseudoAbsence( const Model& model, const Scalar threshold ) const 00429 { 00430 double prob = 0.0; 00431 00432 OccurrencePtr occ; 00433 00434 int max_loop = 5000; 00435 00436 int loop = 0; 00437 00438 do { 00439 00440 occ = getPseudoAbsence(); 00441 00442 if ( model ) { 00443 00444 prob = model->getValue( occ->environment() ); 00445 } 00446 00447 loop++; 00448 00449 } while ( ( prob >= threshold ) && ( loop < max_loop ) ); 00450 00451 if ( loop == max_loop ) { 00452 00453 std::string msg = "Exceeded maximum number of attempts to generate point outside model.\n"; 00454 00455 Log::instance()->error( msg.c_str() ); 00456 00457 throw SamplerException( msg ); 00458 } 00459 00460 return occ; 00461 } 00462 00463 /**************************/ 00464 /*** get Pseudo Presence ***/ 00465 OccurrencePtr 00466 SamplerImpl::getPseudoPresence( const Model& model, const Scalar threshold ) const 00467 { 00468 double prob = 0.0; 00469 00470 OccurrencePtr occ; 00471 00472 int max_loop = 5000; 00473 00474 int loop = 0; 00475 00476 do { 00477 00478 occ = getPseudoPresence(); 00479 00480 if ( model ) { 00481 00482 prob = model->getValue( occ->environment() ); 00483 } 00484 00485 loop++; 00486 00487 } while ( ( prob < threshold ) && ( loop < max_loop ) ); 00488 00489 if ( loop == max_loop ) { 00490 00491 std::string msg = "Exceeded maximum number of attempts to generate point inside model.\n"; 00492 00493 Log::instance()->error( msg.c_str() ); 00494 00495 throw SamplerException( msg ); 00496 } 00497 00498 return occ; 00499 } 00500 00501 /**************************/ 00502 /*** get Pseudo Absence ***/ 00503 OccurrencePtr 00504 SamplerImpl::getPseudoAbsenceOutsideInterval( const Sample * minimum, const Sample * maximum ) const 00505 { 00506 bool not_found = true; 00507 00508 OccurrencePtr occ; 00509 Sample x; 00510 00511 int max_loop = 5000; 00512 00513 int loop = 0; 00514 00515 do { 00516 00517 occ = getPseudoAbsence(); 00518 x = occ->environment(); 00519 00520 for ( unsigned int i = 0; i < x.size(); i++ ) { 00521 00522 if ( x.size() == 0 ) 00523 break; 00524 00525 if ( x[i] < (*minimum)[i] || x[i] > (*maximum)[i] ) { 00526 00527 not_found = false; 00528 break; 00529 } 00530 } 00531 00532 loop++; 00533 00534 } while ( ( not_found ) && ( loop < max_loop ) ); 00535 00536 if ( loop == max_loop ) { 00537 00538 std::string msg = "Exceeded maximum number of attempts to generate point outside interval.\n"; 00539 00540 Log::instance()->error( msg.c_str() ); 00541 00542 throw SamplerException( msg ); 00543 } 00544 00545 return occ; 00546 } 00547 00548 /**************************/ 00549 /*** get Pseudo Presence ***/ 00550 OccurrencePtr 00551 SamplerImpl::getPseudoPresenceInsideInterval( const Sample * minimum, const Sample * maximum ) const 00552 { 00553 bool not_found = true; 00554 00555 OccurrencePtr occ; 00556 Sample x; 00557 00558 int max_loop = 5000; 00559 00560 int loop = 0; 00561 00562 do { 00563 00564 occ = getPseudoPresence(); 00565 x = occ->environment(); 00566 00567 for ( unsigned int i = 0; i < x.size(); i++ ) { 00568 00569 if ( x.size() == 0 ) 00570 break; 00571 00572 if ( x[i] >= (*minimum)[i] && x[i] <= (*maximum)[i] ) { 00573 00574 not_found = false; 00575 break; 00576 } 00577 } 00578 00579 loop++; 00580 00581 } while ( ( not_found ) && ( loop < max_loop ) ); 00582 00583 if ( loop == max_loop ) { 00584 00585 std::string msg = "Exceeded maximum number of attempts to generate point inside interval.\n"; 00586 00587 Log::instance()->error( msg.c_str() ); 00588 00589 throw SamplerException( msg ); 00590 } 00591 00592 return occ; 00593 } 00594 00595 /***************************/ 00596 /*** get Pseudo Absences ***/ 00597 OccurrencesPtr 00598 SamplerImpl::getPseudoAbsences( const int& numPoints, const Model& model, const Scalar threshold, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 00599 { 00600 int i = 0; 00601 00602 OccurrencesPtr occurrences( new OccurrencesImpl(0.0) ); 00603 00604 do 00605 { 00606 OccurrencePtr point; 00607 00608 if ( model ) { 00609 00610 point = getPseudoAbsence( model, threshold ); 00611 } 00612 else { 00613 00614 point = getPseudoAbsence(); 00615 } 00616 00617 if ( geoUnique ) { 00618 00619 if ( envUnique ) { 00620 00621 if ( isEnvironmentallyUnique( occurrences, point ) && 00622 isEnvironmentallyUnique( _presence, point ) && 00623 isEnvironmentallyUnique( _absence, point ) ) { 00624 00625 std::ostringstream oss; 00626 oss << idSequenceStart+i; 00627 point->setId( oss.str() ); 00628 occurrences->insert( point ); 00629 i++; 00630 } 00631 } 00632 else { 00633 00634 if ( isSpatiallyUnique( occurrences, point ) && 00635 isSpatiallyUnique( _presence, point ) && 00636 isSpatiallyUnique( _absence, point ) ) { 00637 00638 std::ostringstream oss; 00639 oss << idSequenceStart+i; 00640 point->setId( oss.str() ); 00641 occurrences->insert( point ); 00642 i++; 00643 } 00644 } 00645 } 00646 else { 00647 00648 if ( envUnique ) { 00649 00650 if ( isEnvironmentallyUnique( occurrences, point ) && 00651 isEnvironmentallyUnique( _presence, point ) && 00652 isEnvironmentallyUnique( _absence, point ) ) { 00653 00654 std::ostringstream oss; 00655 oss << idSequenceStart+i; 00656 point->setId( oss.str() ); 00657 occurrences->insert( point ); 00658 i++; 00659 } 00660 } 00661 else { 00662 00663 std::ostringstream oss; 00664 oss << idSequenceStart+i; 00665 point->setId( oss.str() ); 00666 occurrences->insert( point ); 00667 i++; 00668 } 00669 } 00670 00671 } while ( i < numPoints ); 00672 00673 return occurrences; 00674 } 00675 00676 /***************************/ 00677 /*** get Pseudo Presences ***/ 00678 OccurrencesPtr 00679 SamplerImpl::getPseudoPresences( const int& numPoints, const Model& model, const Scalar threshold, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 00680 { 00681 int i = 0; 00682 00683 OccurrencesPtr occurrences( new OccurrencesImpl(1.0) ); 00684 00685 do 00686 { 00687 OccurrencePtr point; 00688 00689 if ( model ) { 00690 00691 point = getPseudoPresence( model, threshold ); 00692 } 00693 else { 00694 00695 point = getPseudoPresence(); 00696 } 00697 00698 if ( geoUnique ) { 00699 00700 if ( envUnique ) { 00701 00702 if ( isEnvironmentallyUnique( occurrences, point ) && 00703 isEnvironmentallyUnique( _presence, point ) && 00704 isEnvironmentallyUnique( _absence, point ) ) { 00705 00706 std::ostringstream oss; 00707 oss << idSequenceStart+i; 00708 point->setId( oss.str() ); 00709 occurrences->insert( point ); 00710 i++; 00711 } 00712 } 00713 else { 00714 00715 if ( isSpatiallyUnique( occurrences, point ) && 00716 isSpatiallyUnique( _presence, point ) && 00717 isSpatiallyUnique( _absence, point ) ) { 00718 00719 std::ostringstream oss; 00720 oss << idSequenceStart+i; 00721 point->setId( oss.str() ); 00722 occurrences->insert( point ); 00723 i++; 00724 } 00725 } 00726 } 00727 else { 00728 00729 if ( envUnique ) { 00730 00731 if ( isEnvironmentallyUnique( occurrences, point ) && 00732 isEnvironmentallyUnique( _presence, point ) && 00733 isEnvironmentallyUnique( _absence, point ) ) { 00734 00735 std::ostringstream oss; 00736 oss << idSequenceStart+i; 00737 point->setId( oss.str() ); 00738 occurrences->insert( point ); 00739 i++; 00740 } 00741 } 00742 else { 00743 00744 std::ostringstream oss; 00745 oss << idSequenceStart+i; 00746 point->setId( oss.str() ); 00747 occurrences->insert( point ); 00748 i++; 00749 } 00750 } 00751 00752 } while ( i < numPoints ); 00753 00754 return occurrences; 00755 } 00756 00757 /***************************/ 00758 /*** get Pseudo Absences ***/ 00759 OccurrencesPtr 00760 SamplerImpl::getPseudoAbsences( const int& numPoints, const Sample * minimum, const Sample * maximum, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 00761 { 00762 int i = 0; 00763 00764 OccurrencesPtr occurrences( new OccurrencesImpl(0.0) ); 00765 00766 do 00767 { 00768 OccurrencePtr point; 00769 00770 point = getPseudoAbsenceOutsideInterval( minimum, maximum ); 00771 00772 if ( geoUnique ) { 00773 00774 if ( envUnique ) { 00775 00776 if ( isEnvironmentallyUnique( occurrences, point ) && 00777 isEnvironmentallyUnique( _presence, point ) && 00778 isEnvironmentallyUnique( _absence, point ) ) { 00779 00780 std::ostringstream oss; 00781 oss << idSequenceStart+i; 00782 point->setId( oss.str() ); 00783 occurrences->insert( point ); 00784 i++; 00785 } 00786 } 00787 else { 00788 00789 if ( isSpatiallyUnique( occurrences, point ) && 00790 isSpatiallyUnique( _presence, point ) && 00791 isSpatiallyUnique( _absence, point ) ) { 00792 00793 std::ostringstream oss; 00794 oss << idSequenceStart+i; 00795 point->setId( oss.str() ); 00796 occurrences->insert( point ); 00797 i++; 00798 } 00799 } 00800 } 00801 else { 00802 00803 if ( envUnique ) { 00804 00805 if ( isEnvironmentallyUnique( occurrences, point ) && 00806 isEnvironmentallyUnique( _presence, point ) && 00807 isEnvironmentallyUnique( _absence, point ) ) { 00808 00809 std::ostringstream oss; 00810 oss << idSequenceStart+i; 00811 point->setId( oss.str() ); 00812 occurrences->insert( point ); 00813 i++; 00814 } 00815 } 00816 else { 00817 00818 std::ostringstream oss; 00819 oss << idSequenceStart+i; 00820 point->setId( oss.str() ); 00821 occurrences->insert( point ); 00822 i++; 00823 } 00824 } 00825 00826 } while ( i < numPoints ); 00827 00828 return occurrences; 00829 } 00830 00831 /***************************/ 00832 /*** get Pseudo Presences ***/ 00833 OccurrencesPtr 00834 SamplerImpl::getPseudoPresences( const int& numPoints, const Sample * minimum, const Sample * maximum, const bool geoUnique, const bool envUnique, const int idSequenceStart) const 00835 { 00836 int i = 0; 00837 00838 OccurrencesPtr occurrences( new OccurrencesImpl(1.0) ); 00839 00840 do 00841 { 00842 OccurrencePtr point; 00843 00844 point = getPseudoPresenceInsideInterval( minimum, maximum ); 00845 00846 if ( geoUnique ) { 00847 00848 if ( envUnique ) { 00849 00850 if ( isEnvironmentallyUnique( occurrences, point ) && 00851 isEnvironmentallyUnique( _presence, point ) && 00852 isEnvironmentallyUnique( _absence, point ) ) { 00853 00854 std::ostringstream oss; 00855 oss << idSequenceStart+i; 00856 point->setId( oss.str() ); 00857 occurrences->insert( point ); 00858 i++; 00859 } 00860 } 00861 else { 00862 00863 if ( isSpatiallyUnique( occurrences, point ) && 00864 isSpatiallyUnique( _presence, point ) && 00865 isSpatiallyUnique( _absence, point ) ) { 00866 00867 std::ostringstream oss; 00868 oss << idSequenceStart+i; 00869 point->setId( oss.str() ); 00870 occurrences->insert( point ); 00871 i++; 00872 } 00873 } 00874 } 00875 else { 00876 00877 if ( envUnique ) { 00878 00879 if ( isEnvironmentallyUnique( occurrences, point ) && 00880 isEnvironmentallyUnique( _presence, point ) && 00881 isEnvironmentallyUnique( _absence, point ) ) { 00882 00883 std::ostringstream oss; 00884 oss << idSequenceStart+i; 00885 point->setId( oss.str() ); 00886 occurrences->insert( point ); 00887 i++; 00888 } 00889 } 00890 else { 00891 00892 std::ostringstream oss; 00893 oss << idSequenceStart+i; 00894 point->setId( oss.str() ); 00895 occurrences->insert( point ); 00896 i++; 00897 } 00898 } 00899 00900 } while ( i < numPoints ); 00901 00902 return occurrences; 00903 } 00904 00905 /**********************/ 00906 /*** is Categorical ***/ 00907 int 00908 SamplerImpl::isCategorical( int i ) 00909 { 00910 if ( _env ) { 00911 00912 return _env->isCategorical( i ); 00913 } 00914 else { 00915 // if there is no environment obj, assumes all variables are continuous 00916 // right now there is no mechanism to define whether a variable is 00917 // continuous or categorical when occurrences already come with their 00918 // samples populated 00919 return false; 00920 } 00921 } 00922 00923 00924 /******************************/ 00925 /*** environmentally unique ***/ 00926 void 00927 SamplerImpl::environmentallyUnique( ) 00928 { 00929 Log::instance()->info( "Applying filter: enviromentally unique\n" ); 00930 00931 // Presences 00932 this->environmentallyUnique( _presence, "Presence" ); 00933 00934 // Absences 00935 this->environmentallyUnique( _absence, "Absence" ); 00936 } 00937 00938 00939 /******************************/ 00940 /*** environmentally unique ***/ 00941 void 00942 SamplerImpl::environmentallyUnique( OccurrencesPtr& occurrencesPtr, const char *type ) 00943 { 00944 if ( ! ( occurrencesPtr && occurrencesPtr->numOccurrences() ) ) { 00945 00946 return; 00947 } 00948 00949 OccurrencesImpl::iterator it = occurrencesPtr->begin(); 00950 OccurrencesImpl::iterator last = occurrencesPtr->end(); 00951 00952 while ( it != last ) { 00953 00954 Sample sample = _env->getUnnormalized( (*it)->x(), (*it)->y() ); 00955 00956 OccurrencesImpl::iterator next = it + 1; 00957 00958 while ( next != last ) { 00959 00960 Sample nextSample = _env->getUnnormalized( (*next)->x(), (*next)->y() ); 00961 00962 if ( sample.equals( nextSample ) ) { 00963 00964 Log::instance()->info( "%s Point \"%s\" at (%f,%f) has no unique environment. It will be discarded.\n", type, ((*next)->id()).c_str(), (*next)->x(), (*next)->y() ); 00965 00966 // Remove duplicates 00967 next = occurrencesPtr->erase( next ); 00968 last = occurrencesPtr->end(); 00969 00970 // Increase abundance in original occurence 00971 (*it)->setAbundance( (*it)->abundance() + 1 ); 00972 00973 // No need to increment "next" because "erase" actually swaps 00974 // the last element with the one that was just erased! 00975 } 00976 else { 00977 00978 ++next; 00979 } 00980 } 00981 00982 ++it; 00983 } 00984 } 00985 00986 00987 /************************/ 00988 /*** spatially unique ***/ 00989 void 00990 SamplerImpl::spatiallyUnique( ) 00991 { 00992 Log::instance()->info( "Applying filter: spatially unique\n" ); 00993 00994 // Presences 00995 this->spatiallyUnique( _presence, "Presence" ); 00996 00997 // Absences 00998 this->spatiallyUnique( _absence, "Absence" ); 00999 } 01000 01001 01002 /************************/ 01003 /*** spatially unique ***/ 01004 void 01005 SamplerImpl::spatiallyUnique( OccurrencesPtr& occurrencesPtr, const char *type ) 01006 { 01007 if ( ! ( occurrencesPtr && occurrencesPtr->numOccurrences() ) ) { 01008 01009 return; 01010 } 01011 01012 Map *mask = _env->getMask(); 01013 01014 // If mask is undefined, use first layer as a mask 01015 if ( ! mask ) { 01016 01017 mask = _env->getLayer( 0 ); 01018 } 01019 01020 OccurrencesImpl::iterator it = occurrencesPtr->begin(); 01021 OccurrencesImpl::iterator last = occurrencesPtr->end(); 01022 01023 while ( it != last ) { 01024 01025 int row, col; 01026 01027 mask->getRowColumn( (*it)->x(), (*it)->y(), &row, &col ); 01028 01029 OccurrencesImpl::iterator next = it + 1; 01030 01031 while ( next != last ) { 01032 01033 int nextRow, nextCol; 01034 01035 mask->getRowColumn( (*next)->x(), (*next)->y(), &nextRow, &nextCol ); 01036 01037 if ( row == nextRow && col == nextCol ) { 01038 01039 Log::instance()->info( "%s Point \"%s\" at (%f,%f) has no unique geography. It will be discarded.\n", type, ((*next)->id()).c_str(), (*next)->x(), (*next)->y() ); 01040 01041 // Remove duplicates 01042 next = occurrencesPtr->erase( next ); 01043 last = occurrencesPtr->end(); 01044 01045 // Increase abundance in original occurence 01046 (*it)->setAbundance( (*it)->abundance() + 1 ); 01047 01048 // No need to increment "next" because "erase" actually swaps 01049 // the last element with the one that was just erased! 01050 } 01051 else { 01052 01053 ++next; 01054 } 01055 } 01056 01057 ++it; 01058 } 01059 } 01060 01061 01062 /*********************************/ 01063 /*** is Environmentally Unique ***/ 01064 bool 01065 SamplerImpl::isEnvironmentallyUnique( const OccurrencesPtr& occurrences, const OccurrencePtr& point )const 01066 { 01067 if ( ! ( occurrences && occurrences->numOccurrences() ) ) { 01068 01069 return true; 01070 } 01071 01072 OccurrencesImpl::iterator it = occurrences->begin(); 01073 OccurrencesImpl::iterator last = occurrences->end(); 01074 01075 Sample sample = _env->getUnnormalized( point->x(), point->y() ); 01076 01077 while ( it != last ) { 01078 01079 Sample nextSample = _env->getUnnormalized( (*it)->x(), (*it)->y() ); 01080 01081 if ( sample.equals( nextSample ) ) { 01082 01083 return false; 01084 } 01085 else { 01086 01087 ++it; 01088 } 01089 } 01090 01091 return true; 01092 } 01093 01094 /***************************/ 01095 /*** is Spatially Unique ***/ 01096 bool 01097 SamplerImpl::isSpatiallyUnique( const OccurrencesPtr& occurrences, const OccurrencePtr& point ) const 01098 { 01099 if ( ! ( occurrences && occurrences->numOccurrences() ) ) { 01100 01101 return true; 01102 } 01103 01104 Map *mask = _env->getMask(); 01105 01106 // If mask is undefined, use first layer as a mask 01107 if ( ! mask ) { 01108 01109 mask = _env->getLayer( 0 ); 01110 } 01111 01112 OccurrencesImpl::iterator it = occurrences->begin(); 01113 OccurrencesImpl::iterator last = occurrences->end(); 01114 01115 int row, col; 01116 mask->getRowColumn( point->x(), point->y(), &row, &col ); 01117 01118 while ( it != last ) { 01119 01120 int next_row, next_col; 01121 01122 mask->getRowColumn( (*it)->x(), (*it)->y(), &next_row, &next_col ); 01123 01124 if ( row == next_row && col == next_col ) { 01125 01126 return false; 01127 } 01128 else { 01129 01130 ++it; 01131 } 01132 } 01133 01134 return true; 01135 } 01136 01137 01138 /*****************************/ 01139 /*** get Random Occurrence ***/ 01140 ConstOccurrencePtr 01141 SamplerImpl::getRandomOccurrence( const OccurrencesPtr& occur ) const 01142 { 01143 // This has been rewritten to eliminate the 01144 // possibly endless loop. 01145 // It assumes that by the time this routine is called, 01146 // all occurrences have valid environment data. 01147 01148 return occur->getRandom(); 01149 01150 } 01151 01152 01153 /************/ 01154 /*** dump ***/ 01155 void 01156 SamplerImpl::dump() const 01157 { 01158 if ( _presence ) { 01159 01160 _presence->dump( "Presences" ); 01161 } 01162 if ( _absence ) { 01163 01164 _absence->dump( "Absences" ); 01165 } 01166 } 01167 01168 01169 /***********************/ 01170 /**** split Sampler ****/ 01171 void splitSampler(const SamplerPtr& orig, 01172 SamplerPtr * train, 01173 SamplerPtr * test, 01174 double propTrain) 01175 { 01176 // split presences 01177 OccurrencesPtr presence = orig->getPresences(); 01178 01179 OccurrencesPtr test_presence; 01180 01181 OccurrencesPtr train_presence; 01182 01183 if ( presence ) { 01184 01185 test_presence = new OccurrencesImpl( presence->label(), presence->coordSystem() ); 01186 01187 train_presence = new OccurrencesImpl( presence->label(), presence->coordSystem() ); 01188 01189 splitOccurrences( presence, train_presence, test_presence, propTrain ); 01190 } 01191 01192 // split absences 01193 OccurrencesPtr train_absence; 01194 OccurrencesPtr test_absence; 01195 01196 OccurrencesPtr absence = orig->getAbsences(); 01197 01198 if ( absence ) { 01199 01200 test_absence = new OccurrencesImpl( absence->label(), absence->coordSystem() ); 01201 01202 train_absence = new OccurrencesImpl( absence->label(), absence->coordSystem() ); 01203 01204 splitOccurrences( absence, train_absence, test_absence, propTrain ); 01205 } 01206 01207 *train = new SamplerImpl( orig->getEnvironment(), 01208 train_presence, train_absence, 01209 orig->isNormalized() ); 01210 01211 *test = new SamplerImpl( orig->getEnvironment(), 01212 test_presence, test_absence, 01213 orig->isNormalized() ); 01214 } 01215 01216 /*************/ 01217 /*** clone ***/ 01218 SamplerPtr 01219 cloneSampler(const SamplerPtr& orig) 01220 { 01221 OccurrencesPtr presences; 01222 OccurrencesPtr absences; 01223 01224 if ( orig->numPresence() ) { 01225 01226 presences = orig->getPresences()->clone(); 01227 } 01228 01229 if ( orig->numAbsence() ) { 01230 01231 absences = orig->getAbsences()->clone(); 01232 } 01233 01234 EnvironmentPtr environment = orig->getEnvironment()->clone(); 01235 01236 SamplerPtr fresh_sampler = createSampler( environment, presences, absences ); 01237 01238 return fresh_sampler; 01239 } 01240