openModeller  Version 1.5.0
Sampler.cpp
Go to the documentation of this file.
1 
31 
32 #include <openmodeller/Sampler.hh>
34 #include <openmodeller/Log.hh>
35 #include <openmodeller/Random.hh>
37 #include <openmodeller/Model.hh>
38 
39 #include <stdio.h>
40 #include <string.h>
41 #include <sstream>
42 
44 
45 using std::string;
46 
47 #undef DEBUG_MEMORY
48 
49 /****************************************************************/
50 /*************************** Sampler ****************************/
51 
53  const OccurrencesPtr& presence,
54  const OccurrencesPtr& absence )
55 {
56  return SamplerPtr( new SamplerImpl( env, presence, absence ) );
57 }
58 
60 {
61  SamplerPtr samp( new SamplerImpl() );
62  samp->setConfiguration( config );
63  return samp;
64 }
65 
66 /*******************/
67 /*** constructor ***/
68 
71  _presence(),
72  _absence(),
73  _env(),
74  _normalized( false )
75 {
76 #ifdef DEBUG_MEMORY
77  Log::instance()->debug("SamplerImpl::SamplerImpl() at %x\n",this);
78 #endif
79 }
80 
82  const OccurrencesPtr& presence,
83  const OccurrencesPtr& absence,
84  bool isNormalized ) :
86  _presence( presence ),
87  _absence( absence ),
88  _env( env ),
89  _normalized( isNormalized )
90 {
91 #ifdef DEBUG_MEMORY
92  Log::instance()->debug("SamplerImpl::SamplerImpl( args ) at %x\n",this);
93 #endif
95 }
96 
97 /******************/
98 /*** destructor ***/
99 
101 {
102 #ifdef DEBUG_MEMORY
103  Log::instance()->debug("SamplerImpl::~SamplerImpl() at %x\n",this);
104 #endif
105 }
106 
107 void
109 {
110  // Copy data from environment into the presence and absence points.
111 
112  if ( _presence && ! _presence->hasEnvironment() ) {
113 
114  _presence->setEnvironment( _env, "Presence" );
115  }
116 
117  if ( _absence && ! _absence->hasEnvironment() ) {
118 
119  _absence->setEnvironment( _env, "Absence" );
120  }
121 }
122 
123 /*********************/
124 /*** configuration ***/
125 
128 {
129 
130  ConfigurationPtr config( new ConfigurationImpl( "Sampler" ) );
131 
132  if ( _env ) {
133 
134  config->addSubsection( _env->getConfiguration() );
135  }
136 
137  // Even if there are zero presences, include the element if there are no absences (the XML Schema mandates at least one of <Presence> or <Absence>)
138  if ( _presence && ( _presence->numOccurrences() > 0 || ! _absence || _absence->numOccurrences() == 0) ) {
139 
140  ConfigurationPtr cfg( _presence->getConfiguration() );
141  cfg->setName( "Presence" );
142  config->addSubsection( cfg );
143  }
144 
145  if ( _absence && _absence->numOccurrences() > 0 ) {
146 
147  ConfigurationPtr cfg( _absence->getConfiguration() );
148  cfg->setName( "Absence" );
149  config->addSubsection( cfg );
150  }
151 
152  return config;
153 }
154 
155 void
157 {
158  EnvironmentPtr env;
159  if ( ConstConfigurationPtr env_config = config->getSubsection( "Environment", false ) ) {
160 
161  env = createEnvironment();
162  env->setConfiguration( env_config );
163  }
164 
165  // As of now, the configuration for occurrences does not set/get
166  // abundance values. They are hard-coded here: 1 for presences, 0 for absences.
167  Log::instance()->debug( "Getting presence\n" );
168  OccurrencesPtr presence( new OccurrencesImpl(1.0) );
169  presence->setConfiguration( config->getSubsection( "Presence" ) );
170 
171  Log::instance()->debug( "Getting absence\n" );
172  OccurrencesPtr absence;
173 
174  if ( ConstConfigurationPtr absence_config = config->getSubsection( "Absence", false ) ) {
175 
176  absence = new OccurrencesImpl(0.0);
177  absence->setConfiguration( absence_config );
178  }
179 
180  if ( ! presence->numOccurrences() ) {
181 
182  std::string msg = "No presence points available.\n";
183 
184  Log::instance()->error( msg.c_str() );
185 
186  throw SamplerException( msg );
187  }
188 
189  Log::instance()->debug( "Loaded %u presence(s)\n", presence->numOccurrences() );
190 
191  int num_absences = 0;
192 
193  if ( absence ) {
194 
195  num_absences = absence->numOccurrences();
196  }
197 
198  Log::instance()->debug( "Loaded %u absence(s)\n", num_absences );
199 
200  _env = env;
201  _presence = presence;
202  _absence = absence;
203 
205 }
206 
207 /******************/
208 /*** get MinMax ***/
209 void SamplerImpl::getMinMax( Sample * min, Sample * max ) const
210 {
211  // normalize samples in occs objects
212  // first get all occurrence objects in the same container
213  OccurrencesPtr allOccs;
214 
215  if ( _presence ) {
216 
217  allOccs = new OccurrencesImpl( _presence->label(), _presence->coordSystem() );
218  }
219  else {
220 
221  allOccs = new OccurrencesImpl( _absence->label(), _absence->coordSystem() );
222  }
223 
224  allOccs->appendFrom( _presence );
225  allOccs->appendFrom( _absence );
226 
227  // now compute normalization parameters
228  allOccs->getMinMax( min, max );
229 }
230 
231 /*****************/
232 /*** normalize ***/
233 void SamplerImpl::normalize( Normalizer * normalizerPtr )
234 {
235  // Avoid renormalizing the sampler
236  if ( _normalized ) {
237 
238  return;
239  }
240 
241  if ( ! _env ) {
242 
243  std::string msg = "Cannot normalize sampler without an Environment.\n";
244 
245  Log::instance()->error( msg.c_str() );
246 
247  throw SamplerException( msg );
248  }
249 
250  // set env in all occurrences before normalizing env so that
251  // occurrences get the unnormalized values
253  _env->normalize( normalizerPtr );
254 
255  // need to normalize presences and absences even if _env is present
256  // because environment in occurrences was set with unnormalized values
257  // if _env doesn't exist, then normalize occurrences anyway
258  if ( _presence && _presence->numOccurrences() ) {
259 
260  _presence->normalize( normalizerPtr, _env->numCategoricalLayers() );
261  }
262 
263  if ( _absence && _absence->numOccurrences() ) {
264 
265  _absence->normalize( normalizerPtr, _env->numCategoricalLayers() );
266  }
267 
268  _normalized = true;
269 }
270 
271 /*****************/
272 /*** reset normalization ***/
274 {
275  if ( _normalized ) {
276 
277  _env->resetNormalization();
278 
279  if ( _presence && _presence->numOccurrences() ) {
280 
281  _presence->resetNormalization();
282  }
283 
284  if ( _absence && _absence->numOccurrences() ) {
285 
286  _absence->resetNormalization();
287  }
288 
289  _normalized = false;
290  }
291 }
292 
293 /***********************/
294 /*** num Independent ***/
295 int
297 {
298  if ( _env ) {
299  // get number of dimensions from environment object if it exists
300  return _env->numLayers();
301  }
302  else if ( _presence && _presence->hasEnvironment() ) {
303  // otherwise try to get it from presences
304  return _presence->dimension();
305  }
306  else if ( _absence && _absence->hasEnvironment() ) {
307  // otherwise try to get it from absences
308  return _absence->dimension();
309  }
310 
311  // neither object has dimensions defined
312  return 0;
313 }
314 
315 
316 /*********************/
317 /*** num Dependent ***/
318 int
320 {
321  return _presence ? _presence->numAttributes() : _absence->numAttributes();
322 }
323 
324 
325 /********************/
326 /*** num Presence ***/
327 int
329 {
330  return _presence ? _presence->numOccurrences() : 0;
331 }
332 
333 
334 /*******************/
335 /*** num Absence ***/
336 int
338 {
339  return _absence ? _absence->numOccurrences() : 0;
340 }
341 
342 
343 /**********************/
344 /*** get One Sample ***/
347 {
348  Random rnd;
349 
350  if ( ! _presence ) {
351 
352  std::string msg = "No presence points available for sampling.\n";
353 
354  Log::instance()->error( msg.c_str() );
355 
356  throw SamplerException( msg );
357  }
358 
359  if ( ! _presence->numOccurrences() ) {
360 
361  std::string msg = "Cannot use zero presence points for sampling.\n";
362 
363  Log::instance()->error( msg.c_str() );
364 
365  throw SamplerException( msg );
366  }
367 
368  // Probability of 0.5 of getting a presence point.
369  if ( rnd() < 0.5 ) {
370 
371  return getPresence();
372  }
373 
374  // Probability of 0.5 of getting an absence point.
375  // (if there are real absence points...)
376  if ( _absence && _absence->numOccurrences() ) {
377 
378  return getAbsence();
379  }
380 
381  return getPseudoAbsence();
382 }
383 
384 /******************************/
385 /*** generate Random Sample ***/
388 {
389  if ( ! _env ) {
390 
391  std::string msg = "Cannot generate random samples without an Environment object.\n";
392 
393  Log::instance()->error( msg.c_str() );
394 
395  throw SamplerException( msg );
396  }
397 
398  // Generate a random sample
399  static const Sample mysample( numDependent() );
400  Coord x,y;
401 
402  Sample env( _env->getRandom( &x, &y ) );
403 
404  OccurrencePtr oc = new OccurrenceImpl( "?", x, y, 0.0, abundance, mysample, env );
405 
406  return oc;
407 }
408 
409 /**************************/
410 /*** get Pseudo Absence ***/
413 {
414  return generateRandomSample(0.0);
415 }
416 
417 /**************************/
418 /*** get Pseudo Presence ***/
421 {
422  return generateRandomSample(1.0);
423 }
424 
425 /**************************/
426 /*** get Pseudo Absence ***/
428 SamplerImpl::getPseudoAbsence( const Model& model, const Scalar threshold ) const
429 {
430  double prob = 0.0;
431 
432  OccurrencePtr occ;
433 
434  int max_loop = 5000;
435 
436  int loop = 0;
437 
438  do {
439 
440  occ = getPseudoAbsence();
441 
442  if ( model ) {
443 
444  prob = model->getValue( occ->environment() );
445  }
446 
447  loop++;
448 
449  } while ( ( prob >= threshold ) && ( loop < max_loop ) );
450 
451  if ( loop == max_loop ) {
452 
453  std::string msg = "Exceeded maximum number of attempts to generate point outside model.\n";
454 
455  Log::instance()->error( msg.c_str() );
456 
457  throw SamplerException( msg );
458  }
459 
460  return occ;
461 }
462 
463 /**************************/
464 /*** get Pseudo Presence ***/
466 SamplerImpl::getPseudoPresence( const Model& model, const Scalar threshold ) const
467 {
468  double prob = 0.0;
469 
470  OccurrencePtr occ;
471 
472  int max_loop = 5000;
473 
474  int loop = 0;
475 
476  do {
477 
478  occ = getPseudoPresence();
479 
480  if ( model ) {
481 
482  prob = model->getValue( occ->environment() );
483  }
484 
485  loop++;
486 
487  } while ( ( prob < threshold ) && ( loop < max_loop ) );
488 
489  if ( loop == max_loop ) {
490 
491  std::string msg = "Exceeded maximum number of attempts to generate point inside model.\n";
492 
493  Log::instance()->error( msg.c_str() );
494 
495  throw SamplerException( msg );
496  }
497 
498  return occ;
499 }
500 
501 /**************************/
502 /*** get Pseudo Absence ***/
504 SamplerImpl::getPseudoAbsenceOutsideInterval( const Sample * minimum, const Sample * maximum ) const
505 {
506  bool not_found = true;
507 
508  OccurrencePtr occ;
509  Sample x;
510 
511  int max_loop = 5000;
512 
513  int loop = 0;
514 
515  do {
516 
517  occ = getPseudoAbsence();
518  x = occ->environment();
519 
520  for ( unsigned int i = 0; i < x.size(); i++ ) {
521 
522  if ( x.size() == 0 )
523  break;
524 
525  if ( x[i] < (*minimum)[i] || x[i] > (*maximum)[i] ) {
526 
527  not_found = false;
528  break;
529  }
530  }
531 
532  loop++;
533 
534  } while ( ( not_found ) && ( loop < max_loop ) );
535 
536  if ( loop == max_loop ) {
537 
538  std::string msg = "Exceeded maximum number of attempts to generate point outside interval.\n";
539 
540  Log::instance()->error( msg.c_str() );
541 
542  throw SamplerException( msg );
543  }
544 
545  return occ;
546 }
547 
548 /**************************/
549 /*** get Pseudo Presence ***/
551 SamplerImpl::getPseudoPresenceInsideInterval( const Sample * minimum, const Sample * maximum ) const
552 {
553  bool not_found = true;
554 
555  OccurrencePtr occ;
556  Sample x;
557 
558  int max_loop = 5000;
559 
560  int loop = 0;
561 
562  do {
563 
564  occ = getPseudoPresence();
565  x = occ->environment();
566 
567  for ( unsigned int i = 0; i < x.size(); i++ ) {
568 
569  if ( x.size() == 0 )
570  break;
571 
572  if ( x[i] >= (*minimum)[i] && x[i] <= (*maximum)[i] ) {
573 
574  not_found = false;
575  break;
576  }
577  }
578 
579  loop++;
580 
581  } while ( ( not_found ) && ( loop < max_loop ) );
582 
583  if ( loop == max_loop ) {
584 
585  std::string msg = "Exceeded maximum number of attempts to generate point inside interval.\n";
586 
587  Log::instance()->error( msg.c_str() );
588 
589  throw SamplerException( msg );
590  }
591 
592  return occ;
593 }
594 
595 /***************************/
596 /*** get Pseudo Absences ***/
598 SamplerImpl::getPseudoAbsences( const int& numPoints, const Model& model, const Scalar threshold, const bool geoUnique, const bool envUnique, const int idSequenceStart) const
599 {
600  int i = 0;
601 
602  OccurrencesPtr occurrences( new OccurrencesImpl(0.0) );
603 
604  do
605  {
606  OccurrencePtr point;
607 
608  if ( model ) {
609 
610  point = getPseudoAbsence( model, threshold );
611  }
612  else {
613 
614  point = getPseudoAbsence();
615  }
616 
617  if ( geoUnique ) {
618 
619  if ( envUnique ) {
620 
621  if ( isEnvironmentallyUnique( occurrences, point ) &&
622  isEnvironmentallyUnique( _presence, point ) &&
623  isEnvironmentallyUnique( _absence, point ) ) {
624 
625  std::ostringstream oss;
626  oss << idSequenceStart+i;
627  point->setId( oss.str() );
628  occurrences->insert( point );
629  i++;
630  }
631  }
632  else {
633 
634  if ( isSpatiallyUnique( occurrences, point ) &&
635  isSpatiallyUnique( _presence, point ) &&
636  isSpatiallyUnique( _absence, point ) ) {
637 
638  std::ostringstream oss;
639  oss << idSequenceStart+i;
640  point->setId( oss.str() );
641  occurrences->insert( point );
642  i++;
643  }
644  }
645  }
646  else {
647 
648  if ( envUnique ) {
649 
650  if ( isEnvironmentallyUnique( occurrences, point ) &&
651  isEnvironmentallyUnique( _presence, point ) &&
652  isEnvironmentallyUnique( _absence, point ) ) {
653 
654  std::ostringstream oss;
655  oss << idSequenceStart+i;
656  point->setId( oss.str() );
657  occurrences->insert( point );
658  i++;
659  }
660  }
661  else {
662 
663  std::ostringstream oss;
664  oss << idSequenceStart+i;
665  point->setId( oss.str() );
666  occurrences->insert( point );
667  i++;
668  }
669  }
670 
671  } while ( i < numPoints );
672 
673  return occurrences;
674 }
675 
676 /***************************/
677 /*** get Pseudo Presences ***/
679 SamplerImpl::getPseudoPresences( const int& numPoints, const Model& model, const Scalar threshold, const bool geoUnique, const bool envUnique, const int idSequenceStart) const
680 {
681  int i = 0;
682 
683  OccurrencesPtr occurrences( new OccurrencesImpl(1.0) );
684 
685  do
686  {
687  OccurrencePtr point;
688 
689  if ( model ) {
690 
691  point = getPseudoPresence( model, threshold );
692  }
693  else {
694 
695  point = getPseudoPresence();
696  }
697 
698  if ( geoUnique ) {
699 
700  if ( envUnique ) {
701 
702  if ( isEnvironmentallyUnique( occurrences, point ) &&
703  isEnvironmentallyUnique( _presence, point ) &&
704  isEnvironmentallyUnique( _absence, point ) ) {
705 
706  std::ostringstream oss;
707  oss << idSequenceStart+i;
708  point->setId( oss.str() );
709  occurrences->insert( point );
710  i++;
711  }
712  }
713  else {
714 
715  if ( isSpatiallyUnique( occurrences, point ) &&
716  isSpatiallyUnique( _presence, point ) &&
717  isSpatiallyUnique( _absence, point ) ) {
718 
719  std::ostringstream oss;
720  oss << idSequenceStart+i;
721  point->setId( oss.str() );
722  occurrences->insert( point );
723  i++;
724  }
725  }
726  }
727  else {
728 
729  if ( envUnique ) {
730 
731  if ( isEnvironmentallyUnique( occurrences, point ) &&
732  isEnvironmentallyUnique( _presence, point ) &&
733  isEnvironmentallyUnique( _absence, point ) ) {
734 
735  std::ostringstream oss;
736  oss << idSequenceStart+i;
737  point->setId( oss.str() );
738  occurrences->insert( point );
739  i++;
740  }
741  }
742  else {
743 
744  std::ostringstream oss;
745  oss << idSequenceStart+i;
746  point->setId( oss.str() );
747  occurrences->insert( point );
748  i++;
749  }
750  }
751 
752  } while ( i < numPoints );
753 
754  return occurrences;
755 }
756 
757 /***************************/
758 /*** get Pseudo Absences ***/
760 SamplerImpl::getPseudoAbsences( const int& numPoints, const Sample * minimum, const Sample * maximum, const bool geoUnique, const bool envUnique, const int idSequenceStart) const
761 {
762  int i = 0;
763 
764  OccurrencesPtr occurrences( new OccurrencesImpl(0.0) );
765 
766  do
767  {
768  OccurrencePtr point;
769 
770  point = getPseudoAbsenceOutsideInterval( minimum, maximum );
771 
772  if ( geoUnique ) {
773 
774  if ( envUnique ) {
775 
776  if ( isEnvironmentallyUnique( occurrences, point ) &&
777  isEnvironmentallyUnique( _presence, point ) &&
778  isEnvironmentallyUnique( _absence, point ) ) {
779 
780  std::ostringstream oss;
781  oss << idSequenceStart+i;
782  point->setId( oss.str() );
783  occurrences->insert( point );
784  i++;
785  }
786  }
787  else {
788 
789  if ( isSpatiallyUnique( occurrences, point ) &&
790  isSpatiallyUnique( _presence, point ) &&
791  isSpatiallyUnique( _absence, point ) ) {
792 
793  std::ostringstream oss;
794  oss << idSequenceStart+i;
795  point->setId( oss.str() );
796  occurrences->insert( point );
797  i++;
798  }
799  }
800  }
801  else {
802 
803  if ( envUnique ) {
804 
805  if ( isEnvironmentallyUnique( occurrences, point ) &&
806  isEnvironmentallyUnique( _presence, point ) &&
807  isEnvironmentallyUnique( _absence, point ) ) {
808 
809  std::ostringstream oss;
810  oss << idSequenceStart+i;
811  point->setId( oss.str() );
812  occurrences->insert( point );
813  i++;
814  }
815  }
816  else {
817 
818  std::ostringstream oss;
819  oss << idSequenceStart+i;
820  point->setId( oss.str() );
821  occurrences->insert( point );
822  i++;
823  }
824  }
825 
826  } while ( i < numPoints );
827 
828  return occurrences;
829 }
830 
831 /***************************/
832 /*** get Pseudo Presences ***/
834 SamplerImpl::getPseudoPresences( const int& numPoints, const Sample * minimum, const Sample * maximum, const bool geoUnique, const bool envUnique, const int idSequenceStart) const
835 {
836  int i = 0;
837 
838  OccurrencesPtr occurrences( new OccurrencesImpl(1.0) );
839 
840  do
841  {
842  OccurrencePtr point;
843 
844  point = getPseudoPresenceInsideInterval( minimum, maximum );
845 
846  if ( geoUnique ) {
847 
848  if ( envUnique ) {
849 
850  if ( isEnvironmentallyUnique( occurrences, point ) &&
851  isEnvironmentallyUnique( _presence, point ) &&
852  isEnvironmentallyUnique( _absence, point ) ) {
853 
854  std::ostringstream oss;
855  oss << idSequenceStart+i;
856  point->setId( oss.str() );
857  occurrences->insert( point );
858  i++;
859  }
860  }
861  else {
862 
863  if ( isSpatiallyUnique( occurrences, point ) &&
864  isSpatiallyUnique( _presence, point ) &&
865  isSpatiallyUnique( _absence, point ) ) {
866 
867  std::ostringstream oss;
868  oss << idSequenceStart+i;
869  point->setId( oss.str() );
870  occurrences->insert( point );
871  i++;
872  }
873  }
874  }
875  else {
876 
877  if ( envUnique ) {
878 
879  if ( isEnvironmentallyUnique( occurrences, point ) &&
880  isEnvironmentallyUnique( _presence, point ) &&
881  isEnvironmentallyUnique( _absence, point ) ) {
882 
883  std::ostringstream oss;
884  oss << idSequenceStart+i;
885  point->setId( oss.str() );
886  occurrences->insert( point );
887  i++;
888  }
889  }
890  else {
891 
892  std::ostringstream oss;
893  oss << idSequenceStart+i;
894  point->setId( oss.str() );
895  occurrences->insert( point );
896  i++;
897  }
898  }
899 
900  } while ( i < numPoints );
901 
902  return occurrences;
903 }
904 
905 /**********************/
906 /*** is Categorical ***/
907 int
909 {
910  if ( _env ) {
911 
912  return _env->isCategorical( i );
913  }
914  else {
915  // if there is no environment obj, assumes all variables are continuous
916  // right now there is no mechanism to define whether a variable is
917  // continuous or categorical when occurrences already come with their
918  // samples populated
919  return false;
920  }
921 }
922 
923 
924 /******************************/
925 /*** environmentally unique ***/
926 void
928 {
929  Log::instance()->info( "Applying filter: enviromentally unique\n" );
930 
931  // Presences
932  this->environmentallyUnique( _presence, "Presence" );
933 
934  // Absences
935  this->environmentallyUnique( _absence, "Absence" );
936 }
937 
938 
939 /******************************/
940 /*** environmentally unique ***/
941 void
942 SamplerImpl::environmentallyUnique( OccurrencesPtr& occurrencesPtr, const char *type )
943 {
944  if ( ! ( occurrencesPtr && occurrencesPtr->numOccurrences() ) ) {
945 
946  return;
947  }
948 
949  OccurrencesImpl::iterator it = occurrencesPtr->begin();
950  OccurrencesImpl::iterator last = occurrencesPtr->end();
951 
952  while ( it != last ) {
953 
954  Sample sample = _env->getUnnormalized( (*it)->x(), (*it)->y() );
955 
956  OccurrencesImpl::iterator next = it + 1;
957 
958  while ( next != last ) {
959 
960  Sample nextSample = _env->getUnnormalized( (*next)->x(), (*next)->y() );
961 
962  if ( sample.equals( nextSample ) ) {
963 
964  Log::instance()->info( "%s Point \"%s\" at (%f,%f) has no unique environment. It will be discarded.\n", type, ((*next)->id()).c_str(), (*next)->x(), (*next)->y() );
965 
966  // Remove duplicates
967  next = occurrencesPtr->erase( next );
968  last = occurrencesPtr->end();
969 
970  // Increase abundance in original occurence
971  (*it)->setAbundance( (*it)->abundance() + 1 );
972 
973  // No need to increment "next" because "erase" actually swaps
974  // the last element with the one that was just erased!
975  }
976  else {
977 
978  ++next;
979  }
980  }
981 
982  ++it;
983  }
984 }
985 
986 
987 /************************/
988 /*** spatially unique ***/
989 void
991 {
992  Log::instance()->info( "Applying filter: spatially unique\n" );
993 
994  // Presences
995  this->spatiallyUnique( _presence, "Presence" );
996 
997  // Absences
998  this->spatiallyUnique( _absence, "Absence" );
999 }
1000 
1001 
1002 /************************/
1003 /*** spatially unique ***/
1004 void
1005 SamplerImpl::spatiallyUnique( OccurrencesPtr& occurrencesPtr, const char *type )
1006 {
1007  if ( ! ( occurrencesPtr && occurrencesPtr->numOccurrences() ) ) {
1008 
1009  return;
1010  }
1011 
1012  Map *mask = _env->getMask();
1013 
1014  // If mask is undefined, use first layer as a mask
1015  if ( ! mask ) {
1016 
1017  mask = _env->getLayer( 0 );
1018  }
1019 
1020  OccurrencesImpl::iterator it = occurrencesPtr->begin();
1021  OccurrencesImpl::iterator last = occurrencesPtr->end();
1022 
1023  while ( it != last ) {
1024 
1025  int row, col;
1026 
1027  mask->getRowColumn( (*it)->x(), (*it)->y(), &row, &col );
1028 
1029  OccurrencesImpl::iterator next = it + 1;
1030 
1031  while ( next != last ) {
1032 
1033  int nextRow, nextCol;
1034 
1035  mask->getRowColumn( (*next)->x(), (*next)->y(), &nextRow, &nextCol );
1036 
1037  if ( row == nextRow && col == nextCol ) {
1038 
1039  Log::instance()->info( "%s Point \"%s\" at (%f,%f) has no unique geography. It will be discarded.\n", type, ((*next)->id()).c_str(), (*next)->x(), (*next)->y() );
1040 
1041  // Remove duplicates
1042  next = occurrencesPtr->erase( next );
1043  last = occurrencesPtr->end();
1044 
1045  // Increase abundance in original occurence
1046  (*it)->setAbundance( (*it)->abundance() + 1 );
1047 
1048  // No need to increment "next" because "erase" actually swaps
1049  // the last element with the one that was just erased!
1050  }
1051  else {
1052 
1053  ++next;
1054  }
1055  }
1056 
1057  ++it;
1058  }
1059 }
1060 
1061 
1062 /*********************************/
1063 /*** is Environmentally Unique ***/
1064 bool
1066 {
1067  if ( ! ( occurrences && occurrences->numOccurrences() ) ) {
1068 
1069  return true;
1070  }
1071 
1072  OccurrencesImpl::iterator it = occurrences->begin();
1073  OccurrencesImpl::iterator last = occurrences->end();
1074 
1075  Sample sample = _env->getUnnormalized( point->x(), point->y() );
1076 
1077  while ( it != last ) {
1078 
1079  Sample nextSample = _env->getUnnormalized( (*it)->x(), (*it)->y() );
1080 
1081  if ( sample.equals( nextSample ) ) {
1082 
1083  return false;
1084  }
1085  else {
1086 
1087  ++it;
1088  }
1089  }
1090 
1091  return true;
1092 }
1093 
1094 /***************************/
1095 /*** is Spatially Unique ***/
1096 bool
1097 SamplerImpl::isSpatiallyUnique( const OccurrencesPtr& occurrences, const OccurrencePtr& point ) const
1098 {
1099  if ( ! ( occurrences && occurrences->numOccurrences() ) ) {
1100 
1101  return true;
1102  }
1103 
1104  Map *mask = _env->getMask();
1105 
1106  // If mask is undefined, use first layer as a mask
1107  if ( ! mask ) {
1108 
1109  mask = _env->getLayer( 0 );
1110  }
1111 
1112  OccurrencesImpl::iterator it = occurrences->begin();
1113  OccurrencesImpl::iterator last = occurrences->end();
1114 
1115  int row, col;
1116  mask->getRowColumn( point->x(), point->y(), &row, &col );
1117 
1118  while ( it != last ) {
1119 
1120  int next_row, next_col;
1121 
1122  mask->getRowColumn( (*it)->x(), (*it)->y(), &next_row, &next_col );
1123 
1124  if ( row == next_row && col == next_col ) {
1125 
1126  return false;
1127  }
1128  else {
1129 
1130  ++it;
1131  }
1132  }
1133 
1134  return true;
1135 }
1136 
1137 
1138 /*****************************/
1139 /*** get Random Occurrence ***/
1142 {
1143  // This has been rewritten to eliminate the
1144  // possibly endless loop.
1145  // It assumes that by the time this routine is called,
1146  // all occurrences have valid environment data.
1147 
1148  return occur->getRandom();
1149 
1150 }
1151 
1152 
1153 /************/
1154 /*** dump ***/
1155 void
1157 {
1158  if ( _presence ) {
1159 
1160  _presence->dump( "Presences" );
1161  }
1162  if ( _absence ) {
1163 
1164  _absence->dump( "Absences" );
1165  }
1166 }
1167 
1168 
1169 /***********************/
1170 /**** split Sampler ****/
1171 void splitSampler(const SamplerPtr& orig,
1172  SamplerPtr * train,
1173  SamplerPtr * test,
1174  double propTrain)
1175 {
1176  // split presences
1177  OccurrencesPtr presence = orig->getPresences();
1178 
1179  OccurrencesPtr test_presence;
1180 
1181  OccurrencesPtr train_presence;
1182 
1183  if ( presence ) {
1184 
1185  test_presence = new OccurrencesImpl( presence->label(), presence->coordSystem() );
1186 
1187  train_presence = new OccurrencesImpl( presence->label(), presence->coordSystem() );
1188 
1189  splitOccurrences( presence, train_presence, test_presence, propTrain );
1190  }
1191 
1192  // split absences
1193  OccurrencesPtr train_absence;
1194  OccurrencesPtr test_absence;
1195 
1196  OccurrencesPtr absence = orig->getAbsences();
1197 
1198  if ( absence ) {
1199 
1200  test_absence = new OccurrencesImpl( absence->label(), absence->coordSystem() );
1201 
1202  train_absence = new OccurrencesImpl( absence->label(), absence->coordSystem() );
1203 
1204  splitOccurrences( absence, train_absence, test_absence, propTrain );
1205  }
1206 
1207  *train = new SamplerImpl( orig->getEnvironment(),
1208  train_presence, train_absence,
1209  orig->isNormalized() );
1210 
1211  *test = new SamplerImpl( orig->getEnvironment(),
1212  test_presence, test_absence,
1213  orig->isNormalized() );
1214 }
1215 
1216 /*************/
1217 /*** clone ***/
1218 SamplerPtr
1220 {
1221  OccurrencesPtr presences;
1222  OccurrencesPtr absences;
1223 
1224  if ( orig->numPresence() ) {
1225 
1226  presences = orig->getPresences()->clone();
1227  }
1228 
1229  if ( orig->numAbsence() ) {
1230 
1231  absences = orig->getAbsences()->clone();
1232  }
1233 
1234  EnvironmentPtr environment = orig->getEnvironment()->clone();
1235 
1236  SamplerPtr fresh_sampler = createSampler( environment, presences, absences );
1237 
1238  return fresh_sampler;
1239 }
1240 
OccurrencePtr getPseudoAbsence() const
Definition: Sampler.cpp:412
OccurrencePtr generateRandomSample(Scalar abundance=1.0) const
Definition: Sampler.cpp:387
void spatiallyUnique()
Definition: Sampler.cpp:990
void resetNormalization()
Definition: Sampler.cpp:273
std::vector< OccurrencePtr >::iterator iterator
Definition: Occurrences.hh:86
ConstOccurrencePtr getAbsence() const
Definition: Sampler.hh:162
void getMinMax(Sample *min, Sample *max) const
Definition: Sampler.cpp:209
double Scalar
Type of map values.
Definition: om_defs.hh:39
OccurrencesPtr getPseudoAbsences(const int &numPoints, const Model &model, const Scalar threshold=0.5, const bool geoUnique=false, const bool envUnique=false, const int idSequenceStart=1) const
Definition: Sampler.cpp:598
SamplerPtr cloneSampler(const SamplerPtr &orig)
Definition: Sampler.cpp:1219
int numAbsence() const
Definition: Sampler.cpp:337
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
bool equals(const Sample &) const
Definition: Sample.cpp:249
OccurrencesPtr _presence
Definition: Sampler.hh:276
int isCategorical(int i)
Definition: Sampler.cpp:908
EnvironmentPtr createEnvironment(const std::vector< std::string > &categs, const std::vector< std::string > &maps, const std::string &mask_file)
Definition: Environment.cpp:55
SamplerPtr createSampler(const EnvironmentPtr &env, const OccurrencesPtr &presence, const OccurrencesPtr &absence)
Definition: Sampler.cpp:52
ConstOccurrencePtr getPresence() const
Definition: Sampler.hh:150
EnvironmentPtr _env
Definition: Sampler.hh:278
void error(const char *format,...)
'Error' level.
Definition: Log.cpp:290
ConstOccurrencePtr getOneSample() const
Definition: Sampler.cpp:346
bool isSpatiallyUnique(const OccurrencesPtr &occurrences, const OccurrencePtr &point) const
Definition: Sampler.cpp:1097
Definition: Random.hh:44
ConstOccurrencePtr getRandomOccurrence(const OccurrencesPtr &occur) const
Definition: Sampler.cpp:1141
OccurrencePtr getPseudoAbsenceOutsideInterval(const Sample *minimum, const Sample *maximum) const
Definition: Sampler.cpp:504
void splitSampler(const SamplerPtr &orig, SamplerPtr *train, SamplerPtr *test, double propTrain)
Definition: Sampler.cpp:1171
void dump() const
Definition: Sampler.cpp:1156
ConfigurationPtr getConfiguration() const
Definition: Sampler.cpp:127
ReferenceCountedPointer< SamplerImpl > SamplerPtr
Definition: Sampler.hh:47
void setConfiguration(const ConstConfigurationPtr &)
Definition: Sampler.cpp:156
void splitOccurrences(const OccurrencesPtr &occurrences, OccurrencesPtr &trainOccurrences, OccurrencesPtr &testOccurrences, double propTrain)
Definition: Map.hh:49
void setEnvironmentInOccurrences()
Definition: Sampler.cpp:108
std::size_t size() const
Definition: Sample.hh:70
OccurrencesPtr _absence
Definition: Sampler.hh:277
OccurrencesPtr getPseudoPresences(const int &numPoints, const Model &model, const Scalar threshold=0.5, const bool geoUnique=false, const bool envUnique=false, const int idSequenceStart=1) const
Definition: Sampler.cpp:679
void info(const char *format,...)
'Info' level.
Definition: Log.cpp:256
OccurrencePtr getPseudoPresenceInsideInterval(const Sample *minimum, const Sample *maximum) const
Definition: Sampler.cpp:551
void normalize(Normalizer *normalizerPtr)
Definition: Sampler.cpp:233
double Coord
Type of map coordinates.
Definition: om_defs.hh:38
bool isEnvironmentallyUnique(const OccurrencesPtr &occurrences, const OccurrencePtr &point) const
Definition: Sampler.cpp:1065
int numIndependent() const
Definition: Sampler.cpp:296
void environmentallyUnique()
Definition: Sampler.cpp:927
void debug(const char *format,...)
'Debug' level.
Definition: Log.cpp:237
bool _normalized
Definition: Sampler.hh:279
int min(int v1, int v2)
Definition: rules_base.cpp:56
int numDependent() const
Definition: Sampler.cpp:319
int getRowColumn(Coord x, Coord y, int *row, int *col)
Definition: Map.cpp:128
Definition: Sample.hh:25
OccurrencePtr getPseudoPresence() const
Definition: Sampler.cpp:420
int numPresence() const
Definition: Sampler.cpp:328