openModeller  Version 1.5.0
Occurrences.cpp
Go to the documentation of this file.
1 
29 
31 #include <openmodeller/Random.hh>
32 #include <openmodeller/Log.hh>
37 
38 #include <string>
39 using std::string;
40 
41 // String stream is included for the dump method
42 #include <sstream>
43 using std::ostringstream;
44 
45 #include <algorithm> // needed for random_shuffle
46 
47 #include <math.h>
48 
49 /****************************************************************/
50 /************************ Occurrences ***************************/
51 
52 /*******************/
53 /*** Destructor ***/
54 
56 {
57  delete gt_;
58 }
59 
60 void
61 OccurrencesImpl::setLabel( const string& label )
62 {
63  label_ = label;
64 }
65 
66 void
68 {
69  cs_ = cs;
71 }
72 
73 void
75 {
76  if ( gt_ ) {
77 
78  delete gt_;
79  }
80 
82 }
83 
84 /*********************/
85 /*** configuration ***/
86 
89 {
90  ConfigurationPtr config( new ConfigurationImpl("Occurrences") );
91 
92  config->addNameValue( "Label", label() );
93 
94  ConfigurationPtr cs( new ConfigurationImpl( "CoordinateSystem" ) );
95  cs->setValue( coordSystem() );
96 
97  config->addSubsection( cs );
98 
99  config->addNameValue( "Count", int(occur_.size()) );
100 
101  const_iterator oc = occur_.begin();
102  const_iterator end = occur_.end();
103 
104  while ( oc != end ) {
105 
106  ConfigurationPtr cfg( new ConfigurationImpl("Point") );
107  std::string id = (*oc)->id();
108  Scalar x = (*oc)->x();
109  Scalar y = (*oc)->y();
110  gt_->transfIn( &x, &y );
111  int precision = 9;
112  cfg->addNameValue( "Id", id );
113  cfg->addNameValue( "X", x, precision );
114  cfg->addNameValue( "Y", y, precision );
115  if ( (*oc)->hasEnvironment() ) {
116 
117  cfg->addNameValue( "Sample", (*oc)->originalEnvironment() );
118  }
119  config->addSubsection( cfg );
120 
121  oc++;
122  }
123 
124  return config;
125 }
126 
127 void
129 {
130  label_ = config->getAttribute("Label");
131 
132  ConstConfigurationPtr cs_config = config->getSubsection( "CoordinateSystem", false );
133 
134  if ( ! cs_config ) {
135 
136  Log::instance()->warn( "Occurrences has no Coordinate System. Assuming LatLong WSG84\n" );
138  }
139  else {
140 
141  cs_ = cs_config->getValue();
142  }
143 
144  initGeoTransform( );
145 
146  Configuration::subsection_list subs = config->getAllSubsections();
147 
148  Configuration::subsection_list::iterator begin = subs.begin();
149  Configuration::subsection_list::iterator end = subs.end();
150 
151  std::vector<Scalar> attrs;
152 
153  for ( ; begin != end; ++begin ) {
154 
155  if ( (*begin)->getName() != "Point" ) {
156 
157  continue;
158  }
159 
160  std::string id = (*begin)->getAttribute("Id");
161  Scalar x = (*begin)->getAttributeAsDouble( "X", 0.0 );
162  Scalar y = (*begin)->getAttributeAsDouble( "Y", 0.0 );
163  Scalar abundance = (*begin)->getAttributeAsDouble( "Abundance", default_abundance_ );
164 
165  try {
166 
167  // If present, load environmental values from XML
168  std::vector<Scalar> unnormenv = (*begin)->getAttributeAsVecDouble( "Sample" );
169  createOccurrence( id, x, y, 0, abundance, attrs, unnormenv );
170  }
171  catch ( AttributeNotFound& e ) {
172 
173  // Sample attribute is optional
174  createOccurrence( id, x, y, 0, abundance, 0, 0, 0, 0 );
175  UNUSED(e);
176  }
177  }
178 }
179 
180 void
181 OccurrencesImpl::setEnvironment( const EnvironmentPtr& env, const char *type )
182 {
183  if ( isEmpty() ) {
184 
185  return;
186  }
187 
188  OccurrencesImpl::iterator oc = occur_.begin();
189  OccurrencesImpl::iterator fin = occur_.end();
190 
191  while ( oc != fin ) {
192 
193  Sample sample = env->getUnnormalized( (*oc)->x(), (*oc)->y() );
194 
195  if ( sample.size() == 0 ) {
196 
197  Log::instance()->warn( "%s Point \"%s\" at (%f,%f) has no environment. It will be discarded.\n", type, ((*oc)->id()).c_str(), (*oc)->x(), (*oc)->y() );
198 
199  oc = occur_.erase( oc );
200  fin = occur_.end();
201  }
202  else {
203 
204  (*oc)->setUnnormalizedEnvironment( sample );
205  (*oc)->setNormalizedEnvironment( Sample() );
206 
207  ++oc;
208  }
209  }
210 }
211 
212 /*****************/
213 /*** normalize ***/
214 void
215 OccurrencesImpl::normalize( Normalizer * normalizerPtr, size_t categoricalThreshold )
216 {
217  if ( ! normalizerPtr ) {
218 
219  return;
220  }
221 
224 
225  // set the normalized values
226  while ( occ != end ) {
227 
228  (*occ)->normalize( normalizerPtr, categoricalThreshold );
229  ++occ;
230  }
231 }
232 
233 /***************************/
234 /*** reset Normalization ***/
235 void
237 {
240 
241  while ( occ != end ) {
242 
243  (*occ)->setNormalizedEnvironment( (*occ)->originalEnvironment() );
244  ++occ;
245  }
246 }
247 
248 /******************/
249 /*** get MinMax ***/
250 void
252 {
255 
256  *min = Sample( (*occ)->environment() );
257  *max = Sample( (*occ)->environment() );
258 
259  // grab max and min values per variable
260  while ( occ != end ) {
261 
262  Sample sample = (*occ)->environment();
263  *min &= sample;
264  *max |= sample;
265  ++occ;
266  }
267 }
268 
269 
270 /**************/
271 /*** insert ***/
272 void
273 OccurrencesImpl::createOccurrence( const std::string& id,
274  Coord longitude, Coord latitude,
275  Scalar error, Scalar abundance,
276  int num_attributes, Scalar *attributes,
277  int num_env, Scalar *env )
278 {
279  // Transforms the given coordinates in the common openModeller
280  // coordinate system.
281  gt_->transfOut( &longitude, &latitude );
282 
283  insert( new OccurrenceImpl( id, longitude, latitude, error, abundance,
284  num_attributes, attributes,
285  num_env, env ) );
286 
287 }
288 
289 void
290 OccurrencesImpl::createOccurrence( const std::string& id,
291  Coord longitude, Coord latitude,
292  Scalar error, Scalar abundance,
293  std::vector<double> attributes,
294  std::vector<double> env)
295 {
296  // Transforms the given coordinates in the common openModeller
297  // coordinate system.
298  gt_->transfOut( &longitude, &latitude );
299 
300  insert( new OccurrenceImpl( id, longitude, latitude, error, abundance,
301  attributes, env ) );
302 
303 }
304 
305 void
307 {
308  occur_.push_back( oc );
309 }
310 
313 {
314 
315  const_iterator it = occur_.begin();
316  const_iterator end = occur_.end();
317 
319 
320  while( it != end ) {
321 
322  clone->insert( new OccurrenceImpl( *(*it) ) );
323 
324  it++;
325  }
326 
327  return clone;
328 }
329 
330 bool
332 {
333  if ( ! numOccurrences() ) {
334 
335  return false;
336  }
337 
338  const_iterator it = occur_.begin();
339 
340  return (*it)->hasEnvironment();
341 }
342 
343 int
345 {
346  if ( hasEnvironment() ) {
347 
348  const_iterator it = occur_.begin();
349 
350  return (*it)->environment().size();
351  }
352  else {
353 
354  return 0;
355  }
356 }
357 
358 /******************/
359 /*** get Random ***/
362 {
363  Random rnd;
364  int selected = (int) rnd( numOccurrences() );
365 
366  return occur_[ selected ];
367 }
368 
371 {
372  swap( occur_.back(), (*it) );
373  occur_.pop_back();
374  return it;
375 }
376 
377 
378 void
380 {
381  if ( ! source ) {
382 
383  return;
384  }
385 
386  const_iterator it = source->begin();
387  const_iterator end = source->end();
388 
389  while ( it != end ) {
390 
391  insert(*it);
392  ++it;
393  }
394 }
395 
396 
397 /******************************/
398 /*** get Environment Matrix ***/
399 std::vector<ScalarVector>
401 {
402  std::vector<ScalarVector> matrix( dimension() );
403 
404  // Initialize matrix
405  for ( unsigned int i = 0; i < matrix.size(); i++ ) {
406 
407  matrix[i] = ScalarVector( numOccurrences() );
408  }
409 
410  const_iterator c = occur_.begin();
411  const_iterator end = occur_.end();
412 
413  int j = 0;
414 
415  // For each Occurrence
416  while ( c != end ) {
417 
418  Sample const& sample = (*c)->environment();
419 
420  // For each layer
421  for ( unsigned int i = 0; i < matrix.size(); i++ ) {
422 
423  // Feed new matrix
424  matrix[i][j] = sample[i];
425  }
426 
427  ++c;
428  ++j;
429  }
430 
431  return matrix;
432 }
433 
434 
435 /*************/
436 /*** print ***/
437 void
438 OccurrencesImpl::dump( std::string msg ) const
439 {
440  Log::instance()->info( "%s\n", msg.c_str() );
441 
442  // Occurrences general data.
443  Log::instance()->info( "Label: %s\n", label_.c_str() );
444  Log::instance()->info( "\nOccurrences: %d\n\n", numOccurrences() );
445 
446  const_iterator c = occur_.begin();
447  const_iterator end = occur_.end();
448 
449  while ( c != end ) {
450 
451  // Get attributes
452 
453  ostringstream ss;
454 
455  Sample::const_iterator attr = (*c)->attributes().begin();
456  Sample::const_iterator end = (*c)->attributes().end();
457  ss << "( ";
458 
459  while ( attr != end ) {
460 
461  ss << *attr << " ";
462  attr++;
463  }
464 
465  ss << ")\n";
466 
467  Log::instance()->info( "(%+8.4f, %+8.4f)\n", (*c)->x(), (*c)->y() );
468 
469  (*c)->dump();
470 
471  c++;
472  }
473 }
474 
475 
476 /***************************/
477 /**** split Occurrences ****/
478 void splitOccurrences(const OccurrencesPtr& occurrences,
479  OccurrencesPtr& trainOccurrences,
480  OccurrencesPtr& testOccurrences,
481  double propTrain)
482 {
483  // add all samples to an array
484  int i;
485  int n = occurrences->numOccurrences();
486  int k = (int) (n * propTrain);
487  std::vector<int> goToTrainSet(n);
488 
489  // first k are set to go to train set
490  for ( i = 0; i < k; i++ ) {
491 
492  goToTrainSet[i] = 1;
493  }
494 
495  // all others are set to go to test set
496  for ( ; i < n; i++ ) {
497 
498  goToTrainSet[i] = 0;
499  }
500 
501  // shuffle elements well
502  initRandom();
503 
504  std::random_shuffle( goToTrainSet.begin(), goToTrainSet.end() );
505 
506  // traverse occurrences copying them to the right sampler
507  OccurrencesImpl::const_iterator it = occurrences->begin();
508  OccurrencesImpl::const_iterator fin = occurrences->end();
509 
510  i = 0;
511 
512  while( it != fin ) {
513 
514  if ( goToTrainSet[i] ) {
515 
516  trainOccurrences->insert( new OccurrenceImpl( *(*it) ) );
517  }
518  else {
519 
520  testOccurrences->insert( new OccurrenceImpl( *(*it) ) );
521  }
522 
523  ++i; ++it;
524  }
525 }
526 
527 /***************************/
528 /**** split Occurrences in train/test using distance between points( Missae 09/2009 ) ****/
529 void splitOccurrences(const OccurrencesPtr& occurrences,
530  OccurrencesPtr& trainOccurrences,
531  OccurrencesPtr& testOccurrences)
532 {
533  double dist, distLimit=8.0, x, y, xmin, xmax, ymin, ymax, deltax, deltay;
534  unsigned int flag = 0, i = 0, itrain=0, ktrain=0, ioccur=0, flagOk=0;
535  std::vector<double> occurTransformx( occurrences->numOccurrences() );
536  std::vector<double> occurTransformy( occurrences->numOccurrences() );
537  std::vector<int> testId( occurrences->numOccurrences() );
538  int n = occurrences->numOccurrences(), icont=0;
539  int nptTeste = (int) (n * 0.40) + 2;
540 
541  OccurrencesImpl::const_iterator it = occurrences->begin();
542  OccurrencesImpl::const_iterator fin = occurrences->end();
543 
544  xmin = xmax = (*it)->x();
545  ymin = ymax = (*it)->y();
546 
547  ++it;
548  while( it != fin ) {
549  if ( (*it)->x() < xmin ) xmin = (*it)->x();
550  else if ( (*it)->x() > xmax) xmax = (*it)->x();
551  if ( (*it)->y() < ymin) ymin = (*it)->y();
552  else if ( (*it)->y() > ymax) ymax = (*it)->y();
553  ++it;
554  }
555  deltax = xmax - xmin;
556  deltay = ymax - ymin;
557 
558  it = occurrences->begin();
559  while( it != fin ) {
560  occurTransformx[i] = 100 * ( (*it)->x() - xmin ) / deltax;
561  occurTransformy[i] = 100 * ( (*it)->y() - ymin ) / deltay;
562  i++;
563  ++it;
564  }
565  do{
566  flagOk=0, flag = 0, itrain=0, ktrain=0, ioccur=0, icont=0;
567 
568  it = occurrences->begin();
569 
570  trainOccurrences->insert( new OccurrenceImpl( *(*it) ) );
571  testOccurrences->insert( new OccurrenceImpl( *(*it) ) );
572 
573  ++it;
574  testId[ktrain] = ioccur;
575  ktrain++;
576  ioccur++;
577 
578  while( it != fin ) {
579 
580  for ( i = 0; i < ktrain; i++ ) {
581  itrain = testId[i];
582  x = occurTransformx[ioccur] - occurTransformx[itrain];
583  y = occurTransformy[ioccur] - occurTransformy[itrain];
584  dist = sqrt( (x*x) + (y*y) );
585 
586  if ( dist < distLimit) {
587  testOccurrences->insert( new OccurrenceImpl( *(*it) ) );
588  flag = 1;
589  icont++;
590  break;
591  }
592  }
593 
594  if (icont > nptTeste){
595  OccurrencesImpl::iterator it = testOccurrences->begin();
596  OccurrencesImpl::iterator last = testOccurrences->end();
597  --last;
598  while ( it != last ) {
599  it = testOccurrences->erase(it);
600  last = testOccurrences->end();
601  --last;
602  }
603  it = testOccurrences->erase(it);
604 
605  OccurrencesImpl::iterator itt = trainOccurrences->begin();
606  OccurrencesImpl::iterator lastt = trainOccurrences->end();
607  --lastt;
608  while ( itt != lastt ) {
609  itt = trainOccurrences->erase(itt);
610  lastt = trainOccurrences->end();
611  --lastt;
612  }
613  itt = trainOccurrences->erase(itt);
614 
615  if (distLimit > 1.0) distLimit = distLimit - 1.0;
616  else if (distLimit > 0.2) distLimit = distLimit - 0.2;
617  else distLimit = distLimit - 0.02;
618  flagOk=1;
619  break;
620  }
621  if (!flag){
622  trainOccurrences->insert( new OccurrenceImpl( *(*it) ) );
623  testId[ktrain] = ioccur;
624  ktrain++;
625  }else{
626  flag = 0;
627  }
628  ioccur++;
629  ++it;
630  }
631  }while(flagOk == 1);
632 }
std::vector< OccurrencePtr >::iterator iterator
Definition: Occurrences.hh:86
void warn(const char *format,...)
'Warn' level.
Definition: Log.cpp:273
void swap(ReferenceCountedPointer< T > &lhs, ReferenceCountedPointer< T > &rhs)
Definition: refcount.hh:146
bool isEmpty() const
Definition: Occurrences.hh:198
void initGeoTransform()
Definition: Occurrences.cpp:74
std::vector< ConfigurationPtr > subsection_list
void createOccurrence(const std::string &id, Coord longitude, Coord latitude, Scalar error, Scalar abundance, int num_attributes=0, Scalar *attributes=0, int num_env=0, Scalar *env=0)
double Scalar
Type of map values.
Definition: om_defs.hh:39
int initRandom(unsigned int new_seed)
const_iterator begin() const
Definition: Occurrences.hh:204
void setCoordinateSystem(const std::string &cs)
Definition: Occurrences.cpp:67
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
OccurrencesImpl * clone() const
OccurrencesImpl(double default_abundance)
Definition: Occurrences.hh:91
ConstOccurrencePtr getRandom() const
int transfIn(double *x, double *y) const
std::vector< OccurrencePtr > occur_
Definition: Occurrences.hh:253
std::string label_
Definition: Occurrences.hh:248
virtual void setConfiguration(const ConstConfigurationPtr &)
void normalize(Normalizer *normalizerPtr, size_t categoricalThreshold=0)
const_iterator end() const
Definition: Occurrences.hh:205
double default_abundance_
Definition: Occurrences.hh:246
void getMinMax(Sample *min, Sample *max) const
Definition: Random.hh:44
void setLabel(const std::string &label)
Definition: Occurrences.cpp:61
std::vector< ScalarVector > getEnvironmentMatrix()
#define UNUSED(symbol)
Definition: os_specific.hh:55
GeoTransform * gt_
Definition: Occurrences.hh:251
int transfOut(double *x, double *y) const
bool hasEnvironment() const
static char const * getDefaultCS()
void splitOccurrences(const OccurrencesPtr &occurrences, OccurrencesPtr &trainOccurrences, OccurrencesPtr &testOccurrences, double propTrain)
std::vector< Scalar > ScalarVector
Definition: niche_mosaic.hh:33
void insert(const OccurrencePtr &)
void resetNormalization()
Scalar const * const_iterator
Definition: Sample.hh:90
std::size_t size() const
Definition: Sample.hh:70
virtual ConfigurationPtr getConfiguration() const
Definition: Occurrences.cpp:88
void setEnvironment(const EnvironmentPtr &env, const char *type="Sample")
iterator erase(const iterator &it)
void info(const char *format,...)
'Info' level.
Definition: Log.cpp:256
void dump(std::string msg="") const
double Coord
Type of map coordinates.
Definition: om_defs.hh:38
int dimension() const
std::vector< OccurrencePtr >::const_iterator const_iterator
Definition: Occurrences.hh:85
char const * coordSystem() const
Definition: Occurrences.hh:129
char const * label() const
Definition: Occurrences.hh:126
std::string cs_
Definition: Occurrences.hh:249
int min(int v1, int v2)
Definition: rules_base.cpp:56
Definition: Sample.hh:25
int numOccurrences() const
Definition: Occurrences.hh:195
static char error[256]
Definition: FileParser.cpp:42
void appendFrom(const OccurrencesPtr &source)