openModeller  Version 1.4.0
Occurrences.cpp
Go to the documentation of this file.
00001 
00028 #include <openmodeller/Occurrences.hh>
00029 
00030 #include <openmodeller/Occurrence.hh>
00031 #include <openmodeller/Random.hh>
00032 #include <openmodeller/Log.hh>
00033 #include <openmodeller/env_io/GeoTransform.hh>
00034 #include <openmodeller/Configuration.hh>
00035 #include <openmodeller/Exceptions.hh>
00036 #include <openmodeller/os_specific.hh>
00037 
00038 #include <string>
00039 using std::string;
00040 
00041 // String stream is included for the dump method
00042 #include <sstream>
00043 using std::ostringstream;
00044 
00045 #include <algorithm> // needed for random_shuffle
00046 
00047 #include <math.h>
00048 
00049 /****************************************************************/
00050 /************************ Occurrences ***************************/
00051 
00052 /*******************/
00053 /*** Destructor ***/
00054 
00055 OccurrencesImpl::~OccurrencesImpl()
00056 {
00057   delete gt_;
00058 }
00059 
00060 void
00061 OccurrencesImpl::setLabel( const string& label )
00062 {
00063   label_ = label;
00064 }
00065 
00066 void
00067 OccurrencesImpl::setCoordinateSystem( const string& cs )
00068 {
00069   cs_ = cs;
00070   initGeoTransform();
00071 }
00072 
00073 void
00074 OccurrencesImpl::initGeoTransform()
00075 {
00076   if ( gt_ ) {
00077 
00078     delete gt_;
00079   }
00080 
00081   gt_ = new GeoTransform( cs_, GeoTransform::getDefaultCS() );
00082 }
00083 
00084 /*********************/
00085 /*** configuration ***/
00086 
00087 ConfigurationPtr
00088 OccurrencesImpl::getConfiguration() const
00089 {
00090   ConfigurationPtr config( new ConfigurationImpl("Occurrences") );
00091 
00092   config->addNameValue( "Label", label() );
00093 
00094   ConfigurationPtr cs( new ConfigurationImpl( "CoordinateSystem" ) );
00095   cs->setValue( coordSystem() );
00096 
00097   config->addSubsection( cs );
00098 
00099   config->addNameValue( "Count", int(occur_.size()) );
00100 
00101   const_iterator oc = occur_.begin();
00102   const_iterator end = occur_.end();
00103 
00104   while ( oc != end ) {
00105 
00106     ConfigurationPtr cfg( new ConfigurationImpl("Point") );
00107     std::string id = (*oc)->id();
00108     Scalar x = (*oc)->x();
00109     Scalar y = (*oc)->y();
00110     gt_->transfIn( &x, &y );
00111     int precision = 9;
00112     cfg->addNameValue( "Id", id );
00113     cfg->addNameValue( "X", x, precision );
00114     cfg->addNameValue( "Y", y, precision );
00115     if ( (*oc)->hasEnvironment() ) {
00116 
00117       cfg->addNameValue( "Sample", (*oc)->originalEnvironment() );
00118     }
00119     config->addSubsection( cfg );
00120 
00121     oc++;
00122   }
00123 
00124   return config;
00125 }
00126 
00127 void
00128 OccurrencesImpl::setConfiguration( const ConstConfigurationPtr& config )
00129 {
00130   label_ = config->getAttribute("Label");
00131   
00132   ConstConfigurationPtr cs_config = config->getSubsection( "CoordinateSystem", false );
00133   
00134   if ( ! cs_config ) {
00135 
00136     Log::instance()->warn( "Occurrences has no Coordinate System. Assuming LatLong WSG84\n" );
00137     cs_ = GeoTransform::getDefaultCS();
00138   }
00139   else {
00140 
00141     cs_ = cs_config->getValue();
00142   }
00143 
00144   initGeoTransform( );
00145 
00146   Configuration::subsection_list subs = config->getAllSubsections();
00147 
00148   Configuration::subsection_list::iterator begin = subs.begin();
00149   Configuration::subsection_list::iterator end = subs.end();
00150 
00151   std::vector<Scalar> attrs;
00152 
00153   for ( ; begin != end; ++begin ) {
00154 
00155     if ( (*begin)->getName() != "Point" ) {
00156 
00157       continue;
00158     }
00159 
00160     std::string id = (*begin)->getAttribute("Id");
00161     Scalar x = (*begin)->getAttributeAsDouble( "X", 0.0 );
00162     Scalar y = (*begin)->getAttributeAsDouble( "Y", 0.0 );
00163     Scalar abundance = (*begin)->getAttributeAsDouble( "Abundance", default_abundance_ );
00164 
00165     try {
00166 
00167       // If present, load environmental values from XML
00168       std::vector<Scalar> unnormenv = (*begin)->getAttributeAsVecDouble( "Sample" );
00169       createOccurrence( id, x, y, 0, abundance, attrs, unnormenv );
00170     }
00171     catch ( AttributeNotFound& e ) { 
00172 
00173       // Sample attribute is optional
00174       createOccurrence( id, x, y, 0, abundance, 0, 0, 0, 0 );
00175       UNUSED(e);
00176     }
00177   }
00178 }
00179 
00180 void 
00181 OccurrencesImpl::setEnvironment( const EnvironmentPtr& env, const char *type )
00182 {
00183   if ( isEmpty() ) {
00184 
00185     return;
00186   }
00187 
00188   OccurrencesImpl::iterator oc = occur_.begin();
00189   OccurrencesImpl::iterator fin = occur_.end();
00190 
00191   while ( oc != fin ) {
00192 
00193     Sample sample = env->getUnnormalized( (*oc)->x(), (*oc)->y() );
00194 
00195     if ( sample.size() == 0 ) {
00196 
00197       Log::instance()->warn( "%s Point \"%s\" at (%f,%f) has no environment. It will be discarded.\n", type, ((*oc)->id()).c_str(), (*oc)->x(), (*oc)->y() );
00198 
00199       oc = occur_.erase( oc );
00200       fin = occur_.end();
00201     } 
00202     else {
00203 
00204       (*oc)->setUnnormalizedEnvironment( sample );
00205       (*oc)->setNormalizedEnvironment( Sample() );
00206 
00207       ++oc;
00208     }
00209   }
00210 }
00211 
00212 /*****************/
00213 /*** normalize ***/
00214 void 
00215 OccurrencesImpl::normalize( Normalizer * normalizerPtr, size_t categoricalThreshold )
00216 {
00217   if ( ! normalizerPtr ) {
00218 
00219     return;
00220   }
00221 
00222   OccurrencesImpl::const_iterator occ = occur_.begin();
00223   OccurrencesImpl::const_iterator end = occur_.end();
00224   
00225   // set the normalized values 
00226   while ( occ != end ) {
00227 
00228     (*occ)->normalize( normalizerPtr, categoricalThreshold );
00229     ++occ;
00230   }
00231 }
00232 
00233 /***************************/
00234 /*** reset Normalization ***/
00235 void 
00236 OccurrencesImpl::resetNormalization()
00237 {
00238   OccurrencesImpl::const_iterator occ = occur_.begin();
00239   OccurrencesImpl::const_iterator end = occur_.end();
00240   
00241   while ( occ != end ) {
00242 
00243     (*occ)->setNormalizedEnvironment( (*occ)->originalEnvironment() );
00244     ++occ;
00245   }
00246 }
00247 
00248 /******************/
00249 /*** get MinMax ***/
00250 void
00251 OccurrencesImpl::getMinMax( Sample * min, Sample * max ) const
00252 {
00253   OccurrencesImpl::const_iterator occ = occur_.begin();
00254   OccurrencesImpl::const_iterator end = occur_.end();
00255 
00256   *min = Sample( (*occ)->environment() );
00257   *max = Sample( (*occ)->environment() );
00258 
00259   // grab max and min values per variable
00260   while ( occ != end ) {
00261 
00262       Sample sample = (*occ)->environment();
00263       *min &= sample;
00264       *max |= sample;
00265       ++occ;
00266   }
00267 }
00268 
00269 
00270 /**************/
00271 /*** insert ***/
00272 void
00273 OccurrencesImpl::createOccurrence( const std::string& id, 
00274                                    Coord longitude, Coord latitude,
00275                                    Scalar error, Scalar abundance,
00276                                    int num_attributes, Scalar *attributes,
00277                                    int num_env, Scalar *env )
00278 {
00279   // Transforms the given coordinates in the common openModeller
00280   // coordinate system.
00281   gt_->transfOut( &longitude, &latitude );
00282   
00283   insert( new OccurrenceImpl( id, longitude, latitude, error, abundance,
00284             num_attributes, attributes,
00285             num_env, env ) );
00286   
00287 }
00288 
00289 void 
00290 OccurrencesImpl::createOccurrence( const std::string& id, 
00291                                    Coord longitude, Coord latitude,
00292                                    Scalar error, Scalar abundance,
00293                                    std::vector<double> attributes,
00294                                    std::vector<double> env)
00295 {
00296   // Transforms the given coordinates in the common openModeller
00297   // coordinate system.
00298   gt_->transfOut( &longitude, &latitude );
00299   
00300   insert( new OccurrenceImpl( id, longitude, latitude, error, abundance,
00301             attributes, env ) );
00302   
00303 }
00304 
00305 void
00306 OccurrencesImpl::insert( const OccurrencePtr& oc )
00307 {
00308   occur_.push_back( oc );
00309 }
00310 
00311 OccurrencesImpl*
00312 OccurrencesImpl::clone() const
00313 {
00314   
00315   const_iterator it = occur_.begin();
00316   const_iterator end = occur_.end();
00317   
00318   OccurrencesImpl* clone = new OccurrencesImpl( label_, cs_ );
00319 
00320   while( it != end ) {
00321     
00322     clone->insert( new OccurrenceImpl( *(*it) ) );
00323     
00324     it++;
00325   }
00326 
00327   return clone;
00328 }
00329 
00330 bool
00331 OccurrencesImpl::hasEnvironment() const
00332 {
00333   if ( ! numOccurrences() ) {
00334 
00335       return false;
00336   }
00337 
00338   const_iterator it = occur_.begin();
00339 
00340   return (*it)->hasEnvironment();
00341 }
00342 
00343 int
00344 OccurrencesImpl::dimension() const
00345 {
00346   if ( hasEnvironment() ) {
00347 
00348       const_iterator it = occur_.begin();
00349 
00350       return (*it)->environment().size();
00351   }
00352   else { 
00353 
00354       return 0;
00355   }
00356 }
00357 
00358 /******************/
00359 /*** get Random ***/
00360 ConstOccurrencePtr
00361 OccurrencesImpl::getRandom() const
00362 {
00363   Random rnd;
00364   int selected = (int) rnd( numOccurrences() );
00365 
00366   return occur_[ selected ];
00367 }
00368 
00369 OccurrencesImpl::iterator
00370 OccurrencesImpl::erase( const iterator& it ) 
00371 {
00372   swap( occur_.back(), (*it) );
00373   occur_.pop_back();
00374   return it;
00375 }
00376 
00377 
00378 void 
00379 OccurrencesImpl::appendFrom( const OccurrencesPtr& source )
00380 {
00381   if ( ! source ) {
00382 
00383     return;
00384   }
00385 
00386   const_iterator it = source->begin();
00387   const_iterator end = source->end();
00388 
00389   while ( it != end ) {
00390 
00391       insert(*it);
00392       ++it;
00393   }
00394 }
00395 
00396 
00397 /******************************/
00398 /*** get Environment Matrix ***/
00399 std::vector<ScalarVector> 
00400 OccurrencesImpl::getEnvironmentMatrix()
00401 {
00402   std::vector<ScalarVector> matrix( dimension() );
00403 
00404   // Initialize matrix
00405   for ( unsigned int i = 0; i < matrix.size(); i++ ) {
00406 
00407     matrix[i] = ScalarVector( numOccurrences() );
00408   }
00409 
00410   const_iterator c = occur_.begin();
00411   const_iterator end = occur_.end();
00412 
00413   int j = 0;
00414 
00415   // For each Occurrence
00416   while ( c != end ) {
00417       
00418     Sample const& sample = (*c)->environment();
00419 
00420     // For each layer
00421     for ( unsigned int i = 0; i < matrix.size(); i++ ) {
00422 
00423       // Feed new matrix
00424       matrix[i][j] = sample[i];
00425     }
00426 
00427     ++c;
00428     ++j;
00429   }
00430 
00431   return matrix;
00432 }
00433 
00434 
00435 /*************/
00436 /*** print ***/
00437 void
00438 OccurrencesImpl::dump( std::string msg ) const
00439 {
00440   Log::instance()->info( "%s\n", msg.c_str() );
00441 
00442   // Occurrences general data.
00443   Log::instance()->info( "Label: %s\n", label_.c_str() );
00444   Log::instance()->info( "\nOccurrences: %d\n\n", numOccurrences() );
00445 
00446   const_iterator c = occur_.begin();
00447   const_iterator end = occur_.end();
00448 
00449   while ( c != end ) {
00450 
00451     // Get attributes
00452 
00453     ostringstream ss;
00454     
00455     Sample::const_iterator attr = (*c)->attributes().begin();
00456     Sample::const_iterator end = (*c)->attributes().end();
00457     ss << "( ";
00458 
00459     while ( attr != end ) {
00460 
00461       ss << *attr << " ";
00462       attr++;
00463     }
00464 
00465     ss << ")\n";
00466 
00467     Log::instance()->info( "(%+8.4f, %+8.4f)\n", (*c)->x(), (*c)->y() );
00468     
00469     (*c)->dump();
00470     
00471     c++;
00472   }
00473 }
00474 
00475 
00476 /***************************/
00477 /**** split Occurrences ****/
00478 void splitOccurrences(const OccurrencesPtr& occurrences, 
00479                       OccurrencesPtr& trainOccurrences, 
00480                       OccurrencesPtr& testOccurrences, 
00481                       double propTrain)
00482 {
00483   // add all samples to an array
00484   int i;
00485   int n = occurrences->numOccurrences();
00486   int k = (int) (n * propTrain);
00487   std::vector<int> goToTrainSet(n);
00488 
00489   // first k are set to go to train set
00490   for ( i = 0; i < k; i++ ) {
00491 
00492     goToTrainSet[i] = 1;
00493   }
00494 
00495   // all others are set to go to test set
00496   for ( ; i < n; i++ ) {
00497 
00498     goToTrainSet[i] = 0;
00499   }
00500 
00501   // shuffle elements well
00502   initRandom();
00503 
00504   std::random_shuffle( goToTrainSet.begin(), goToTrainSet.end() );
00505 
00506   // traverse occurrences copying them to the right sampler
00507   OccurrencesImpl::const_iterator it = occurrences->begin();
00508   OccurrencesImpl::const_iterator fin = occurrences->end();
00509 
00510   i = 0;
00511 
00512   while( it != fin ) {
00513 
00514     if ( goToTrainSet[i] ) {
00515 
00516       trainOccurrences->insert( new OccurrenceImpl( *(*it) ) );
00517     }
00518     else {
00519 
00520       testOccurrences->insert( new OccurrenceImpl( *(*it) ) );
00521     }
00522 
00523     ++i; ++it;
00524   }
00525 }
00526 
00527 /***************************/
00528 /**** split Occurrences in train/test using distance between points( Missae 09/2009 ) ****/
00529 void splitOccurrences(const OccurrencesPtr& occurrences, 
00530                       OccurrencesPtr& trainOccurrences, 
00531                       OccurrencesPtr& testOccurrences)
00532 {
00533   double dist, distLimit=8.0, x, y, xmin, xmax, ymin, ymax, deltax, deltay;
00534   unsigned int flag = 0, i = 0, itrain=0, ktrain=0, ioccur=0, flagOk=0;
00535   std::vector<double> occurTransformx( occurrences->numOccurrences() );
00536   std::vector<double> occurTransformy( occurrences->numOccurrences() );
00537   std::vector<int> testId( occurrences->numOccurrences() );
00538   int n = occurrences->numOccurrences(), icont=0;
00539   int nptTeste = (int) (n * 0.40) + 2;
00540 
00541   OccurrencesImpl::const_iterator it = occurrences->begin();
00542   OccurrencesImpl::const_iterator fin = occurrences->end();
00543   
00544   xmin = xmax = (*it)->x();
00545   ymin = ymax = (*it)->y();
00546   
00547   ++it;
00548   while( it != fin ) {
00549     if ( (*it)->x() < xmin ) xmin = (*it)->x();
00550   else  if ( (*it)->x() > xmax) xmax = (*it)->x();
00551   if ( (*it)->y() < ymin) ymin = (*it)->y();
00552   else  if ( (*it)->y() > ymax) ymax = (*it)->y();
00553     ++it;
00554   }
00555   deltax = xmax - xmin;
00556   deltay = ymax - ymin;
00557 
00558   it = occurrences->begin();
00559   while( it != fin ) {
00560     occurTransformx[i] = 100 * ( (*it)->x() - xmin ) / deltax;
00561     occurTransformy[i] = 100 * ( (*it)->y() - ymin ) / deltay;
00562     i++;
00563     ++it;
00564   }
00565   do{
00566     flagOk=0, flag = 0, itrain=0, ktrain=0, ioccur=0, icont=0;
00567 
00568     it = occurrences->begin();
00569 
00570     trainOccurrences->insert( new OccurrenceImpl( *(*it) ) );
00571     testOccurrences->insert( new OccurrenceImpl( *(*it) ) );
00572 
00573     ++it;
00574     testId[ktrain] = ioccur;
00575     ktrain++;
00576     ioccur++;
00577 
00578     while( it != fin ) {
00579 
00580       for ( i = 0; i < ktrain; i++ ) {
00581       itrain = testId[i];
00582       x = occurTransformx[ioccur] - occurTransformx[itrain];
00583       y = occurTransformy[ioccur] - occurTransformy[itrain];
00584         dist = sqrt(  (x*x) + (y*y)  );
00585 
00586         if ( dist < distLimit) {
00587           testOccurrences->insert( new OccurrenceImpl( *(*it) ) );
00588       flag = 1;
00589       icont++;
00590       break;
00591         }
00592     }
00593 
00594     if (icont > nptTeste){
00595       OccurrencesImpl::iterator it = testOccurrences->begin();
00596       OccurrencesImpl::iterator last = testOccurrences->end();
00597     --last;
00598       while ( it != last ) {
00599       it = testOccurrences->erase(it);
00600       last = testOccurrences->end();
00601       --last;
00602       }
00603     it = testOccurrences->erase(it);
00604 
00605       OccurrencesImpl::iterator itt = trainOccurrences->begin();
00606       OccurrencesImpl::iterator lastt = trainOccurrences->end();
00607     --lastt;
00608       while ( itt != lastt ) {
00609       itt = trainOccurrences->erase(itt);
00610       lastt = trainOccurrences->end();
00611       --lastt;
00612       }
00613     itt = trainOccurrences->erase(itt);
00614 
00615         if (distLimit > 1.0) distLimit = distLimit - 1.0;
00616     else if (distLimit > 0.2) distLimit = distLimit - 0.2;
00617     else distLimit = distLimit - 0.02;
00618       flagOk=1;
00619         break;
00620     }
00621       if (!flag){
00622         trainOccurrences->insert( new OccurrenceImpl( *(*it) ) );
00623         testId[ktrain] = ioccur;
00624       ktrain++;
00625     }else{
00626         flag = 0;
00627     }
00628     ioccur++;
00629       ++it;
00630     }
00631   }while(flagOk == 1);
00632 }