// openModeller
// Version 1.4.0
00001 00028 #include <openmodeller/Occurrences.hh> 00029 00030 #include <openmodeller/Occurrence.hh> 00031 #include <openmodeller/Random.hh> 00032 #include <openmodeller/Log.hh> 00033 #include <openmodeller/env_io/GeoTransform.hh> 00034 #include <openmodeller/Configuration.hh> 00035 #include <openmodeller/Exceptions.hh> 00036 #include <openmodeller/os_specific.hh> 00037 00038 #include <string> 00039 using std::string; 00040 00041 // String stream is included for the dump method 00042 #include <sstream> 00043 using std::ostringstream; 00044 00045 #include <algorithm> // needed for random_shuffle 00046 00047 #include <math.h> 00048 00049 /****************************************************************/ 00050 /************************ Occurrences ***************************/ 00051 00052 /*******************/ 00053 /*** Constructor ***/ 00054 00055 OccurrencesImpl::~OccurrencesImpl() 00056 { 00057 delete gt_; 00058 } 00059 00060 void 00061 OccurrencesImpl::setLabel( const string& label ) 00062 { 00063 label_ = label; 00064 } 00065 00066 void 00067 OccurrencesImpl::setCoordinateSystem( const string& cs ) 00068 { 00069 cs_ = cs; 00070 initGeoTransform(); 00071 } 00072 00073 void 00074 OccurrencesImpl::initGeoTransform() 00075 { 00076 if ( gt_ ) { 00077 00078 delete gt_; 00079 } 00080 00081 gt_ = new GeoTransform( cs_, GeoTransform::getDefaultCS() ); 00082 } 00083 00084 /*********************/ 00085 /*** configuration ***/ 00086 00087 ConfigurationPtr 00088 OccurrencesImpl::getConfiguration() const 00089 { 00090 ConfigurationPtr config( new ConfigurationImpl("Occurrences") ); 00091 00092 config->addNameValue( "Label", label() ); 00093 00094 ConfigurationPtr cs( new ConfigurationImpl( "CoordinateSystem" ) ); 00095 cs->setValue( coordSystem() ); 00096 00097 config->addSubsection( cs ); 00098 00099 config->addNameValue( "Count", int(occur_.size()) ); 00100 00101 const_iterator oc = occur_.begin(); 00102 const_iterator end = occur_.end(); 00103 00104 while ( oc != end ) { 
00105 00106 ConfigurationPtr cfg( new ConfigurationImpl("Point") ); 00107 std::string id = (*oc)->id(); 00108 Scalar x = (*oc)->x(); 00109 Scalar y = (*oc)->y(); 00110 gt_->transfIn( &x, &y ); 00111 int precision = 9; 00112 cfg->addNameValue( "Id", id ); 00113 cfg->addNameValue( "X", x, precision ); 00114 cfg->addNameValue( "Y", y, precision ); 00115 if ( (*oc)->hasEnvironment() ) { 00116 00117 cfg->addNameValue( "Sample", (*oc)->originalEnvironment() ); 00118 } 00119 config->addSubsection( cfg ); 00120 00121 oc++; 00122 } 00123 00124 return config; 00125 } 00126 00127 void 00128 OccurrencesImpl::setConfiguration( const ConstConfigurationPtr& config ) 00129 { 00130 label_ = config->getAttribute("Label"); 00131 00132 ConstConfigurationPtr cs_config = config->getSubsection( "CoordinateSystem", false ); 00133 00134 if ( ! cs_config ) { 00135 00136 Log::instance()->warn( "Occurrences has no Coordinate System. Assuming LatLong WSG84\n" ); 00137 cs_ = GeoTransform::getDefaultCS(); 00138 } 00139 else { 00140 00141 cs_ = cs_config->getValue(); 00142 } 00143 00144 initGeoTransform( ); 00145 00146 Configuration::subsection_list subs = config->getAllSubsections(); 00147 00148 Configuration::subsection_list::iterator begin = subs.begin(); 00149 Configuration::subsection_list::iterator end = subs.end(); 00150 00151 std::vector<Scalar> attrs; 00152 00153 for ( ; begin != end; ++begin ) { 00154 00155 if ( (*begin)->getName() != "Point" ) { 00156 00157 continue; 00158 } 00159 00160 std::string id = (*begin)->getAttribute("Id"); 00161 Scalar x = (*begin)->getAttributeAsDouble( "X", 0.0 ); 00162 Scalar y = (*begin)->getAttributeAsDouble( "Y", 0.0 ); 00163 Scalar abundance = (*begin)->getAttributeAsDouble( "Abundance", default_abundance_ ); 00164 00165 try { 00166 00167 // If present, load environmental values from XML 00168 std::vector<Scalar> unnormenv = (*begin)->getAttributeAsVecDouble( "Sample" ); 00169 createOccurrence( id, x, y, 0, abundance, attrs, unnormenv ); 00170 } 00171 
catch ( AttributeNotFound& e ) { 00172 00173 // Sample attribute is optional 00174 createOccurrence( id, x, y, 0, abundance, 0, 0, 0, 0 ); 00175 UNUSED(e); 00176 } 00177 } 00178 } 00179 00180 void 00181 OccurrencesImpl::setEnvironment( const EnvironmentPtr& env, const char *type ) 00182 { 00183 if ( isEmpty() ) { 00184 00185 return; 00186 } 00187 00188 OccurrencesImpl::iterator oc = occur_.begin(); 00189 OccurrencesImpl::iterator fin = occur_.end(); 00190 00191 while ( oc != fin ) { 00192 00193 Sample sample = env->getUnnormalized( (*oc)->x(), (*oc)->y() ); 00194 00195 if ( sample.size() == 0 ) { 00196 00197 Log::instance()->warn( "%s Point \"%s\" at (%f,%f) has no environment. It will be discarded.\n", type, ((*oc)->id()).c_str(), (*oc)->x(), (*oc)->y() ); 00198 00199 oc = occur_.erase( oc ); 00200 fin = occur_.end(); 00201 } 00202 else { 00203 00204 (*oc)->setUnnormalizedEnvironment( sample ); 00205 (*oc)->setNormalizedEnvironment( Sample() ); 00206 00207 ++oc; 00208 } 00209 } 00210 } 00211 00212 /*****************/ 00213 /*** normalize ***/ 00214 void 00215 OccurrencesImpl::normalize( Normalizer * normalizerPtr, size_t categoricalThreshold ) 00216 { 00217 if ( ! 
normalizerPtr ) { 00218 00219 return; 00220 } 00221 00222 OccurrencesImpl::const_iterator occ = occur_.begin(); 00223 OccurrencesImpl::const_iterator end = occur_.end(); 00224 00225 // set the normalized values 00226 while ( occ != end ) { 00227 00228 (*occ)->normalize( normalizerPtr, categoricalThreshold ); 00229 ++occ; 00230 } 00231 } 00232 00233 /***************************/ 00234 /*** reset Normalization ***/ 00235 void 00236 OccurrencesImpl::resetNormalization() 00237 { 00238 OccurrencesImpl::const_iterator occ = occur_.begin(); 00239 OccurrencesImpl::const_iterator end = occur_.end(); 00240 00241 while ( occ != end ) { 00242 00243 (*occ)->setNormalizedEnvironment( (*occ)->originalEnvironment() ); 00244 ++occ; 00245 } 00246 } 00247 00248 /******************/ 00249 /*** get MinMax ***/ 00250 void 00251 OccurrencesImpl::getMinMax( Sample * min, Sample * max ) const 00252 { 00253 OccurrencesImpl::const_iterator occ = occur_.begin(); 00254 OccurrencesImpl::const_iterator end = occur_.end(); 00255 00256 *min = Sample( (*occ)->environment() ); 00257 *max = Sample( (*occ)->environment() ); 00258 00259 // grab max and min values per variable 00260 while ( occ != end ) { 00261 00262 Sample sample = (*occ)->environment(); 00263 *min &= sample; 00264 *max |= sample; 00265 ++occ; 00266 } 00267 } 00268 00269 00270 /**************/ 00271 /*** insert ***/ 00272 void 00273 OccurrencesImpl::createOccurrence( const std::string& id, 00274 Coord longitude, Coord latitude, 00275 Scalar error, Scalar abundance, 00276 int num_attributes, Scalar *attributes, 00277 int num_env, Scalar *env ) 00278 { 00279 // Transforms the given coordinates in the common openModeller 00280 // coordinate system. 
00281 gt_->transfOut( &longitude, &latitude ); 00282 00283 insert( new OccurrenceImpl( id, longitude, latitude, error, abundance, 00284 num_attributes, attributes, 00285 num_env, env ) ); 00286 00287 } 00288 00289 void 00290 OccurrencesImpl::createOccurrence( const std::string& id, 00291 Coord longitude, Coord latitude, 00292 Scalar error, Scalar abundance, 00293 std::vector<double> attributes, 00294 std::vector<double> env) 00295 { 00296 // Transforms the given coordinates in the common openModeller 00297 // coordinate system. 00298 gt_->transfOut( &longitude, &latitude ); 00299 00300 insert( new OccurrenceImpl( id, longitude, latitude, error, abundance, 00301 attributes, env ) ); 00302 00303 } 00304 00305 void 00306 OccurrencesImpl::insert( const OccurrencePtr& oc ) 00307 { 00308 occur_.push_back( oc ); 00309 } 00310 00311 OccurrencesImpl* 00312 OccurrencesImpl::clone() const 00313 { 00314 00315 const_iterator it = occur_.begin(); 00316 const_iterator end = occur_.end(); 00317 00318 OccurrencesImpl* clone = new OccurrencesImpl( label_, cs_ ); 00319 00320 while( it != end ) { 00321 00322 clone->insert( new OccurrenceImpl( *(*it) ) ); 00323 00324 it++; 00325 } 00326 00327 return clone; 00328 } 00329 00330 bool 00331 OccurrencesImpl::hasEnvironment() const 00332 { 00333 if ( ! 
numOccurrences() ) { 00334 00335 return false; 00336 } 00337 00338 const_iterator it = occur_.begin(); 00339 00340 return (*it)->hasEnvironment(); 00341 } 00342 00343 int 00344 OccurrencesImpl::dimension() const 00345 { 00346 if ( hasEnvironment() ) { 00347 00348 const_iterator it = occur_.begin(); 00349 00350 return (*it)->environment().size(); 00351 } 00352 else { 00353 00354 return 0; 00355 } 00356 } 00357 00358 /******************/ 00359 /*** get Random ***/ 00360 ConstOccurrencePtr 00361 OccurrencesImpl::getRandom() const 00362 { 00363 Random rnd; 00364 int selected = (int) rnd( numOccurrences() ); 00365 00366 return occur_[ selected ]; 00367 } 00368 00369 OccurrencesImpl::iterator 00370 OccurrencesImpl::erase( const iterator& it ) 00371 { 00372 swap( occur_.back(), (*it) ); 00373 occur_.pop_back(); 00374 return it; 00375 } 00376 00377 00378 void 00379 OccurrencesImpl::appendFrom( const OccurrencesPtr& source ) 00380 { 00381 if ( ! source ) { 00382 00383 return; 00384 } 00385 00386 const_iterator it = source->begin(); 00387 const_iterator end = source->end(); 00388 00389 while ( it != end ) { 00390 00391 insert(*it); 00392 ++it; 00393 } 00394 } 00395 00396 00397 /******************************/ 00398 /*** get Environment Matrix ***/ 00399 std::vector<ScalarVector> 00400 OccurrencesImpl::getEnvironmentMatrix() 00401 { 00402 std::vector<ScalarVector> matrix( dimension() ); 00403 00404 // Initialize matrix 00405 for ( unsigned int i = 0; i < matrix.size(); i++ ) { 00406 00407 matrix[i] = ScalarVector( numOccurrences() ); 00408 } 00409 00410 const_iterator c = occur_.begin(); 00411 const_iterator end = occur_.end(); 00412 00413 int j = 0; 00414 00415 // For each Occurrence 00416 while ( c != end ) { 00417 00418 Sample const& sample = (*c)->environment(); 00419 00420 // For each layer 00421 for ( unsigned int i = 0; i < matrix.size(); i++ ) { 00422 00423 // Feed new matrix 00424 matrix[i][j] = sample[i]; 00425 } 00426 00427 ++c; 00428 ++j; 00429 } 00430 00431 
return matrix; 00432 } 00433 00434 00435 /*************/ 00436 /*** print ***/ 00437 void 00438 OccurrencesImpl::dump( std::string msg ) const 00439 { 00440 Log::instance()->info( "%s\n", msg.c_str() ); 00441 00442 // Occurrences general data. 00443 Log::instance()->info( "Label: %s\n", label_.c_str() ); 00444 Log::instance()->info( "\nOccurrences: %d\n\n", numOccurrences() ); 00445 00446 const_iterator c = occur_.begin(); 00447 const_iterator end = occur_.end(); 00448 00449 while ( c != end ) { 00450 00451 // Get attributes 00452 00453 ostringstream ss; 00454 00455 Sample::const_iterator attr = (*c)->attributes().begin(); 00456 Sample::const_iterator end = (*c)->attributes().end(); 00457 ss << "( "; 00458 00459 while ( attr != end ) { 00460 00461 ss << *attr << " "; 00462 attr++; 00463 } 00464 00465 ss << ")\n"; 00466 00467 Log::instance()->info( "(%+8.4f, %+8.4f)\n", (*c)->x(), (*c)->y() ); 00468 00469 (*c)->dump(); 00470 00471 c++; 00472 } 00473 } 00474 00475 00476 /***************************/ 00477 /**** split Occurrences ****/ 00478 void splitOccurrences(const OccurrencesPtr& occurrences, 00479 OccurrencesPtr& trainOccurrences, 00480 OccurrencesPtr& testOccurrences, 00481 double propTrain) 00482 { 00483 // add all samples to an array 00484 int i; 00485 int n = occurrences->numOccurrences(); 00486 int k = (int) (n * propTrain); 00487 std::vector<int> goToTrainSet(n); 00488 00489 // first k are set to go to train set 00490 for ( i = 0; i < k; i++ ) { 00491 00492 goToTrainSet[i] = 1; 00493 } 00494 00495 // all others are set to go to test set 00496 for ( ; i < n; i++ ) { 00497 00498 goToTrainSet[i] = 0; 00499 } 00500 00501 // shuffle elements well 00502 initRandom(); 00503 00504 std::random_shuffle( goToTrainSet.begin(), goToTrainSet.end() ); 00505 00506 // traverse occurrences copying them to the right sampler 00507 OccurrencesImpl::const_iterator it = occurrences->begin(); 00508 OccurrencesImpl::const_iterator fin = occurrences->end(); 00509 00510 i = 0; 00511 
00512 while( it != fin ) { 00513 00514 if ( goToTrainSet[i] ) { 00515 00516 trainOccurrences->insert( new OccurrenceImpl( *(*it) ) ); 00517 } 00518 else { 00519 00520 testOccurrences->insert( new OccurrenceImpl( *(*it) ) ); 00521 } 00522 00523 ++i; ++it; 00524 } 00525 } 00526 00527 /***************************/ 00528 /**** split Occurrences in train/test using distance between points( Missae 09/2009 ) ****/ 00529 void splitOccurrences(const OccurrencesPtr& occurrences, 00530 OccurrencesPtr& trainOccurrences, 00531 OccurrencesPtr& testOccurrences) 00532 { 00533 double dist, distLimit=8.0, x, y, xmin, xmax, ymin, ymax, deltax, deltay; 00534 unsigned int flag = 0, i = 0, itrain=0, ktrain=0, ioccur=0, flagOk=0; 00535 std::vector<double> occurTransformx( occurrences->numOccurrences() ); 00536 std::vector<double> occurTransformy( occurrences->numOccurrences() ); 00537 std::vector<int> testId( occurrences->numOccurrences() ); 00538 int n = occurrences->numOccurrences(), icont=0; 00539 int nptTeste = (int) (n * 0.40) + 2; 00540 00541 OccurrencesImpl::const_iterator it = occurrences->begin(); 00542 OccurrencesImpl::const_iterator fin = occurrences->end(); 00543 00544 xmin = xmax = (*it)->x(); 00545 ymin = ymax = (*it)->y(); 00546 00547 ++it; 00548 while( it != fin ) { 00549 if ( (*it)->x() < xmin ) xmin = (*it)->x(); 00550 else if ( (*it)->x() > xmax) xmax = (*it)->x(); 00551 if ( (*it)->y() < ymin) ymin = (*it)->y(); 00552 else if ( (*it)->y() > ymax) ymax = (*it)->y(); 00553 ++it; 00554 } 00555 deltax = xmax - xmin; 00556 deltay = ymax - ymin; 00557 00558 it = occurrences->begin(); 00559 while( it != fin ) { 00560 occurTransformx[i] = 100 * ( (*it)->x() - xmin ) / deltax; 00561 occurTransformy[i] = 100 * ( (*it)->y() - ymin ) / deltay; 00562 i++; 00563 ++it; 00564 } 00565 do{ 00566 flagOk=0, flag = 0, itrain=0, ktrain=0, ioccur=0, icont=0; 00567 00568 it = occurrences->begin(); 00569 00570 trainOccurrences->insert( new OccurrenceImpl( *(*it) ) ); 00571 
testOccurrences->insert( new OccurrenceImpl( *(*it) ) ); 00572 00573 ++it; 00574 testId[ktrain] = ioccur; 00575 ktrain++; 00576 ioccur++; 00577 00578 while( it != fin ) { 00579 00580 for ( i = 0; i < ktrain; i++ ) { 00581 itrain = testId[i]; 00582 x = occurTransformx[ioccur] - occurTransformx[itrain]; 00583 y = occurTransformy[ioccur] - occurTransformy[itrain]; 00584 dist = sqrt( (x*x) + (y*y) ); 00585 00586 if ( dist < distLimit) { 00587 testOccurrences->insert( new OccurrenceImpl( *(*it) ) ); 00588 flag = 1; 00589 icont++; 00590 break; 00591 } 00592 } 00593 00594 if (icont > nptTeste){ 00595 OccurrencesImpl::iterator it = testOccurrences->begin(); 00596 OccurrencesImpl::iterator last = testOccurrences->end(); 00597 --last; 00598 while ( it != last ) { 00599 it = testOccurrences->erase(it); 00600 last = testOccurrences->end(); 00601 --last; 00602 } 00603 it = testOccurrences->erase(it); 00604 00605 OccurrencesImpl::iterator itt = trainOccurrences->begin(); 00606 OccurrencesImpl::iterator lastt = trainOccurrences->end(); 00607 --lastt; 00608 while ( itt != lastt ) { 00609 itt = trainOccurrences->erase(itt); 00610 lastt = trainOccurrences->end(); 00611 --lastt; 00612 } 00613 itt = trainOccurrences->erase(itt); 00614 00615 if (distLimit > 1.0) distLimit = distLimit - 1.0; 00616 else if (distLimit > 0.2) distLimit = distLimit - 0.2; 00617 else distLimit = distLimit - 0.02; 00618 flagOk=1; 00619 break; 00620 } 00621 if (!flag){ 00622 trainOccurrences->insert( new OccurrenceImpl( *(*it) ) ); 00623 testId[ktrain] = ioccur; 00624 ktrain++; 00625 }else{ 00626 flag = 0; 00627 } 00628 ioccur++; 00629 ++it; 00630 } 00631 }while(flagOk == 1); 00632 }