openModeller  Version 1.4.0
om_sampler.cpp
Go to the documentation of this file.
00001 #include <openmodeller/om.hh>
00002 #include <openmodeller/Configuration.hh>
00003 #include <openmodeller/os_specific.hh>
00004 #include <openmodeller/FileParser.hh>
00005 
00006 #include "request_file.hh"
00007 
00008 #include "getopts/getopts.h"
00009 
00010 #include "om_cmd_utils.hh"
00011 
00012 #include <istream>
00013 #include <stdlib.h>
00014 #include <string>
00015 #include <stdio.h>
00016 #include <iostream>  // I/O 
00017 #include <fstream>   // file I/O
00018 
00019 #include <stdexcept>
00020  
00021 using namespace std;
00022 
00023 void printOccurrences( ostream & stream, ConstOccurrencesPtr occ );
00024 
00025 int main( int argc, char **argv ) {
00026 
00027   Options opts;
00028   int option;
00029 
00030   // command-line parameters (short name, long name, description, take args)
00031   opts.addOption( "v", "version"    , "Display version info"                        , false );
00032   opts.addOption( "s", "source"     , "Source with references to points and layers" , true );
00033   opts.addOption( "e", "dump-env"   , "Dump environment data for a specified cell range", false );
00034   opts.addOption( "" , "cell-start" , "Cell position to start environment dumping (default 0)"  , true );
00035   opts.addOption( "" , "cell-end"   , "Cell position to end environment dumping (default 1000)" , true );
00036   opts.addOption( "" , "log-level"  , "Set the log level (debug, warn, info, error)", true );
00037   opts.addOption( "c", "config-file", "Configuration file for openModeller"         , true );
00038 
00039   std::string log_level("info");
00040   std::string source("");
00041   bool        dump_env = false;
00042   std::string start_string("0");
00043   std::string end_string("1000");
00044   std::string config_file;
00045 
00046   if ( ! opts.parse( argc, argv ) ) {
00047 
00048     opts.showHelp( argv[0] ); 
00049     exit(0);
00050   }
00051 
00052   // Set up any related external resources
00053   setupExternalResources();
00054 
00055   OpenModeller om;
00056 
00057   while ( ( option = opts.cycle() ) >= 0 ) {
00058 
00059     switch ( option ) {
00060 
00061       case 0:
00062         printf( "om_sampler %s\n", om.getVersion().c_str() );
00063         printf("This is free software; see the source for copying conditions. There is NO\n");
00064         printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
00065         exit(0);
00066         break;
00067       case 1:
00068         source = opts.getArgs( option );
00069         break;
00070       case 2:
00071         dump_env = true;
00072         break;
00073       case 3:
00074         start_string = opts.getArgs( option );
00075         break;
00076       case 4:
00077         end_string = opts.getArgs( option );
00078         break;
00079       case 5:
00080         log_level = opts.getArgs( option );
00081         break;
00082       case 6:
00083         config_file = opts.getArgs( option );
00084         break;
00085       default:
00086         break;
00087     }
00088   }
00089 
00090   // Check requirements
00091   if ( source.empty() ) {
00092 
00093     printf( "Please specify a source (request file in txt or xml) with references to points and layers.\n");
00094     exit(-1);
00095   }
00096 
00097   // om configuration
00098   if ( ! config_file.empty() ) { 
00099 
00100     Settings::loadConfig( config_file );
00101   }
00102 
00103   int cell_start = 0;
00104   int cell_end = 0;
00105 
00106   if ( dump_env ) {
00107 
00108     cell_start = atoi( start_string.c_str() );
00109 
00110     cell_end = atoi( end_string.c_str() );
00111   }
00112 
00113   // Log stuff
00114 
00115   Log::Level level_code = getLogLevel( log_level );
00116 
00117   Log::instance()->setLevel( level_code );
00118 
00119   // Real work
00120 
00121   try {
00122 
00123     // Read source
00124 
00125     bool is_xml = false;
00126 
00127     // Try to open file and check if first char is "<". In this case we will
00128     // assume that it's an XML file.
00129     std::ifstream fs( source.c_str(), std::ios_base::in );
00130 
00131     if ( fs.fail() ) {
00132 
00133       printf( "Could not open source file.\n");
00134       exit(-1);
00135     }
00136 
00137     std::string line("");
00138 
00139     while ( getline( fs, line ) ) {
00140 
00141       for ( unsigned int i = 0; i < line.size(); ++i ) {    
00142 
00143         // Skip carriage returns, line feeds and spaces
00144         if ( line[i] == '\r' || line[i] == '\n' || line[i] == ' ' ) {
00145 
00146           continue;
00147         }
00148 
00149         // Looks like an XML file
00150         if ( line[i] == '<' ) {
00151 
00152           is_xml = true;
00153         }
00154 
00155         break;
00156       }
00157     }
00158 
00159     fs.close();
00160 
00161     SamplerPtr sampler;
00162 
00163     if ( is_xml ) {
00164 
00165       ConfigurationPtr config = Configuration::readXml( source.c_str() );
00166 
00167       sampler = createSampler( config->getSubsection( "Sampler" ) );
00168     }
00169     else {
00170 
00171       // Must be a txt request file, so parse it
00172 
00173       FileParser fp( source.c_str() );
00174 
00175       // Load points
00176 
00177       // Obtain the Well Known Text string for the localities
00178       // coordinate system.
00179       std::string oc_cs = fp.get( "WKT coord system" );
00180 
00181       // Get the name of the file containing localities
00182       std::string oc_file = fp.get( "Occurrences source" );
00183 
00184       // Get the label
00185       std::string oc_name = fp.get( "Occurrences group" );
00186 
00187       // When a model needs to be created, 'WKT coord system' and 
00188       // 'Species file' are mandatory parameters
00189       if ( oc_cs.empty() ) {
00190 
00191         printf( "'WKT coord system' keyword not specified in the request file!\n" );
00192         exit(-1);
00193       }
00194 
00195       if ( oc_file.empty() ) {
00196 
00197         printf( "'Occurrences source' keyword not specified in the request file!\n" );
00198         exit(-1);
00199       }
00200 
00201       // Populate the occurences list from the localities file
00202       OccurrencesReader* oc_reader = OccurrencesFactory::instance().create( oc_file.c_str(), oc_cs.c_str() );
00203 
00204       OccurrencesPtr presences = oc_reader->getPresences( oc_name.c_str() );
00205 
00206       OccurrencesPtr absences = oc_reader->getAbsences( oc_name.c_str() );
00207 
00208       delete oc_reader;
00209 
00210       // Load layers
00211 
00212       // Mask to select the desired species occurrence points
00213       std::string input_mask = fp.get( "Mask" );
00214 
00215       // Initiate the environment with all maps.
00216       std::vector<std::string> categ_map = fp.getAll( "Categorical map" );
00217       std::vector<std::string> cont_map = fp.getAll( "Map" );
00218 
00219       // When a model needs to be created, there should be at least one input map
00220       if ( ! (categ_map.size() + cont_map.size()) ) {
00221 
00222         printf( "At least one 'Map' or 'Categorical map' needs to be specified in the request file!\n" );
00223         exit(-1);
00224       }
00225 
00226       // If Mask was not specified, use the first layer
00227       if ( input_mask.empty() ) {
00228 
00229         if ( cont_map.size() ) {
00230 
00231           input_mask = cont_map[0];
00232         }
00233         else {
00234 
00235           input_mask = categ_map[0];
00236         }
00237       }
00238 
00239       EnvironmentPtr env = createEnvironment( categ_map, cont_map, input_mask );
00240 
00241       sampler = createSampler( env, presences, absences );
00242 
00243       std::string spatially_unique = fp.get( "Spatially unique" );
00244       if ( spatially_unique == "true" ) {
00245 
00246         sampler->spatiallyUnique();
00247       }
00248 
00249       std::string environmentally_unique = fp.get( "Environmentally unique" );
00250       if ( environmentally_unique == "true" ) {
00251 
00252         sampler->environmentallyUnique();
00253       }
00254     }
00255 
00256     // Print output
00257 
00258     int dim = sampler->numIndependent();
00259 
00260     // Header
00261     cout << "#id\tlabel\tlongitude\tlatitude\tabundance";
00262 
00263     for ( int i = 0; i < dim; ++i ) {
00264 
00265       cout << "\tattr" << i+1;
00266     }
00267 
00268     if ( dump_env ) {
00269 
00270       // Dump environment data
00271 
00272       EnvironmentPtr e = sampler->getEnvironment();
00273 
00274       // Initialize the iterator
00275       MapIterator it = e->getMask()->begin();
00276 
00277       // Initialize the terminal
00278       MapIterator fin;
00279 
00280       int cnt = 0;
00281 
00282       while( it != fin ) {
00283 
00284         if ( cnt < cell_start) {
00285 
00286           ++it;
00287           ++cnt;
00288           continue;
00289         }
00290 
00291         // Get the lon/lat coordinates from the iterator
00292         pair<Coord,Coord> lonlat = *it;
00293 
00294         // Extract the environment sample at that point
00295         Sample s = e->get( lonlat.first, lonlat.second );
00296 
00297         cout << "\n" << cnt << "\t" << "env data" << "\t" << lonlat.first << "\t" << lonlat.second << "\t" << "-";
00298 
00299         // Note:  s will have size() == 0 if the environment
00300         // does not have data for that location. This can happen
00301         // for a couple of reasons.
00302         //   1.  The location is not in the mask.
00303         //   2.  One of the layers has value == NoVal at that location.
00304         if ( s.size() > 0 ) {
00305 
00306           for ( int i = 0; i < dim; ++i ) {
00307 
00308             cout << "\t" << s[i];
00309           }
00310         }
00311         else {
00312 
00313             cout << "\t" << "nodata";
00314   }
00315 
00316         ++cnt;
00317         ++it;
00318 
00319         if ( cnt > cell_end ) {
00320 
00321           break;
00322         }
00323       }
00324 
00325       return 0;
00326     }
00327 
00328     // Default action: dump locality samples
00329 
00330     ConstOccurrencesPtr p = sampler->getPresences();
00331 
00332     if ( p ) {
00333     
00334       printOccurrences( cout, p );
00335     }
00336 
00337     ConstOccurrencesPtr a = sampler->getAbsences();
00338 
00339     if ( a ) {
00340 
00341       printOccurrences( cout, a );
00342     }
00343 
00344     return 0;
00345   }
00346   catch ( runtime_error e ) {
00347 
00348     printf( "om_sampler: %s\n", e.what() );
00349     exit(-1);
00350   }
00351 }
00352 
00353 // Print occurrences
00354 void printOccurrences( ostream & stream, ConstOccurrencesPtr occ ) {
00355 
00356   int dim = occ->dimension();
00357 
00358   OccurrencesImpl::const_iterator it = occ->begin();
00359   OccurrencesImpl::const_iterator fin = occ->end();
00360 
00361   while ( it != fin ) {
00362 
00363     stream << "\n" << ((*it)->id()).c_str() << "\t" << occ->label() << "\t" << (*it)->x() << "\t" << (*it)->y() << "\t" << (*it)->abundance();
00364 
00365     Sample s = (*it)->environment();
00366 
00367     for ( int i = 0; i < dim; ++i ) {
00368 
00369       stream << "\t" << s[i];
00370     }
00371 
00372     it++;
00373   }
00374 }