openModeller  Version 1.4.0
om_pseudo.cpp
Go to the documentation of this file.
00001 #include <openmodeller/om.hh>
00002 #include <openmodeller/os_specific.hh>
00003 #include <openmodeller/Exceptions.hh>
00004 
00005 #include "getopts/getopts.h"
00006 
00007 #include "om_cmd_utils.hh"
00008 
00009 #include <time.h>    // used to limit the number of times that the progress is written to a file
00010 #include <string>
00011 #include <stdexcept>
00012 #include <iomanip>   // std::setprecision
00013 #include <sstream>
00014 
00015 using namespace std;
00016 
00017 int main( int argc, char **argv ) {
00018 
00019   Options opts;
00020   int option;
00021 
00022   // command-line parameters (short name, long name, description, take args)
00023   opts.addOption( "v", "version"    , "Display the version info"                             , false );
00024   opts.addOption( "r", "xml-req"    , "(option 1) Model evaluation request file in XML"      , true );
00025   opts.addOption( "n", "num-points" , "(option 2) Number of points to be generated"          , true );
00026   opts.addOption( "l", "label"      , "(option 2) Label for the points"                      , true );
00027   opts.addOption( "q", "seq-start"  , "(option 2) Sequence start for points id"              , true );
00028   opts.addOption( "m", "mask"       , "(option 2) Mask file"                                 , true );
00029   opts.addOption( "p", "proportion" , "(option 2) Proportion of absence points (decimals)"   , true );
00030   opts.addOption( "o", "model"      , "(option 2) File with serialized model"                , true );
00031   opts.addOption( "t", "threshold"  , "(option 2) Model threshold (default 0.5)"             , true );
00032   opts.addOption( "" , "geo-unique" , "(option 2) Avoid repeating same coordinates"          , false );
00033   opts.addOption( "" , "env-unique" , "(option 2) Avoid repeating same environment condition", false );
00034   opts.addOption( "s", "result"     , "File to store result"                                 , true );
00035   opts.addOption( "" , "log-level"  , "Set the log level (debug, warn, info, error)"         , true );
00036   opts.addOption( "" , "log-file"   , "Log file"                                             , true );
00037   opts.addOption( "" , "prog-file"  , "File to store progress"                               , true );
00038   opts.addOption( "c", "config-file", "Configuration file for openModeller"                  , true );
00039 
00040   std::string log_level("info");
00041   std::string request_file;
00042   std::string num_points_string;
00043   int num_points = 0;
00044   std::string label("label");
00045   std::string sequence_start_string;
00046   int sequence_start = 1;
00047   std::string mask_file;
00048   std::string proportion_string;
00049   double proportion = 1.0;
00050   int num_absences_to_be_generated;
00051   std::string model_file;
00052   std::string threshold_string;
00053   double threshold = 0.5;
00054   bool geo_unique = false;
00055   bool env_unique = false;
00056   std::string result_file;
00057   std::string log_file;
00058   std::string progress_file;
00059   std::string config_file;
00060 
00061   if ( ! opts.parse( argc, argv ) ) {
00062 
00063     opts.showHelp( argv[0] ); 
00064     exit(0);
00065   }
00066 
00067   // Set up any related external resources
00068   setupExternalResources();
00069 
00070   OpenModeller om;
00071 
00072   while ( ( option = opts.cycle() ) >= 0 ) {
00073 
00074     switch ( option ) {
00075 
00076       case 0:
00077         printf( "om_pseudo %s\n", om.getVersion().c_str() );
00078         printf("This is free software; see the source for copying conditions. There is NO\n");
00079         printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
00080         exit(0);
00081         break;
00082       case 1:
00083         request_file = opts.getArgs( option );
00084         break;
00085       case 2:
00086         num_points_string = opts.getArgs( option );
00087         break;
00088       case 3:
00089         label = opts.getArgs( option );
00090         break;
00091       case 4:
00092         sequence_start_string = opts.getArgs( option );
00093         break;
00094       case 5:
00095         mask_file = opts.getArgs( option );
00096         break;
00097       case 6:
00098         proportion_string = opts.getArgs( option );
00099         break;
00100       case 7:
00101         model_file = opts.getArgs( option );
00102         break;
00103       case 8:
00104         threshold_string = opts.getArgs( option );
00105         break;
00106       case 9:
00107         geo_unique = true;
00108         break;
00109       case 10:
00110         env_unique = true;
00111         break;
00112       case 11:
00113         result_file = opts.getArgs( option );
00114         break;
00115       case 12:
00116         log_level = opts.getArgs( option );
00117         break;
00118       case 13:
00119         log_file = opts.getArgs( option );
00120         break;
00121       case 14:
00122         progress_file = opts.getArgs( option );
00123         break;
00124       case 15:
00125         config_file = opts.getArgs( option );
00126         break;
00127       default:
00128         break;
00129     }
00130   }
00131 
00132   // om configuration
00133   if ( ! config_file.empty() ) { 
00134 
00135     Settings::loadConfig( config_file );
00136   }
00137 
00138   // Log stuff
00139 
00140   Log::Level level_code = getLogLevel( log_level );
00141 
00142   if ( ! log_file.empty() ) {
00143 
00144     Log::instance()->set( level_code, log_file, "" );
00145   }
00146   else {
00147  
00148     // Just set the level - things will go to stderr
00149     Log::instance()->setLevel( level_code );
00150   }
00151 
00152   // Check parameters
00153 
00154   if ( request_file.empty() ) {
00155 
00156     if ( num_points_string.empty() ) {
00157 
00158       printf( "Please specify the number of points to be generated\n");
00159       exit(1);
00160     }
00161 
00162     num_points = atoi( num_points_string.c_str() );
00163 
00164     if ( num_points <= 0 ) {
00165 
00166       printf( "Please specify a valid (> 0) number of points to be generated\n");
00167       exit(1);
00168     }
00169 
00170     if ( mask_file.empty() ) {
00171 
00172       printf( "Please specify a mask file\n");
00173       exit(1);
00174     }
00175   
00176     if ( ! sequence_start_string.empty() ) {
00177 
00178       sequence_start = atoi( sequence_start_string.c_str() );
00179     }
00180 
00181     if ( proportion_string.empty() ) {
00182 
00183       proportion_string = "1";
00184     }
00185 
00186     proportion = atof( proportion_string.c_str() );
00187 
00188     if ( proportion > 1.0 ) {
00189 
00190       proportion = 1.0;
00191     }
00192     else if ( proportion < 0.0 ) {
00193 
00194       proportion = 0.0;
00195     }
00196 
00197     if ( ! threshold_string.empty() ) {
00198 
00199       threshold = atof( threshold_string.c_str() );
00200     }
00201 
00202     if ( threshold <= 0.0 ) {
00203 
00204       printf( "Model threshold must be greater than zero\n");
00205       exit(1);
00206     }
00207 
00208     if ( threshold >= 1.0 ) {
00209 
00210       printf( "Model threshold must be smaller than one\n");
00211       exit(1);
00212     }
00213   }
00214   else {
00215 
00216     if ( ! num_points_string.empty() ) {
00217 
00218       Log::instance()->warn( "num-points parameter will be ignored (using XML request instead)\n" );
00219     }
00220     if ( label.compare("label") != 0 ) {
00221 
00222       Log::instance()->warn( "label parameter will be ignored (using XML request instead)\n" );
00223     }
00224     if ( ! sequence_start_string.empty() ) {
00225 
00226       Log::instance()->warn( "seq-start parameter will be ignored (using XML request instead)\n" );
00227     }
00228     if ( ! mask_file.empty() ) {
00229 
00230       Log::instance()->warn( "mask parameter will be ignored (using XML request instead)\n" );
00231     }
00232     if ( ! proportion_string.empty() ) {
00233 
00234       Log::instance()->warn( "proportion parameter will be ignored (using XML request instead)\n" );
00235     }
00236     if ( ! model_file.empty() ) {
00237 
00238       Log::instance()->warn( "model parameter will be ignored (using XML request instead)\n" );
00239     }
00240     if ( ! threshold_string.empty() ) {
00241 
00242       Log::instance()->warn( "threshold parameter will be ignored (using XML request instead)\n" );
00243     }
00244     if ( geo_unique ) {
00245 
00246       Log::instance()->warn( "geo-unique parameter will be ignored (using XML request instead)\n" );
00247     }
00248     if ( env_unique ) {
00249 
00250       Log::instance()->warn( "env-unique parameter will be ignored (using XML request instead)\n" );
00251     }
00252   }
00253 
00254   // Initialize progress data if user wants to track progress
00255   progress_data prog_data;
00256 
00257   if ( ! progress_file.empty() ) { 
00258 
00259     prog_data.file_name = progress_file;
00260 
00261     time( &prog_data.timestamp );
00262 
00263     prog_data.progress = -1.0; // queued
00264 
00265     // Always create initial file with progress 0
00266     progressFileCallback( 0.0, &prog_data );
00267   }
00268   
00269   // Real work
00270 
00271   try {
00272 
00273     SamplerPtr samp;
00274 
00275     Model model = 0;
00276     
00277     if ( request_file.empty() ) {
00278 
00279       if ( model_file.empty() ) {
00280 
00281         std::vector<std::string> categorical_layers, continuous_layers;
00282 
00283         continuous_layers.push_back( mask_file ); // need to add at least one layer
00284 
00285         EnvironmentPtr env = createEnvironment( categorical_layers, continuous_layers );
00286 
00287         if ( ! env ) {
00288 
00289           Log::instance()->error( "Could not create environment object. Aborting.\n");
00290 
00291           // If user is tracking progress
00292           if ( ! progress_file.empty() ) { 
00293 
00294             // -2 means aborted
00295             progressFileCallback( -2.0, &prog_data );
00296           }
00297 
00298           exit(1);
00299         }
00300 
00301         OccurrencesPtr presences( new OccurrencesImpl( label ) );
00302         OccurrencesPtr absences( new OccurrencesImpl( label ) );
00303 
00304         samp = createSampler( env, presences, absences );
00305       }
00306       else {
00307     
00308         // Load available algorithms
00309         AlgorithmFactory::searchDefaultDirs();
00310 
00311         // Load serialized model
00312         ConfigurationPtr config = Configuration::readXml( model_file.c_str() );
00313 
00314         AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) );
00315 
00316         model = alg->getModel();
00317 
00318         // note: alg deserialization doesn't include sampler stuff
00319         SamplerPtr alg_samp = createSampler( config->getSubsection( "Sampler" ) );
00320 
00321         if ( ! alg_samp ) {
00322 
00323           Log::instance()->error( "Could not find sampler data in the specified model file. Aborting.\n");
00324 
00325           // If user is tracking progress
00326           if ( ! progress_file.empty() ) { 
00327 
00328             // -2 means aborted
00329             progressFileCallback( -2.0, &prog_data );
00330           }
00331 
00332           exit(1);
00333         }
00334 
00335         EnvironmentPtr env = alg_samp->getEnvironment();
00336 
00337         env->changeMask( mask_file );
00338 
00339         // note: no need to change the label in presences & absences, because it 
00340         // will be ignored when generating the points
00341         OccurrencesPtr presences = alg_samp->getPresences();
00342         OccurrencesPtr absences = alg_samp->getAbsences();
00343  
00344         samp = createSampler( env, presences, absences );
00345 
00346         // Overwrite sampler, in case masks are different
00347         alg->setSampler( samp );
00348 
00349         // Normalize environment if necessary
00350         model->setNormalization( env );
00351       }
00352     }
00353     else {
00354 
00355       ConfigurationPtr config = Configuration::readXml( request_file.c_str() );
00356 
00357       EnvironmentPtr env = createEnvironment( config->getSubsection( "Environment" ) );
00358 
00359       if ( ! env ) {
00360 
00361         Log::instance()->error( "Could not create environment object. Aborting.\n");
00362 
00363         // If user is tracking progress
00364         if ( ! progress_file.empty() ) { 
00365 
00366           // -2 means aborted
00367           progressFileCallback( -2.0, &prog_data );
00368         }
00369 
00370         exit(1);
00371       }
00372 
00373       ConfigurationPtr options_config = config->getSubsection( "Options" );
00374 
00375       num_points = options_config->getAttributeAsInt( "NumPoints", 0 );
00376 
00377       if ( num_points <= 0 ) {
00378 
00379         Log::instance()->error( "Please specify a valid (> 0) number of points to be generated. Aborting.\n");
00380 
00381         // If user is tracking progress
00382         if ( ! progress_file.empty() ) { 
00383 
00384           // -2 means aborted
00385           progressFileCallback( -2.0, &prog_data );
00386         }
00387 
00388         exit(1);
00389       }
00390 
00391       try {
00392 
00393         label = options_config->getAttribute( "Label" );
00394       }
00395       catch ( AttributeNotFound& e ) { 
00396 
00397         // optional attribute
00398         UNUSED(e);
00399       }
00400 
00401       try {
00402 
00403         proportion = options_config->getAttributeAsDouble( "ProportionOfAbsences", 1.0 );
00404 
00405         if ( proportion > 1.0 ) {
00406 
00407           proportion = 1.0;
00408         }
00409         else if ( proportion < 0.0 ) {
00410 
00411           proportion = 0.0;
00412         }
00413       }
00414       catch ( AttributeNotFound& e ) { 
00415 
00416         // optional attribute
00417         UNUSED(e);
00418       }
00419 
00420       OccurrencesPtr presences( new OccurrencesImpl( label ) );
00421       OccurrencesPtr absences( new OccurrencesImpl( label ) );
00422 
00423       samp = createSampler( env, presences, absences );
00424       
00425       ConstConfigurationPtr occ_filter_config = options_config->getSubsection( "OccurrencesFilter", false );
00426 
00427       if ( occ_filter_config ) {
00428 
00429         ConstConfigurationPtr su_config = occ_filter_config->getSubsection( "SpatiallyUnique", false );
00430 
00431         if ( su_config ) {
00432 
00433           samp->spatiallyUnique();
00434         }
00435 
00436         ConstConfigurationPtr eu_config = occ_filter_config->getSubsection( "EnvironmentallyUnique", false );
00437 
00438         if ( eu_config ) {
00439 
00440           samp->environmentallyUnique();
00441         }
00442       }
00443     }
00444     
00445     if ( ! samp ) {
00446 
00447       Log::instance()->error( "Could not create sampler object. Aborting.\n");
00448 
00449       // If user is tracking progress
00450       if ( ! progress_file.empty() ) { 
00451 
00452         // -2 means aborted
00453         progressFileCallback( -2.0, &prog_data );
00454       }
00455 
00456       exit(1);
00457     }
00458 
00459     num_absences_to_be_generated = (int)(num_points * proportion);
00460 
00461     SamplerPtr new_samp;
00462 
00463     OccurrencesPtr new_presences( new OccurrencesImpl( 1.0 ) );
00464 
00465     if ( num_absences_to_be_generated < num_points ) {
00466 
00467       new_presences = samp->getPseudoPresences( (num_points-num_absences_to_be_generated), model, threshold, geo_unique, env_unique, sequence_start );
00468       new_presences->setLabel( label );
00469     }
00470 
00471     OccurrencesPtr new_absences( new OccurrencesImpl( 0.0 ) );
00472 
00473     if ( num_absences_to_be_generated > 0 ) {
00474 
00475       new_absences = samp->getPseudoAbsences( num_absences_to_be_generated, model, threshold, geo_unique, env_unique, sequence_start+num_points-num_absences_to_be_generated );
00476       new_absences->setLabel( label );
00477     }
00478 
00479     new_samp = createSampler( samp->getEnvironment(), new_presences, new_absences );
00480 
00481     // Output
00482     std::cerr << flush;
00483     if ( request_file.empty() ) {
00484 
00485       // No XML request = TXT output
00486       std::streambuf * buf;
00487       std::ofstream of;
00488 
00489       if ( ! result_file.empty() ) {
00490 
00491         of.open( result_file.c_str() );
00492         buf = of.rdbuf();
00493       }
00494       else {
00495 
00496         buf = std::cout.rdbuf();
00497       }
00498 
00499       std::ostream out(buf);
00500       std::cerr << flush;
00501       out << "#id\t" << "label\t" << "long\t" << "lat\t" << "abundance" << endl << flush;
00502 
00503       OccurrencesImpl::iterator it = new_presences->begin();
00504       OccurrencesImpl::iterator end = new_presences->end();
00505 
00506       // Presences
00507       int i = 0;
00508       while ( it != end ) {
00509 
00510         std::cerr << flush;
00511         out << sequence_start + i << "\t" << label.c_str() << "\t" << std::setprecision(9) << (*it)->x() << "\t" << (*it)->y() << "\t1" << endl << flush;
00512         it++;
00513         i++;
00514       }
00515       
00516       it = new_absences->begin();
00517       end = new_absences->end();
00518 
00519       // Absences
00520       while ( it != end ) {
00521 
00522         std::cerr << flush;
00523         out << sequence_start + i << "\t" << label.c_str() << "\t" << std::setprecision(9) << (*it)->x() << "\t" << (*it)->y() << "\t0" << endl << flush;
00524         it++;
00525         i++;
00526       }
00527     }
00528     else {
00529 
00530       // XML output
00531 
00532       std::ostringstream output;
00533 
00534       Configuration::writeXml( new_samp->getConfiguration(), output );
00535 
00536       // Write output to file, if requested
00537       if ( ! result_file.empty() ) {
00538 
00539         ofstream file( result_file.c_str() );
00540         file << output.str();
00541         file.close();
00542       }
00543       else {
00544 
00545         // Otherwise send it to stdout
00546         std::cout << output.str().c_str() << endl << flush;
00547       }
00548     }
00549 
00550     // If user wants to track progress
00551     if ( ! progress_file.empty() ) { 
00552 
00553       // Indicate that the job is finished
00554       progressFileCallback( 1.0, &prog_data );
00555     }
00556   }
00557   catch ( runtime_error e ) {
00558 
00559     // If user is tracking progress
00560     if ( ! progress_file.empty() ) { 
00561 
00562       // -2 means aborted
00563       progressFileCallback( -2.0, &prog_data );
00564     }
00565 
00566     printf( "om_pseudo: %s\n", e.what() );
00567     exit(1);
00568   }
00569 
00570   return 0;
00571 }