openModeller
Version 1.4.0
|
00001 #include <openmodeller/om.hh> 00002 #include <openmodeller/os_specific.hh> 00003 #include <openmodeller/Exceptions.hh> 00004 00005 #include "getopts/getopts.h" 00006 00007 #include "om_cmd_utils.hh" 00008 00009 #include <time.h> // used to limit the number of times that the progress is written to a file 00010 #include <string> 00011 #include <stdexcept> 00012 #include <iomanip> // std::setprecision 00013 #include <sstream> 00014 00015 using namespace std; 00016 00017 int main( int argc, char **argv ) { 00018 00019 Options opts; 00020 int option; 00021 00022 // command-line parameters (short name, long name, description, take args) 00023 opts.addOption( "v", "version" , "Display the version info" , false ); 00024 opts.addOption( "r", "xml-req" , "(option 1) Model evaluation request file in XML" , true ); 00025 opts.addOption( "n", "num-points" , "(option 2) Number of points to be generated" , true ); 00026 opts.addOption( "l", "label" , "(option 2) Label for the points" , true ); 00027 opts.addOption( "q", "seq-start" , "(option 2) Sequence start for points id" , true ); 00028 opts.addOption( "m", "mask" , "(option 2) Mask file" , true ); 00029 opts.addOption( "p", "proportion" , "(option 2) Proportion of absence points (decimals)" , true ); 00030 opts.addOption( "o", "model" , "(option 2) File with serialized model" , true ); 00031 opts.addOption( "t", "threshold" , "(option 2) Model threshold (default 0.5)" , true ); 00032 opts.addOption( "" , "geo-unique" , "(option 2) Avoid repeating same coordinates" , false ); 00033 opts.addOption( "" , "env-unique" , "(option 2) Avoid repeating same environment condition", false ); 00034 opts.addOption( "s", "result" , "File to store result" , true ); 00035 opts.addOption( "" , "log-level" , "Set the log level (debug, warn, info, error)" , true ); 00036 opts.addOption( "" , "log-file" , "Log file" , true ); 00037 opts.addOption( "" , "prog-file" , "File to store progress" , true ); 00038 opts.addOption( "c", "config-file", "Configuration file for openModeller" , true ); 00039 00040 std::string log_level("info"); 00041 std::string request_file; 00042 std::string num_points_string; 00043 int num_points = 0; 00044 std::string label("label"); 00045 std::string sequence_start_string; 00046 int sequence_start = 1; 00047 std::string mask_file; 00048 std::string proportion_string; 00049 double proportion = 1.0; 00050 int num_absences_to_be_generated; 00051 std::string model_file; 00052 std::string threshold_string; 00053 double threshold = 0.5; 00054 bool geo_unique = false; 00055 bool env_unique = false; 00056 std::string result_file; 00057 std::string log_file; 00058 std::string progress_file; 00059 std::string config_file; 00060 00061 if ( ! opts.parse( argc, argv ) ) { 00062 00063 opts.showHelp( argv[0] ); 00064 exit(0); 00065 } 00066 00067 // Set up any related external resources 00068 setupExternalResources(); 00069 00070 OpenModeller om; 00071 00072 while ( ( option = opts.cycle() ) >= 0 ) { 00073 00074 switch ( option ) { 00075 00076 case 0: 00077 printf( "om_pseudo %s\n", om.getVersion().c_str() ); 00078 printf("This is free software; see the source for copying conditions. There is NO\n"); 00079 printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); 00080 exit(0); 00081 break; 00082 case 1: 00083 request_file = opts.getArgs( option ); 00084 break; 00085 case 2: 00086 num_points_string = opts.getArgs( option ); 00087 break; 00088 case 3: 00089 label = opts.getArgs( option ); 00090 break; 00091 case 4: 00092 sequence_start_string = opts.getArgs( option ); 00093 break; 00094 case 5: 00095 mask_file = opts.getArgs( option ); 00096 break; 00097 case 6: 00098 proportion_string = opts.getArgs( option ); 00099 break; 00100 case 7: 00101 model_file = opts.getArgs( option ); 00102 break; 00103 case 8: 00104 threshold_string = opts.getArgs( option ); 00105 break; 00106 case 9: 00107 geo_unique = true; 00108 break; 00109 case 10: 00110 env_unique = true; 00111 break; 00112 case 11: 00113 result_file = opts.getArgs( option ); 00114 break; 00115 case 12: 00116 log_level = opts.getArgs( option ); 00117 break; 00118 case 13: 00119 log_file = opts.getArgs( option ); 00120 break; 00121 case 14: 00122 progress_file = opts.getArgs( option ); 00123 break; 00124 case 15: 00125 config_file = opts.getArgs( option ); 00126 break; 00127 default: 00128 break; 00129 } 00130 } 00131 00132 // om configuration 00133 if ( ! config_file.empty() ) { 00134 00135 Settings::loadConfig( config_file ); 00136 } 00137 00138 // Log stuff 00139 00140 Log::Level level_code = getLogLevel( log_level ); 00141 00142 if ( ! log_file.empty() ) { 00143 00144 Log::instance()->set( level_code, log_file, "" ); 00145 } 00146 else { 00147 00148 // Just set the level - things will go to stderr 00149 Log::instance()->setLevel( level_code ); 00150 } 00151 00152 // Check parameters 00153 00154 if ( request_file.empty() ) { 00155 00156 if ( num_points_string.empty() ) { 00157 00158 printf( "Please specify the number of points to be generated\n"); 00159 exit(1); 00160 } 00161 00162 num_points = atoi( num_points_string.c_str() ); 00163 00164 if ( num_points <= 0 ) { 00165 00166 printf( "Please specify a valid (> 0) number of points to be generated\n"); 00167 exit(1); 00168 } 00169 00170 if ( mask_file.empty() ) { 00171 00172 printf( "Please specify a mask file\n"); 00173 exit(1); 00174 } 00175 00176 if ( ! sequence_start_string.empty() ) { 00177 00178 sequence_start = atoi( sequence_start_string.c_str() ); 00179 } 00180 00181 if ( proportion_string.empty() ) { 00182 00183 proportion_string = "1"; 00184 } 00185 00186 proportion = atof( proportion_string.c_str() ); 00187 00188 if ( proportion > 1.0 ) { 00189 00190 proportion = 1.0; 00191 } 00192 else if ( proportion < 0.0 ) { 00193 00194 proportion = 0.0; 00195 } 00196 00197 if ( ! threshold_string.empty() ) { 00198 00199 threshold = atof( threshold_string.c_str() ); 00200 } 00201 00202 if ( threshold <= 0.0 ) { 00203 00204 printf( "Model threshold must be greater than zero\n"); 00205 exit(1); 00206 } 00207 00208 if ( threshold >= 1.0 ) { 00209 00210 printf( "Model threshold must be smaller than one\n"); 00211 exit(1); 00212 } 00213 } 00214 else { 00215 00216 if ( ! num_points_string.empty() ) { 00217 00218 Log::instance()->warn( "num-points parameter will be ignored (using XML request instead)\n" ); 00219 } 00220 if ( label.compare("label") != 0 ) { 00221 00222 Log::instance()->warn( "label parameter will be ignored (using XML request instead)\n" ); 00223 } 00224 if ( ! sequence_start_string.empty() ) { 00225 00226 Log::instance()->warn( "seq-start parameter will be ignored (using XML request instead)\n" ); 00227 } 00228 if ( ! mask_file.empty() ) { 00229 00230 Log::instance()->warn( "mask parameter will be ignored (using XML request instead)\n" ); 00231 } 00232 if ( ! proportion_string.empty() ) { 00233 00234 Log::instance()->warn( "proportion parameter will be ignored (using XML request instead)\n" ); 00235 } 00236 if ( ! model_file.empty() ) { 00237 00238 Log::instance()->warn( "model parameter will be ignored (using XML request instead)\n" ); 00239 } 00240 if ( ! threshold_string.empty() ) { 00241 00242 Log::instance()->warn( "threshold parameter will be ignored (using XML request instead)\n" ); 00243 } 00244 if ( geo_unique ) { 00245 00246 Log::instance()->warn( "geo-unique parameter will be ignored (using XML request instead)\n" ); 00247 } 00248 if ( env_unique ) { 00249 00250 Log::instance()->warn( "env-unique parameter will be ignored (using XML request instead)\n" ); 00251 } 00252 } 00253 00254 // Initialize progress data if user wants to track progress 00255 progress_data prog_data; 00256 00257 if ( ! progress_file.empty() ) { 00258 00259 prog_data.file_name = progress_file; 00260 00261 time( &prog_data.timestamp ); 00262 00263 prog_data.progress = -1.0; // queued 00264 00265 // Always create initial file with progress 0 00266 progressFileCallback( 0.0, &prog_data ); 00267 } 00268 00269 // Real work 00270 00271 try { 00272 00273 SamplerPtr samp; 00274 00275 Model model = 0; 00276 00277 if ( request_file.empty() ) { 00278 00279 if ( model_file.empty() ) { 00280 00281 std::vector<std::string> categorical_layers, continuous_layers; 00282 00283 continuous_layers.push_back( mask_file ); // need to add at least one layer 00284 00285 EnvironmentPtr env = createEnvironment( categorical_layers, continuous_layers ); 00286 00287 if ( ! env ) { 00288 00289 Log::instance()->error( "Could not create environment object. Aborting.\n"); 00290 00291 // If user is tracking progress 00292 if ( ! progress_file.empty() ) { 00293 00294 // -2 means aborted 00295 progressFileCallback( -2.0, &prog_data ); 00296 } 00297 00298 exit(1); 00299 } 00300 00301 OccurrencesPtr presences( new OccurrencesImpl( label ) ); 00302 OccurrencesPtr absences( new OccurrencesImpl( label ) ); 00303 00304 samp = createSampler( env, presences, absences ); 00305 } 00306 else { 00307 00308 // Load available algorithms 00309 AlgorithmFactory::searchDefaultDirs(); 00310 00311 // Load serialized model 00312 ConfigurationPtr config = Configuration::readXml( model_file.c_str() ); 00313 00314 AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) ); 00315 00316 model = alg->getModel(); 00317 00318 // note: alg deserialization doesn't include sampler stuff 00319 SamplerPtr alg_samp = createSampler( config->getSubsection( "Sampler" ) ); 00320 00321 if ( ! alg_samp ) { 00322 00323 Log::instance()->error( "Could not find sampler data in the specified model file. Aborting.\n"); 00324 00325 // If user is tracking progress 00326 if ( ! progress_file.empty() ) { 00327 00328 // -2 means aborted 00329 progressFileCallback( -2.0, &prog_data ); 00330 } 00331 00332 exit(1); 00333 } 00334 00335 EnvironmentPtr env = alg_samp->getEnvironment(); 00336 00337 env->changeMask( mask_file ); 00338 00339 // note: no need to change the label in presences & absences, because it 00340 // will be ignored when generating the points 00341 OccurrencesPtr presences = alg_samp->getPresences(); 00342 OccurrencesPtr absences = alg_samp->getAbsences(); 00343 00344 samp = createSampler( env, presences, absences ); 00345 00346 // Overwrite sampler, in case masks are different 00347 alg->setSampler( samp ); 00348 00349 // Normalize environment if necessary 00350 model->setNormalization( env ); 00351 } 00352 } 00353 else { 00354 00355 ConfigurationPtr config = Configuration::readXml( request_file.c_str() ); 00356 00357 EnvironmentPtr env = createEnvironment( config->getSubsection( "Environment" ) ); 00358 00359 if ( ! env ) { 00360 00361 Log::instance()->error( "Could not create environment object. Aborting.\n"); 00362 00363 // If user is tracking progress 00364 if ( ! progress_file.empty() ) { 00365 00366 // -2 means aborted 00367 progressFileCallback( -2.0, &prog_data ); 00368 } 00369 00370 exit(1); 00371 } 00372 00373 ConfigurationPtr options_config = config->getSubsection( "Options" ); 00374 00375 num_points = options_config->getAttributeAsInt( "NumPoints", 0 ); 00376 00377 if ( num_points <= 0 ) { 00378 00379 Log::instance()->error( "Please specify a valid (> 0) number of points to be generated. Aborting.\n"); 00380 00381 // If user is tracking progress 00382 if ( ! progress_file.empty() ) { 00383 00384 // -2 means aborted 00385 progressFileCallback( -2.0, &prog_data ); 00386 } 00387 00388 exit(1); 00389 } 00390 00391 try { 00392 00393 label = options_config->getAttribute( "Label" ); 00394 } 00395 catch ( AttributeNotFound& e ) { 00396 00397 // optional attribute 00398 UNUSED(e); 00399 } 00400 00401 try { 00402 00403 proportion = options_config->getAttributeAsDouble( "ProportionOfAbsences", 1.0 ); 00404 00405 if ( proportion > 1.0 ) { 00406 00407 proportion = 1.0; 00408 } 00409 else if ( proportion < 0.0 ) { 00410 00411 proportion = 0.0; 00412 } 00413 } 00414 catch ( AttributeNotFound& e ) { 00415 00416 // optional attribute 00417 UNUSED(e); 00418 } 00419 00420 OccurrencesPtr presences( new OccurrencesImpl( label ) ); 00421 OccurrencesPtr absences( new OccurrencesImpl( label ) ); 00422 00423 samp = createSampler( env, presences, absences ); 00424 00425 ConstConfigurationPtr occ_filter_config = options_config->getSubsection( "OccurrencesFilter", false ); 00426 00427 if ( occ_filter_config ) { 00428 00429 ConstConfigurationPtr su_config = occ_filter_config->getSubsection( "SpatiallyUnique", false ); 00430 00431 if ( su_config ) { 00432 00433 samp->spatiallyUnique(); 00434 } 00435 00436 ConstConfigurationPtr eu_config = occ_filter_config->getSubsection( "EnvironmentallyUnique", false ); 00437 00438 if ( eu_config ) { 00439 00440 samp->environmentallyUnique(); 00441 } 00442 } 00443 } 00444 00445 if ( ! samp ) { 00446 00447 Log::instance()->error( "Could not create sampler object. Aborting.\n"); 00448 00449 // If user is tracking progress 00450 if ( ! progress_file.empty() ) { 00451 00452 // -2 means aborted 00453 progressFileCallback( -2.0, &prog_data ); 00454 } 00455 00456 exit(1); 00457 } 00458 00459 num_absences_to_be_generated = (int)(num_points * proportion); 00460 00461 SamplerPtr new_samp; 00462 00463 OccurrencesPtr new_presences( new OccurrencesImpl( 1.0 ) ); 00464 00465 if ( num_absences_to_be_generated < num_points ) { 00466 00467 new_presences = samp->getPseudoPresences( (num_points-num_absences_to_be_generated), model, threshold, geo_unique, env_unique, sequence_start ); 00468 new_presences->setLabel( label ); 00469 } 00470 00471 OccurrencesPtr new_absences( new OccurrencesImpl( 0.0 ) ); 00472 00473 if ( num_absences_to_be_generated > 0 ) { 00474 00475 new_absences = samp->getPseudoAbsences( num_absences_to_be_generated, model, threshold, geo_unique, env_unique, sequence_start+num_points-num_absences_to_be_generated ); 00476 new_absences->setLabel( label ); 00477 } 00478 00479 new_samp = createSampler( samp->getEnvironment(), new_presences, new_absences ); 00480 00481 // Output 00482 std::cerr << flush; 00483 if ( request_file.empty() ) { 00484 00485 // No XML request = TXT output 00486 std::streambuf * buf; 00487 std::ofstream of; 00488 00489 if ( ! result_file.empty() ) { 00490 00491 of.open( result_file.c_str() ); 00492 buf = of.rdbuf(); 00493 } 00494 else { 00495 00496 buf = std::cout.rdbuf(); 00497 } 00498 00499 std::ostream out(buf); 00500 std::cerr << flush; 00501 out << "#id\t" << "label\t" << "long\t" << "lat\t" << "abundance" << endl << flush; 00502 00503 OccurrencesImpl::iterator it = new_presences->begin(); 00504 OccurrencesImpl::iterator end = new_presences->end(); 00505 00506 // Presences 00507 int i = 0; 00508 while ( it != end ) { 00509 00510 std::cerr << flush; 00511 out << sequence_start + i << "\t" << label.c_str() << "\t" << std::setprecision(9) << (*it)->x() << "\t" << (*it)->y() << "\t1" << endl << flush; 00512 it++; 00513 i++; 00514 } 00515 00516 it = new_absences->begin(); 00517 end = new_absences->end(); 00518 00519 // Absences 00520 while ( it != end ) { 00521 00522 std::cerr << flush; 00523 out << sequence_start + i << "\t" << label.c_str() << "\t" << std::setprecision(9) << (*it)->x() << "\t" << (*it)->y() << "\t0" << endl << flush; 00524 it++; 00525 i++; 00526 } 00527 } 00528 else { 00529 00530 // XML output 00531 00532 std::ostringstream output; 00533 00534 Configuration::writeXml( new_samp->getConfiguration(), output ); 00535 00536 // Write output to file, if requested 00537 if ( ! result_file.empty() ) { 00538 00539 ofstream file( result_file.c_str() ); 00540 file << output.str(); 00541 file.close(); 00542 } 00543 else { 00544 00545 // Otherwise send it to stdout 00546 std::cout << output.str().c_str() << endl << flush; 00547 } 00548 } 00549 00550 // If user wants to track progress 00551 if ( ! progress_file.empty() ) { 00552 00553 // Indicate that the job is finished 00554 progressFileCallback( 1.0, &prog_data ); 00555 } 00556 } 00557 catch ( runtime_error e ) { 00558 00559 // If user is tracking progress 00560 if ( ! progress_file.empty() ) { 00561 00562 // -2 means aborted 00563 progressFileCallback( -2.0, &prog_data ); 00564 } 00565 00566 printf( "om_pseudo: %s\n", e.what() ); 00567 exit(1); 00568 } 00569 00570 return 0; 00571 }