openModeller  Version 1.5.0
om_pseudo.cpp
Go to the documentation of this file.
1 #include <openmodeller/om.hh>
4 
5 #include "getopts/getopts.h"
6 
7 #include "om_cmd_utils.hh"
8 
9 #include <time.h> // used to limit the number of times that the progress is written to a file
10 #include <string>
11 #include <stdexcept>
12 #include <iomanip> // std::setprecision
13 #include <sstream>
14 
15 using namespace std;
16 
17 int main( int argc, char **argv ) {
18 
19  Options opts;
20  int option;
21 
22  // command-line parameters (short name, long name, description, take args)
23  opts.addOption( "v", "version" , "Display the version info" , false );
24  opts.addOption( "r", "xml-req" , "(option 1) Model evaluation request file in XML" , true );
25  opts.addOption( "n", "num-points" , "(option 2) Number of points to be generated" , true );
26  opts.addOption( "l", "label" , "(option 2) Label for the points" , true );
27  opts.addOption( "q", "seq-start" , "(option 2) Sequence start for points id" , true );
28  opts.addOption( "m", "mask" , "(option 2) Mask file" , true );
29  opts.addOption( "p", "proportion" , "(option 2) Proportion of absence points (decimals)" , true );
30  opts.addOption( "o", "model" , "(option 2) File with serialized model" , true );
31  opts.addOption( "t", "threshold" , "(option 2) Model threshold (default 0.5)" , true );
32  opts.addOption( "" , "geo-unique" , "(option 2) Avoid repeating same coordinates" , false );
33  opts.addOption( "" , "env-unique" , "(option 2) Avoid repeating same environment condition", false );
34  opts.addOption( "s", "result" , "File to store result" , true );
35  opts.addOption( "" , "log-level" , "Set the log level (debug, warn, info, error)" , true );
36  opts.addOption( "" , "log-file" , "Log file" , true );
37  opts.addOption( "" , "prog-file" , "File to store progress" , true );
38  opts.addOption( "c", "config-file", "Configuration file for openModeller" , true );
39 
40  std::string log_level("info");
41  std::string request_file;
42  std::string num_points_string;
43  int num_points = 0;
44  std::string label("label");
45  std::string sequence_start_string;
46  int sequence_start = 1;
47  std::string mask_file;
48  std::string proportion_string;
49  double proportion = 1.0;
50  int num_absences_to_be_generated;
51  std::string model_file;
52  std::string threshold_string;
53  double threshold = 0.5;
54  bool geo_unique = false;
55  bool env_unique = false;
56  std::string result_file;
57  std::string log_file;
58  std::string progress_file;
59  std::string config_file;
60 
61  if ( ! opts.parse( argc, argv ) ) {
62 
63  opts.showHelp( argv[0] );
64  exit(0);
65  }
66 
67  // Set up any related external resources
69 
70  OpenModeller om;
71 
72  while ( ( option = opts.cycle() ) >= 0 ) {
73 
74  switch ( option ) {
75 
76  case 0:
77  printf( "om_pseudo %s\n", om.getVersion().c_str() );
78  printf("This is free software; see the source for copying conditions. There is NO\n");
79  printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
80  exit(0);
81  break;
82  case 1:
83  request_file = opts.getArgs( option );
84  break;
85  case 2:
86  num_points_string = opts.getArgs( option );
87  break;
88  case 3:
89  label = opts.getArgs( option );
90  break;
91  case 4:
92  sequence_start_string = opts.getArgs( option );
93  break;
94  case 5:
95  mask_file = opts.getArgs( option );
96  break;
97  case 6:
98  proportion_string = opts.getArgs( option );
99  break;
100  case 7:
101  model_file = opts.getArgs( option );
102  break;
103  case 8:
104  threshold_string = opts.getArgs( option );
105  break;
106  case 9:
107  geo_unique = true;
108  break;
109  case 10:
110  env_unique = true;
111  break;
112  case 11:
113  result_file = opts.getArgs( option );
114  break;
115  case 12:
116  log_level = opts.getArgs( option );
117  break;
118  case 13:
119  log_file = opts.getArgs( option );
120  break;
121  case 14:
122  progress_file = opts.getArgs( option );
123  break;
124  case 15:
125  config_file = opts.getArgs( option );
126  break;
127  default:
128  break;
129  }
130  }
131 
132  // Log stuff
133 
134  Log::Level level_code = getLogLevel( log_level );
135 
136  if ( ! log_file.empty() ) {
137 
138  Log::instance()->set( level_code, log_file, "" );
139  }
140  else {
141 
142  // Just set the level - things will go to stderr
143  Log::instance()->setLevel( level_code );
144  }
145 
146  // om configuration
147  if ( ! config_file.empty() ) {
148 
149  Settings::loadConfig( config_file );
150  }
151 
152  // Check parameters
153 
154  if ( request_file.empty() ) {
155 
156  if ( num_points_string.empty() ) {
157 
158  printf( "Please specify the number of points to be generated\n");
159  exit(1);
160  }
161 
162  num_points = atoi( num_points_string.c_str() );
163 
164  if ( num_points <= 0 ) {
165 
166  printf( "Please specify a valid (> 0) number of points to be generated\n");
167  exit(1);
168  }
169 
170  if ( mask_file.empty() ) {
171 
172  printf( "Please specify a mask file\n");
173  exit(1);
174  }
175 
176  if ( ! sequence_start_string.empty() ) {
177 
178  sequence_start = atoi( sequence_start_string.c_str() );
179  }
180 
181  if ( proportion_string.empty() ) {
182 
183  proportion_string = "1";
184  }
185 
186  proportion = atof( proportion_string.c_str() );
187 
188  if ( proportion > 1.0 ) {
189 
190  proportion = 1.0;
191  }
192  else if ( proportion < 0.0 ) {
193 
194  proportion = 0.0;
195  }
196 
197  if ( ! threshold_string.empty() ) {
198 
199  threshold = atof( threshold_string.c_str() );
200  }
201 
202  if ( threshold <= 0.0 ) {
203 
204  printf( "Model threshold must be greater than zero\n");
205  exit(1);
206  }
207 
208  if ( threshold >= 1.0 ) {
209 
210  printf( "Model threshold must be smaller than one\n");
211  exit(1);
212  }
213  }
214  else {
215 
216  if ( ! num_points_string.empty() ) {
217 
218  Log::instance()->warn( "num-points parameter will be ignored (using XML request instead)\n" );
219  }
220  if ( label.compare("label") != 0 ) {
221 
222  Log::instance()->warn( "label parameter will be ignored (using XML request instead)\n" );
223  }
224  if ( ! sequence_start_string.empty() ) {
225 
226  Log::instance()->warn( "seq-start parameter will be ignored (using XML request instead)\n" );
227  }
228  if ( ! mask_file.empty() ) {
229 
230  Log::instance()->warn( "mask parameter will be ignored (using XML request instead)\n" );
231  }
232  if ( ! proportion_string.empty() ) {
233 
234  Log::instance()->warn( "proportion parameter will be ignored (using XML request instead)\n" );
235  }
236  if ( ! model_file.empty() ) {
237 
238  Log::instance()->warn( "model parameter will be ignored (using XML request instead)\n" );
239  }
240  if ( ! threshold_string.empty() ) {
241 
242  Log::instance()->warn( "threshold parameter will be ignored (using XML request instead)\n" );
243  }
244  if ( geo_unique ) {
245 
246  Log::instance()->warn( "geo-unique parameter will be ignored (using XML request instead)\n" );
247  }
248  if ( env_unique ) {
249 
250  Log::instance()->warn( "env-unique parameter will be ignored (using XML request instead)\n" );
251  }
252  }
253 
254  // Initialize progress data if user wants to track progress
255  progress_data prog_data;
256 
257  if ( ! progress_file.empty() ) {
258 
259  prog_data.file_name = progress_file;
260 
261  time( &prog_data.timestamp );
262 
263  prog_data.progress = -1.0; // queued
264 
265  // Always create initial file with progress 0
266  progressFileCallback( 0.0, &prog_data );
267  }
268 
269  // Real work
270 
271  try {
272 
273  SamplerPtr samp;
274 
275  Model model = 0;
276 
277  if ( request_file.empty() ) {
278 
279  if ( model_file.empty() ) {
280 
281  std::vector<std::string> categorical_layers, continuous_layers;
282 
283  continuous_layers.push_back( mask_file ); // need to add at least one layer
284 
285  EnvironmentPtr env = createEnvironment( categorical_layers, continuous_layers );
286 
287  if ( ! env ) {
288 
289  Log::instance()->error( "Could not create environment object. Aborting.\n");
290 
291  // If user is tracking progress
292  if ( ! progress_file.empty() ) {
293 
294  // -2 means aborted
295  progressFileCallback( -2.0, &prog_data );
296  }
297 
298  exit(1);
299  }
300 
301  OccurrencesPtr presences( new OccurrencesImpl( label ) );
302  OccurrencesPtr absences( new OccurrencesImpl( label ) );
303 
304  samp = createSampler( env, presences, absences );
305  }
306  else {
307 
308  // Load available algorithms
310 
311  // Load serialized model
312  ConfigurationPtr config = Configuration::readXml( model_file.c_str() );
313 
314  AlgorithmPtr alg = AlgorithmFactory::newAlgorithm( config->getSubsection( "Algorithm" ) );
315 
316  model = alg->getModel();
317 
318  // note: alg deserialization doesn't include sampler stuff
319  SamplerPtr alg_samp = createSampler( config->getSubsection( "Sampler" ) );
320 
321  if ( ! alg_samp ) {
322 
323  Log::instance()->error( "Could not find sampler data in the specified model file. Aborting.\n");
324 
325  // If user is tracking progress
326  if ( ! progress_file.empty() ) {
327 
328  // -2 means aborted
329  progressFileCallback( -2.0, &prog_data );
330  }
331 
332  exit(1);
333  }
334 
335  EnvironmentPtr env = alg_samp->getEnvironment();
336 
337  env->changeMask( mask_file );
338 
339  // note: no need to change the label in presences & absences, because it
340  // will be ignored when generating the points
341  OccurrencesPtr presences = alg_samp->getPresences();
342  OccurrencesPtr absences = alg_samp->getAbsences();
343 
344  samp = createSampler( env, presences, absences );
345 
346  // Overwrite sampler, in case masks are different
347  alg->setSampler( samp );
348 
349  // Normalize environment if necessary
350  model->setNormalization( env );
351  }
352  }
353  else {
354 
355  ConfigurationPtr config = Configuration::readXml( request_file.c_str() );
356 
357  EnvironmentPtr env = createEnvironment( config->getSubsection( "Environment" ) );
358 
359  if ( ! env ) {
360 
361  Log::instance()->error( "Could not create environment object. Aborting.\n");
362 
363  // If user is tracking progress
364  if ( ! progress_file.empty() ) {
365 
366  // -2 means aborted
367  progressFileCallback( -2.0, &prog_data );
368  }
369 
370  exit(1);
371  }
372 
373  ConfigurationPtr options_config = config->getSubsection( "Options" );
374 
375  num_points = options_config->getAttributeAsInt( "NumPoints", 0 );
376 
377  if ( num_points <= 0 ) {
378 
379  Log::instance()->error( "Please specify a valid (> 0) number of points to be generated. Aborting.\n");
380 
381  // If user is tracking progress
382  if ( ! progress_file.empty() ) {
383 
384  // -2 means aborted
385  progressFileCallback( -2.0, &prog_data );
386  }
387 
388  exit(1);
389  }
390 
391  try {
392 
393  label = options_config->getAttribute( "Label" );
394  }
395  catch ( AttributeNotFound& e ) {
396 
397  // optional attribute
398  UNUSED(e);
399  }
400 
401  try {
402 
403  proportion = options_config->getAttributeAsDouble( "ProportionOfAbsences", 1.0 );
404 
405  if ( proportion > 1.0 ) {
406 
407  proportion = 1.0;
408  }
409  else if ( proportion < 0.0 ) {
410 
411  proportion = 0.0;
412  }
413  }
414  catch ( AttributeNotFound& e ) {
415 
416  // optional attribute
417  UNUSED(e);
418  }
419 
420  OccurrencesPtr presences( new OccurrencesImpl( label ) );
421  OccurrencesPtr absences( new OccurrencesImpl( label ) );
422 
423  samp = createSampler( env, presences, absences );
424 
425  ConstConfigurationPtr occ_filter_config = options_config->getSubsection( "OccurrencesFilter", false );
426 
427  if ( occ_filter_config ) {
428 
429  ConstConfigurationPtr su_config = occ_filter_config->getSubsection( "SpatiallyUnique", false );
430 
431  if ( su_config ) {
432 
433  samp->spatiallyUnique();
434  }
435 
436  ConstConfigurationPtr eu_config = occ_filter_config->getSubsection( "EnvironmentallyUnique", false );
437 
438  if ( eu_config ) {
439 
440  samp->environmentallyUnique();
441  }
442  }
443  }
444 
445  if ( ! samp ) {
446 
447  Log::instance()->error( "Could not create sampler object. Aborting.\n");
448 
449  // If user is tracking progress
450  if ( ! progress_file.empty() ) {
451 
452  // -2 means aborted
453  progressFileCallback( -2.0, &prog_data );
454  }
455 
456  exit(1);
457  }
458 
459  num_absences_to_be_generated = (int)(num_points * proportion);
460 
461  SamplerPtr new_samp;
462 
463  OccurrencesPtr new_presences( new OccurrencesImpl( 1.0 ) );
464 
465  if ( num_absences_to_be_generated < num_points ) {
466 
467  new_presences = samp->getPseudoPresences( (num_points-num_absences_to_be_generated), model, threshold, geo_unique, env_unique, sequence_start );
468  new_presences->setLabel( label );
469  }
470 
471  OccurrencesPtr new_absences( new OccurrencesImpl( 0.0 ) );
472 
473  if ( num_absences_to_be_generated > 0 ) {
474 
475  new_absences = samp->getPseudoAbsences( num_absences_to_be_generated, model, threshold, geo_unique, env_unique, sequence_start+num_points-num_absences_to_be_generated );
476  new_absences->setLabel( label );
477  }
478 
479  new_samp = createSampler( samp->getEnvironment(), new_presences, new_absences );
480 
481  // Output
482  std::cerr << flush;
483  if ( request_file.empty() ) {
484 
485  // No XML request = TXT output
486  std::streambuf * buf;
487  std::ofstream of;
488 
489  if ( ! result_file.empty() ) {
490 
491  of.open( result_file.c_str() );
492  buf = of.rdbuf();
493  }
494  else {
495 
496  buf = std::cout.rdbuf();
497  }
498 
499  std::ostream out(buf);
500  std::cerr << flush;
501  out << "#id\t" << "label\t" << "long\t" << "lat\t" << "abundance" << endl << flush;
502 
503  OccurrencesImpl::iterator it = new_presences->begin();
504  OccurrencesImpl::iterator end = new_presences->end();
505 
506  // Presences
507  int i = 0;
508  while ( it != end ) {
509 
510  std::cerr << flush;
511  out << sequence_start + i << "\t" << label.c_str() << "\t" << std::setprecision(9) << (*it)->x() << "\t" << (*it)->y() << "\t1" << endl << flush;
512  it++;
513  i++;
514  }
515 
516  it = new_absences->begin();
517  end = new_absences->end();
518 
519  // Absences
520  while ( it != end ) {
521 
522  std::cerr << flush;
523  out << sequence_start + i << "\t" << label.c_str() << "\t" << std::setprecision(9) << (*it)->x() << "\t" << (*it)->y() << "\t0" << endl << flush;
524  it++;
525  i++;
526  }
527  }
528  else {
529 
530  // XML output
531 
532  std::ostringstream output;
533 
534  Configuration::writeXml( new_samp->getConfiguration(), output );
535 
536  // Write output to file, if requested
537  if ( ! result_file.empty() ) {
538 
539  ofstream file( result_file.c_str() );
540  file << output.str();
541  file.close();
542  }
543  else {
544 
545  // Otherwise send it to stdout
546  std::cout << output.str().c_str() << endl << flush;
547  }
548  }
549 
550  // If user wants to track progress
551  if ( ! progress_file.empty() ) {
552 
553  // Indicate that the job is finished
554  progressFileCallback( 1.0, &prog_data );
555  }
556  }
557  catch ( runtime_error e ) {
558 
559  // If user is tracking progress
560  if ( ! progress_file.empty() ) {
561 
562  // -2 means aborted
563  progressFileCallback( -2.0, &prog_data );
564  }
565 
566  printf( "om_pseudo: %s\n", e.what() );
567  exit(1);
568  }
569 
570  return 0;
571 }
static void loadConfig(const std::string configFile)
Definition: Settings.cpp:100
std::vector< OccurrencePtr >::iterator iterator
Definition: Occurrences.hh:86
void warn(const char *format,...)
'Warn' level.
Definition: Log.cpp:273
static ConfigurationPtr readXml(char const *filename)
static AlgorithmPtr newAlgorithm(std::string const id)
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
std::string file_name
Definition: om_cmd_utils.hh:42
Log::Level getLogLevel(std::string level)
int main(int argc, char **argv)
Definition: om_pseudo.cpp:17
Level
Definition: Log.hh:54
void setLevel(Level level)
Definition: Log.hh:107
EnvironmentPtr createEnvironment(const std::vector< std::string > &categs, const std::vector< std::string > &maps, const std::string &mask_file)
Definition: Environment.cpp:55
SamplerPtr createSampler(const EnvironmentPtr &env, const OccurrencesPtr &presence, const OccurrencesPtr &absence)
Definition: Sampler.cpp:52
void error(const char *format,...)
'Error' level.
Definition: Log.cpp:290
void setupExternalResources()
Definition: os_specific.cpp:95
#define UNUSED(symbol)
Definition: os_specific.hh:55
void set(Level level, std::string fileName, char const *pref="")
Definition: Log.cpp:196
static void writeXml(const ConstConfigurationPtr &config, char const *fileaname)
static int searchDefaultDirs()
std::string getVersion()
time_t timestamp
Definition: om_cmd_utils.hh:43
void progressFileCallback(float progress, void *progressData)