openModeller  Version 1.4.0
om_evaluate.cpp
Go to the documentation of this file.
00001 #include <openmodeller/om.hh>
00002 #include <openmodeller/Exceptions.hh>
00003 #include <openmodeller/os_specific.hh>
00004 
00005 #include "getopts/getopts.h"
00006 
00007 #include "om_cmd_utils.hh"
00008 
00009 #include <fstream>   // file I/O for XML
00010 #include <sstream>   // ostringstream datatype
00011 #include <stdio.h>   // file I/O for log
00012 #include <time.h>    // used to limit the number of times that the progress is written to a file
00013 #include <string>    // string library
00014 #include <stdexcept> // try/catch
00015 
00016 using namespace std;
00017 
00018 int get_values(AlgorithmPtr alg, EnvironmentPtr env, OccurrencesPtr occs, Scalar * values, int cnt);
00019 
00021 int main( int argc, char **argv ) {
00022 
00023   Options opts;
00024   int option;
00025 
00026   // command-line parameters (short name, long name, description, take args)
00027   opts.addOption( "v", "version"     , "Display version info"                           , false );
00028   opts.addOption( "r", "xml-req"     , "(option 1) Model evaluation request file in XML", true );
00029   opts.addOption( "o", "model"       , "(option 2) Serialized model file"               , true );
00030   opts.addOption( "p", "points"      , "(option 2) TAB-delimited file with points"      , true );
00031   opts.addOption( "s", "result"      , "File to store evaluation result in XML"         , true );
00032   opts.addOption( "" , "log-level"   , "Set the log level (debug, warn, info, error)"   , true );
00033   opts.addOption( "" , "log-file"    , "Log file"                                       , true );
00034   opts.addOption( "" , "prog-file"   , "File to store job progress"                     , true );
00035   opts.addOption( "c", "config-file" , "Configuration file for openModeller"            , true );
00036 
00037   std::string log_level("info");
00038   std::string request_file;
00039   std::string model_file;
00040   std::string points_file;
00041   std::string result_file;
00042   std::string log_file;
00043   std::string progress_file;
00044   std::string config_file;
00045 
00046   if ( ! opts.parse( argc, argv ) ) {
00047 
00048     opts.showHelp( argv[0] );
00049   }
00050 
00051   // Set up any related external resources
00052   setupExternalResources();
00053 
00054   OpenModeller om;
00055 
00056   while ( ( option = opts.cycle() ) >= 0 ) {
00057 
00058     switch ( option ) {
00059 
00060       case 0:
00061         printf( "om_evaluate %s\n", om.getVersion().c_str() );
00062         printf("This is free software; see the source for copying conditions. There is NO\n");
00063         printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
00064         exit(0);
00065         break;
00066       case 1:
00067         request_file = opts.getArgs( option );
00068         break;
00069       case 2:
00070         model_file = opts.getArgs( option );
00071         break;
00072       case 3:
00073         points_file = opts.getArgs( option );
00074         break;
00075       case 4:
00076         result_file = opts.getArgs( option );
00077         break;
00078       case 5:
00079         log_level = opts.getArgs( option );
00080         break;
00081       case 6:
00082         log_file = opts.getArgs( option );
00083         break;
00084       case 7:
00085         progress_file = opts.getArgs( option );
00086         break;
00087       case 8:
00088         config_file = opts.getArgs( option );
00089         break;
00090       default:
00091         break;
00092     }
00093   }
00094 
00095   // om configuration
00096   if ( ! config_file.empty() ) { 
00097 
00098     Settings::loadConfig( config_file );
00099   }
00100 
00101   // Initialize progress data if user wants to track progress
00102   progress_data prog_data;
00103 
00104   if ( ! progress_file.empty() ) { 
00105 
00106     prog_data.file_name = progress_file;
00107 
00108     time( &prog_data.timestamp );
00109 
00110     prog_data.progress = -1.0; // queued
00111 
00112     // Always create initial file with progress 0
00113     progressFileCallback( 0.0, &prog_data );
00114   }
00115 
00116   // Log stuff
00117 
00118   Log::Level level_code = getLogLevel( log_level );
00119 
00120   if ( ! log_file.empty() ) {
00121 
00122     Log::instance()->set( level_code, log_file, "" );
00123   }
00124   else {
00125  
00126     // Just set the level - things will go to stderr
00127     Log::instance()->setLevel( level_code );
00128   }
00129 
00130   // Check parameters
00131 
00132   if ( request_file.empty() ) {
00133 
00134     if ( model_file.empty() || points_file.empty() ) {
00135 
00136       printf( "Please specify either a test request file in XML or a serialized model and a TAB-delimited file with the points to be tested\n");
00137 
00138       // If user is tracking progress
00139       if ( ! progress_file.empty() ) { 
00140 
00141         // -2 means aborted
00142         progressFileCallback( -2.0, &prog_data );
00143       }
00144 
00145       exit(-1);
00146     }
00147   }
00148   else {
00149 
00150     if ( ! model_file.empty() ) {
00151 
00152       Log::instance()->warn( "Model file parameter will be ignored (using XML request instead)\n" );
00153     }
00154     if ( ! points_file.empty() ) {
00155 
00156       Log::instance()->warn( "Points file parameter will be ignored (using XML request instead)\n");
00157     }
00158   }
00159 
00160   // Real work
00161 
00162   try {
00163 
00164     // Load algorithms and instantiate controller class
00165     AlgorithmFactory::searchDefaultDirs();
00166 
00167     // IMPORTANT: data is not deserialized through Sampler objects, which would be much simpler.
00168     //            The reason is that some of the input points may be masked out so they 
00169     //            would be discarded when the sampler cross references occurrences and environment. 
00170     //            The number of evaluations MUST match the number of input points.
00171     AlgorithmPtr alg;
00172     OccurrencesPtr presences;
00173     OccurrencesPtr absences;
00174     EnvironmentPtr env;
00175 
00176     if ( ! request_file.empty() ) {
00177 
00178       // Loading input from XML request
00179 
00180       Log::instance()->debug( "Loading input from XML\n" );
00181 
00182       ConfigurationPtr input = Configuration::readXml( request_file.c_str() );
00183 
00184       alg = AlgorithmFactory::newAlgorithm( input->getSubsection( "Algorithm" ) );
00185 
00186       // Load environment and occurrences separately
00187       ConfigurationPtr sampler_config = input->getSubsection( "Sampler" );
00188 
00189       if ( ConstConfigurationPtr env_config = sampler_config->getSubsection( "Environment", false ) ) {
00190 
00191         env = createEnvironment();
00192         env->setConfiguration( env_config );
00193       }
00194 
00195       if ( ConstConfigurationPtr presences_config = sampler_config->getSubsection( "Presence", false ) ) {
00196 
00197         presences = new OccurrencesImpl(1.0);
00198         presences->setConfiguration( presences_config );
00199       }
00200 
00201       if ( ConstConfigurationPtr absences_config = sampler_config->getSubsection( "Absence", false ) ) {
00202 
00203         absences = new OccurrencesImpl(0.0);
00204         absences->setConfiguration( absences_config );
00205       }
00206     }
00207     else {
00208 
00209       // Loading input from serialized model + TAB-delimited points file
00210 
00211       ConfigurationPtr input = Configuration::readXml( model_file.c_str() );
00212 
00213       alg = AlgorithmFactory::newAlgorithm( input->getSubsection( "Algorithm" ) );
00214 
00215       Log::instance()->debug( "Loading training sampler to get layers, label and spatial reference\n" );
00216 
00217       SamplerPtr training_sampler = createSampler( input->getSubsection( "Sampler" ) );
00218 
00219       // IMPORTANT: environmental scenario is taken from training sampler!
00220       env = training_sampler->getEnvironment();
00221 
00222       // IMPORTANT: label and spatial reference are taken from presence points of the training sampler!
00223       OccurrencesPtr training_presences = training_sampler->getPresences();
00224 
00225       std::string label( training_presences->label() );
00226       std::string spatial_ref( training_presences->coordSystem() );
00227 
00228       Log::instance()->debug( "Loading points %s %s\n", label.c_str(), spatial_ref.c_str() );
00229 
00230       OccurrencesReader* oc_reader = OccurrencesFactory::instance().create( points_file.c_str(), spatial_ref.c_str() );
00231 
00232       presences = oc_reader->getPresences( label.c_str() );
00233       absences = oc_reader->getAbsences( label.c_str() );
00234 
00235       delete oc_reader;
00236     }
00237 
00238     if ( ! alg->done() ) {
00239 
00240       Log::instance()->error( "No model could be found as part of the specified algorithm. Aborting.\n");
00241 
00242       // If user is tracking progress
00243       if ( ! progress_file.empty() ) { 
00244 
00245         // -2 means aborted
00246         progressFileCallback( -2.0, &prog_data );
00247       }
00248 
00249       exit(-1);
00250     }
00251 
00252     if ( alg->needNormalization() ) {
00253 
00254       env->normalize( alg->getNormalizer() );
00255     }
00256     
00257     ConfigurationPtr output( new ConfigurationImpl("Values") );
00258 
00259     int num_presences = 0;
00260     int num_absences = 0;
00261 
00262     if ( presences ) {
00263 
00264       num_presences = presences->numOccurrences();
00265     }
00266     
00267     if ( absences ) {
00268 
00269       num_absences = absences->numOccurrences();
00270     }
00271 
00272     Log::instance()->debug( "Loaded %d presences and %d absences\n", num_presences, num_absences );
00273     
00274     // Evaluate model
00275     Log::instance()->debug( "Starting evaluation\n" );
00276     
00277     Scalar * vs = new (nothrow) Scalar[num_presences + num_absences];
00278     if ( ! vs ) {
00279 
00280       Log::instance()->error( "Not enough memory to allocate model values. Aborting.\n");
00281 
00282       // If user is tracking progress
00283       if ( ! progress_file.empty() ) { 
00284 
00285         // -2 means aborted
00286         progressFileCallback( -2.0, &prog_data );
00287       }
00288 
00289       exit(-1);
00290     }
00291     int cnt = 0;
00292 
00293     if ( presences ) {
00294 
00295       cnt = get_values(alg, env, presences, vs, cnt);
00296     }
00297     if ( absences ) {
00298 
00299       cnt = get_values(alg, env, absences, vs, cnt);
00300     }
00301     
00302     int precision = 5;
00303     
00304     output->addNameValue( "V", vs, num_presences + num_absences, precision );
00305 
00306     std::ostringstream evaluation_output;
00307 
00308     Configuration::writeXml( output, evaluation_output );
00309 
00310     std::cerr << flush;
00311 
00312     // Write test output to file, if requested
00313     if ( ! result_file.empty() ) {
00314 
00315       ofstream file( result_file.c_str() );
00316       file << evaluation_output.str();
00317       file.close();
00318     }
00319     else {
00320 
00321       // Otherwise send it to stdout
00322       std::cout << evaluation_output.str().c_str() << endl << flush;
00323     }
00324 
00325     // If user wants to track progress
00326     if ( ! progress_file.empty() ) { 
00327 
00328       // Indicate that the job is finished
00329       progressFileCallback( 1.0, &prog_data );
00330     }
00331     
00332     delete[] vs;
00333   }
00334   catch ( runtime_error e ) {
00335 
00336     // If user is tracking progress
00337     if ( ! progress_file.empty() ) { 
00338 
00339       // -2 means aborted
00340       progressFileCallback( -2.0, &prog_data );
00341     }
00342 
00343     printf( "om_evaluate aborted: %s\n", e.what() );
00344   }
00345 }
00346 
00347 /******************/
00348 /*** get values ***/
00349 int 
00350 get_values(AlgorithmPtr alg, EnvironmentPtr env, OccurrencesPtr occs, Scalar * values, int cnt) {
00351 
00352   OccurrencesImpl::const_iterator oc;
00353   OccurrencesImpl::const_iterator end;
00354   Scalar val;
00355 
00356   if ( occs->numOccurrences() > 0 ) {
00357 
00358     oc = occs->begin();
00359     end = occs->end();
00360     Sample sample;
00361 
00362     while ( oc != end ) {
00363       
00364       // Use environmental data already provided by the point, if present
00365       // (in this case it will always be unnormalized, so we need to check normalization)
00366       if ( (*oc)->hasEnvironment() ) {
00367 
00368         if ( alg->needNormalization() ) {
00369 
00370           (*oc)->normalize( alg->getNormalizer(), env->numCategoricalLayers() );
00371         }
00372         
00373   sample = (*oc)->environment();
00374       }
00375       else {
00376 
00377         sample = env->get( (*oc)->x(), (*oc)->y() );
00378       }
00379 
00380       if ( sample.size() > 0 ) {
00381 
00382         //sample.dump();
00383         val = alg->getValue( sample );
00384       }
00385       else {
00386         val= -1.0;
00387       }
00388 
00389       //printf( "val=%0.5f\n", val );
00390       values[cnt] = val;
00391       ++cnt;
00392       ++oc;
00393     }
00394   }
00395   
00396   return cnt;
00397 }