openModeller
Version 1.4.0
|
00001 #include <openmodeller/om.hh> 00002 #include <openmodeller/Exceptions.hh> 00003 #include <openmodeller/os_specific.hh> 00004 00005 #include "getopts/getopts.h" 00006 00007 #include "om_cmd_utils.hh" 00008 00009 #include <fstream> // file I/O for XML 00010 #include <sstream> // ostringstream datatype 00011 #include <stdio.h> // file I/O for log 00012 #include <time.h> // used to limit the number of times that the progress is written to a file 00013 #include <string> // string library 00014 #include <stdexcept> // try/catch 00015 00016 using namespace std; 00017 00018 int get_values(AlgorithmPtr alg, EnvironmentPtr env, OccurrencesPtr occs, Scalar * values, int cnt); 00019 00021 int main( int argc, char **argv ) { 00022 00023 Options opts; 00024 int option; 00025 00026 // command-line parameters (short name, long name, description, take args) 00027 opts.addOption( "v", "version" , "Display version info" , false ); 00028 opts.addOption( "r", "xml-req" , "(option 1) Model evaluation request file in XML", true ); 00029 opts.addOption( "o", "model" , "(option 2) Serialized model file" , true ); 00030 opts.addOption( "p", "points" , "(option 2) TAB-delimited file with points" , true ); 00031 opts.addOption( "s", "result" , "File to store evaluation result in XML" , true ); 00032 opts.addOption( "" , "log-level" , "Set the log level (debug, warn, info, error)" , true ); 00033 opts.addOption( "" , "log-file" , "Log file" , true ); 00034 opts.addOption( "" , "prog-file" , "File to store job progress" , true ); 00035 opts.addOption( "c", "config-file" , "Configuration file for openModeller" , true ); 00036 00037 std::string log_level("info"); 00038 std::string request_file; 00039 std::string model_file; 00040 std::string points_file; 00041 std::string result_file; 00042 std::string log_file; 00043 std::string progress_file; 00044 std::string config_file; 00045 00046 if ( ! opts.parse( argc, argv ) ) { 00047 00048 opts.showHelp( argv[0] ); 00049 } 00050 00051 // Set up any related external resources 00052 setupExternalResources(); 00053 00054 OpenModeller om; 00055 00056 while ( ( option = opts.cycle() ) >= 0 ) { 00057 00058 switch ( option ) { 00059 00060 case 0: 00061 printf( "om_evaluate %s\n", om.getVersion().c_str() ); 00062 printf("This is free software; see the source for copying conditions. There is NO\n"); 00063 printf("warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); 00064 exit(0); 00065 break; 00066 case 1: 00067 request_file = opts.getArgs( option ); 00068 break; 00069 case 2: 00070 model_file = opts.getArgs( option ); 00071 break; 00072 case 3: 00073 points_file = opts.getArgs( option ); 00074 break; 00075 case 4: 00076 result_file = opts.getArgs( option ); 00077 break; 00078 case 5: 00079 log_level = opts.getArgs( option ); 00080 break; 00081 case 6: 00082 log_file = opts.getArgs( option ); 00083 break; 00084 case 7: 00085 progress_file = opts.getArgs( option ); 00086 break; 00087 case 8: 00088 config_file = opts.getArgs( option ); 00089 break; 00090 default: 00091 break; 00092 } 00093 } 00094 00095 // om configuration 00096 if ( ! config_file.empty() ) { 00097 00098 Settings::loadConfig( config_file ); 00099 } 00100 00101 // Initialize progress data if user wants to track progress 00102 progress_data prog_data; 00103 00104 if ( ! progress_file.empty() ) { 00105 00106 prog_data.file_name = progress_file; 00107 00108 time( &prog_data.timestamp ); 00109 00110 prog_data.progress = -1.0; // queued 00111 00112 // Always create initial file with progress 0 00113 progressFileCallback( 0.0, &prog_data ); 00114 } 00115 00116 // Log stuff 00117 00118 Log::Level level_code = getLogLevel( log_level ); 00119 00120 if ( ! log_file.empty() ) { 00121 00122 Log::instance()->set( level_code, log_file, "" ); 00123 } 00124 else { 00125 00126 // Just set the level - things will go to stderr 00127 Log::instance()->setLevel( level_code ); 00128 } 00129 00130 // Check parameters 00131 00132 if ( request_file.empty() ) { 00133 00134 if ( model_file.empty() || points_file.empty() ) { 00135 00136 printf( "Please specify either a test request file in XML or a serialized model and a TAB-delimited file with the points to be tested\n"); 00137 00138 // If user is tracking progress 00139 if ( ! progress_file.empty() ) { 00140 00141 // -2 means aborted 00142 progressFileCallback( -2.0, &prog_data ); 00143 } 00144 00145 exit(-1); 00146 } 00147 } 00148 else { 00149 00150 if ( ! model_file.empty() ) { 00151 00152 Log::instance()->warn( "Model file parameter will be ignored (using XML request instead)\n" ); 00153 } 00154 if ( ! points_file.empty() ) { 00155 00156 Log::instance()->warn( "Points file parameter will be ignored (using XML request instead)\n"); 00157 } 00158 } 00159 00160 // Real work 00161 00162 try { 00163 00164 // Load algorithms and instantiate controller class 00165 AlgorithmFactory::searchDefaultDirs(); 00166 00167 // IMPORTANT: data is not deserialized through Sampler objects, which would be much simpler. 00168 // The reason is that some of the input points may be masked out so they 00169 // would be discarded when the sampler cross references occurrences and environment. 00170 // The number of evaluations MUST match the number of input points. 00171 AlgorithmPtr alg; 00172 OccurrencesPtr presences; 00173 OccurrencesPtr absences; 00174 EnvironmentPtr env; 00175 00176 if ( ! request_file.empty() ) { 00177 00178 // Loading input from XML request 00179 00180 Log::instance()->debug( "Loading input from XML\n" ); 00181 00182 ConfigurationPtr input = Configuration::readXml( request_file.c_str() ); 00183 00184 alg = AlgorithmFactory::newAlgorithm( input->getSubsection( "Algorithm" ) ); 00185 00186 // Load environment and occurrences separately 00187 ConfigurationPtr sampler_config = input->getSubsection( "Sampler" ); 00188 00189 if ( ConstConfigurationPtr env_config = sampler_config->getSubsection( "Environment", false ) ) { 00190 00191 env = createEnvironment(); 00192 env->setConfiguration( env_config ); 00193 } 00194 00195 if ( ConstConfigurationPtr presences_config = sampler_config->getSubsection( "Presence", false ) ) { 00196 00197 presences = new OccurrencesImpl(1.0); 00198 presences->setConfiguration( presences_config ); 00199 } 00200 00201 if ( ConstConfigurationPtr absences_config = sampler_config->getSubsection( "Absence", false ) ) { 00202 00203 absences = new OccurrencesImpl(0.0); 00204 absences->setConfiguration( absences_config ); 00205 } 00206 } 00207 else { 00208 00209 // Loading input from serialized model + TAB-delimited points file 00210 00211 ConfigurationPtr input = Configuration::readXml( model_file.c_str() ); 00212 00213 alg = AlgorithmFactory::newAlgorithm( input->getSubsection( "Algorithm" ) ); 00214 00215 Log::instance()->debug( "Loading training sampler to get layers, label and spatial reference\n" ); 00216 00217 SamplerPtr training_sampler = createSampler( input->getSubsection( "Sampler" ) ); 00218 00219 // IMPORTANT: environmental scenario is taken from training sampler! 00220 env = training_sampler->getEnvironment(); 00221 00222 // IMPORTANT: label and spatial reference are taken from presence points of the training sampler! 00223 OccurrencesPtr training_presences = training_sampler->getPresences(); 00224 00225 std::string label( training_presences->label() ); 00226 std::string spatial_ref( training_presences->coordSystem() ); 00227 00228 Log::instance()->debug( "Loading points %s %s\n", label.c_str(), spatial_ref.c_str() ); 00229 00230 OccurrencesReader* oc_reader = OccurrencesFactory::instance().create( points_file.c_str(), spatial_ref.c_str() ); 00231 00232 presences = oc_reader->getPresences( label.c_str() ); 00233 absences = oc_reader->getAbsences( label.c_str() ); 00234 00235 delete oc_reader; 00236 } 00237 00238 if ( ! alg->done() ) { 00239 00240 Log::instance()->error( "No model could be found as part of the specified algorithm. Aborting.\n"); 00241 00242 // If user is tracking progress 00243 if ( ! progress_file.empty() ) { 00244 00245 // -2 means aborted 00246 progressFileCallback( -2.0, &prog_data ); 00247 } 00248 00249 exit(-1); 00250 } 00251 00252 if ( alg->needNormalization() ) { 00253 00254 env->normalize( alg->getNormalizer() ); 00255 } 00256 00257 ConfigurationPtr output( new ConfigurationImpl("Values") ); 00258 00259 int num_presences = 0; 00260 int num_absences = 0; 00261 00262 if ( presences ) { 00263 00264 num_presences = presences->numOccurrences(); 00265 } 00266 00267 if ( absences ) { 00268 00269 num_absences = absences->numOccurrences(); 00270 } 00271 00272 Log::instance()->debug( "Loaded %d presences and %d absences\n", num_presences, num_absences ); 00273 00274 // Evaluate model 00275 Log::instance()->debug( "Starting evaluation\n" ); 00276 00277 Scalar * vs = new (nothrow) Scalar[num_presences + num_absences]; 00278 if ( ! vs ) { 00279 00280 Log::instance()->error( "Not enough memory to allocate model values. Aborting.\n"); 00281 00282 // If user is tracking progress 00283 if ( ! progress_file.empty() ) { 00284 00285 // -2 means aborted 00286 progressFileCallback( -2.0, &prog_data ); 00287 } 00288 00289 exit(-1); 00290 } 00291 int cnt = 0; 00292 00293 if ( presences ) { 00294 00295 cnt = get_values(alg, env, presences, vs, cnt); 00296 } 00297 if ( absences ) { 00298 00299 cnt = get_values(alg, env, absences, vs, cnt); 00300 } 00301 00302 int precision = 5; 00303 00304 output->addNameValue( "V", vs, num_presences + num_absences, precision ); 00305 00306 std::ostringstream evaluation_output; 00307 00308 Configuration::writeXml( output, evaluation_output ); 00309 00310 std::cerr << flush; 00311 00312 // Write test output to file, if requested 00313 if ( ! result_file.empty() ) { 00314 00315 ofstream file( result_file.c_str() ); 00316 file << evaluation_output.str(); 00317 file.close(); 00318 } 00319 else { 00320 00321 // Otherwise send it to stdout 00322 std::cout << evaluation_output.str().c_str() << endl << flush; 00323 } 00324 00325 // If user wants to track progress 00326 if ( ! progress_file.empty() ) { 00327 00328 // Indicate that the job is finished 00329 progressFileCallback( 1.0, &prog_data ); 00330 } 00331 00332 delete[] vs; 00333 } 00334 catch ( runtime_error e ) { 00335 00336 // If user is tracking progress 00337 if ( ! progress_file.empty() ) { 00338 00339 // -2 means aborted 00340 progressFileCallback( -2.0, &prog_data ); 00341 } 00342 00343 printf( "om_evaluate aborted: %s\n", e.what() ); 00344 } 00345 } 00346 00347 /******************/ 00348 /*** get values ***/ 00349 int 00350 get_values(AlgorithmPtr alg, EnvironmentPtr env, OccurrencesPtr occs, Scalar * values, int cnt) { 00351 00352 OccurrencesImpl::const_iterator oc; 00353 OccurrencesImpl::const_iterator end; 00354 Scalar val; 00355 00356 if ( occs->numOccurrences() > 0 ) { 00357 00358 oc = occs->begin(); 00359 end = occs->end(); 00360 Sample sample; 00361 00362 while ( oc != end ) { 00363 00364 // Use environmental data already provided by the point, if present 00365 // (in this case it will always be unnormalized, so we need to check normalization) 00366 if ( (*oc)->hasEnvironment() ) { 00367 00368 if ( alg->needNormalization() ) { 00369 00370 (*oc)->normalize( alg->getNormalizer(), env->numCategoricalLayers() ); 00371 } 00372 00373 sample = (*oc)->environment(); 00374 } 00375 else { 00376 00377 sample = env->get( (*oc)->x(), (*oc)->y() ); 00378 } 00379 00380 if ( sample.size() > 0 ) { 00381 00382 //sample.dump(); 00383 val = alg->getValue( sample ); 00384 } 00385 else { 00386 val= -1.0; 00387 } 00388 00389 //printf( "val=%0.5f\n", val ); 00390 values[cnt] = val; 00391 ++cnt; 00392 ++oc; 00393 } 00394 } 00395 00396 return cnt; 00397 }