// openModeller
// Version 1.4.0
00001 00028 #include "request_file.hh" 00029 00030 #include <openmodeller/om.hh> 00031 #include <openmodeller/FileParser.hh> 00032 00033 #include <stdlib.h> 00034 #include <string.h> 00035 #include <stdio.h> 00036 00037 00038 /**************************************************************/ 00039 /************************ Request File ************************/ 00040 00041 RequestFile::RequestFile() : 00042 _occurrencesSet(0), 00043 _environmentSet(0), 00044 _projectionSet(0), 00045 _presences(), 00046 _absences(), 00047 _nonNativeProjection( false ), 00048 _projectionCategoricalMap(), 00049 _projectionMap(), 00050 _inputMask(), 00051 _outputMask(), 00052 _inputModelFile(), 00053 _outputModelFile(), 00054 _projectionFile(), 00055 _outputFormat(), 00056 _spatiallyUnique( false ), 00057 _environmentallyUnique( false ), 00058 _calcConfusionMatrix( true ), 00059 _calcAuc( true ) 00060 { 00061 } 00062 00063 RequestFile::~RequestFile() 00064 { 00065 } 00066 00067 /*****************/ 00068 /*** configure ***/ 00069 int 00070 RequestFile::configure( OpenModeller *om, char *request_file ) 00071 { 00072 std::string input_file( request_file ); 00073 FileParser fp( input_file ); 00074 00075 _inputModelFile = fp.get( "Input model" ); 00076 00077 _occurrencesSet = _setOccurrences( om, fp ); 00078 _environmentSet = _setEnvironment( om, fp ); 00079 00080 // Optional sampler filters 00081 std::string spatially_unique = fp.get( "Spatially unique" ); 00082 00083 if ( spatially_unique == "true" ) { 00084 00085 _spatiallyUnique = true; 00086 } 00087 00088 std::string environmentally_unique = fp.get( "Environmentally unique" ); 00089 00090 if ( environmentally_unique == "true" ) { 00091 00092 _environmentallyUnique = true; 00093 } 00094 00095 // Optional model statistics 00096 std::string confusion_matrix = fp.get( "Confusion matrix" ); 00097 00098 if ( confusion_matrix == "false" ) { 00099 00100 _calcConfusionMatrix = false; 00101 } 00102 00103 std::string auc = fp.get( "AUC" ); 00104 
00105 if ( auc == "false" ) { 00106 00107 _calcAuc = false; 00108 } 00109 00110 _projectionSet = _setProjection ( om, fp ); 00111 _algorithmSet = _setAlgorithm ( om, fp ); 00112 00113 _outputModelFile = fp.get( "Output model" ); 00114 00115 // Returns ZERO if all was set correctly. 00116 return 4 - _occurrencesSet - _environmentSet - 00117 _projectionSet - _algorithmSet; 00118 } 00119 00120 00121 /***********************/ 00122 /*** set Occurrences ***/ 00123 int 00124 RequestFile::_setOccurrences( OpenModeller *om, FileParser &fp ) 00125 { 00126 // Obtain the Well Known Text string for the localities 00127 // coordinate system. 00128 std::string oc_cs = fp.get( "WKT coord system" ); 00129 00130 // Get the name of the file containing localities 00131 std::string oc_file = fp.get( "Occurrences source" ); 00132 00133 if ( oc_file.empty() ) { 00134 00135 oc_file = fp.get( "Species file" ); // backwards compatibility 00136 } 00137 00138 // Get the name of the taxon being modelled! 00139 std::string oc_name = fp.get( "Occurrences group" ); 00140 00141 if ( oc_name.empty() ) { 00142 00143 oc_name = fp.get( "Species" ); // backwards compatibility 00144 } 00145 00146 // If user provided a serialized model 00147 if ( ! _inputModelFile.empty() ) { 00148 00149 // Warn if unnecessary parameters were specified 00150 if ( ! oc_cs.empty() ) 00151 Log::instance()->warn( "'WKT coord system' will be ignored since 'Input model' has been specified...\n" ); 00152 00153 if ( ! oc_file.empty() ) 00154 Log::instance()->warn( "'Occurrences source'/'Species file' will be ignored since 'Input model' has been specified...\n" ); 00155 00156 if ( ! 
oc_name.empty() ) 00157 Log::instance()->warn( "'Occurrences group'/'Species' will be ignored since 'Input model' has been specified...\n" ); 00158 00159 return 1; 00160 } 00161 00162 // When a model needs to be created, 'WKT coord system' and 00163 // 'Species file' are mandatory parameters 00164 if ( oc_cs.empty() ) { 00165 Log::instance()->error( "'WKT coord system' keyword was not specified in the request file!\n" ); 00166 return 0; 00167 } 00168 00169 if ( oc_file.empty() ) { 00170 00171 Log::instance()->error( "'Occurrences source' keyword was not specified in the request file!\n" ); 00172 return 0; 00173 } 00174 00175 // Populate the occurences list from the localities file 00176 Log::instance()->info( "Reading occurrences...\r" ); 00177 00178 OccurrencesReader* oc_reader = OccurrencesFactory::instance().create( oc_file.c_str(), oc_cs.c_str() ); 00179 00180 _presences = oc_reader->getPresences( oc_name.c_str() ); 00181 00182 _absences = oc_reader->getAbsences( oc_name.c_str() ); 00183 00184 delete oc_reader; 00185 00186 if ( _absences ) 00187 { 00188 Log::instance()->info( "Reading occurrences...done\n" ); 00189 00190 return om->setOccurrences( _presences, _absences ); 00191 } 00192 else if ( _presences ) { 00193 00194 Log::instance()->info( "Reading occurrences...done\n" ); 00195 return om->setOccurrences( _presences ); 00196 } 00197 else { 00198 00199 Log::instance()->error( "Could not read any occurrences!\n" ); 00200 00201 return 0; 00202 } 00203 } 00204 00205 00206 /***********************/ 00207 /*** set Environment ***/ 00208 int 00209 RequestFile::_setEnvironment( OpenModeller *om, FileParser &fp ) 00210 { 00211 // Mask to select the desired species occurrence points 00212 _inputMask = fp.get( "Mask" ); 00213 00214 // Initiate the environment with all maps. 00215 std::vector<std::string> cat = fp.getAll( "Categorical map" ); 00216 std::vector<std::string> map = fp.getAll( "Map" ); 00217 00218 // If user provided a serialized model 00219 if ( ! 
_inputModelFile.empty() ) { 00220 00221 // Warn if unnecessary parameters were specified 00222 if ( ! _inputMask.empty() ) 00223 Log::instance()->warn( "'Mask' will be ignored since 'Input model' has been specified...\n" ); 00224 00225 if ( cat.size() > 0 ) 00226 Log::instance()->warn( "'Categorical map' will be ignored since 'Input model' has been specified...\n" ); 00227 00228 if ( map.size() > 0 ) 00229 Log::instance()->warn( "'Map' will be ignored since 'Input model' has been specified...\n" ); 00230 00231 return 1; 00232 } 00233 00234 // When a model needs to be created, there should be at least one input map 00235 if ( ! (cat.size() + map.size()) ) { 00236 00237 Log::instance()->error( "At least one 'Map' or 'Categorical map' needs to be specified!\n" ); 00238 return 0; 00239 } 00240 00241 // Mask is also mandatory 00242 if ( _inputMask.empty() ) { 00243 Log::instance()->error( "'Mask' was not specified!\n" ); 00244 return 0; 00245 } 00246 00247 // Set input environment 00248 Log::instance()->info( "Reading layers...\r" ); 00249 om->setEnvironment( cat, map, _inputMask ); 00250 Log::instance()->info( "Reading layers...done\n" ); 00251 00252 return 1; 00253 } 00254 00255 00256 /**********************/ 00257 /*** set Projection ***/ 00258 int 00259 RequestFile::_setProjection( OpenModeller *om, FileParser &fp ) 00260 { 00261 _projectionFile = fp.get( "Output file" ); 00262 00263 if ( _projectionFile.empty() ) { 00264 00265 Log::instance()->warn( "'Output file' was not specified.\n" ); 00266 return 1; 00267 } 00268 00269 // Categorical environmental maps and the number of these maps. 00270 _projectionCategoricalMap = fp.getAll( "Categorical output map" ); 00271 00272 // Continuous environmental maps and the number of these maps. 
00273 _projectionMap = fp.getAll( "Output Map" ); 00274 00275 // If user provided a serialized model 00276 if ( !_inputModelFile.empty() ) { 00277 00278 // note: should we accept native projections using environment from serialized models? 00279 _nonNativeProjection = true; 00280 00281 // So, assume that in this case projection maps are mandatory. 00282 if ( ! (_projectionCategoricalMap.size() + _projectionMap.size()) ) { 00283 00284 Log::instance()->error( "At least one 'Output map' or 'Categorical output map' needs to be specified!\n" ); 00285 return 0; 00286 } 00287 } 00288 else { 00289 00290 // It is ok to not set the projection. 00291 if ( ! (_projectionCategoricalMap.size() + _projectionMap.size()) ) { 00292 00293 Log::instance()->info("Projection not set: using training Environment for projection\n"); 00294 _nonNativeProjection = false; 00295 } 00296 else { 00297 00298 _nonNativeProjection = true; 00299 } 00300 } 00301 00302 // Get the output mask 00303 _outputMask = fp.get( "Output mask" ); 00304 00305 if ( _nonNativeProjection && _outputMask.empty() ) { 00306 00307 Log::instance()->error( "'Output mask' was not specified!\n" ); 00308 return 0; 00309 } 00310 00311 // Template header to be used by the generated map 00312 std::string format = fp.get( "Output format" ); 00313 00314 if ( ! format.empty() ) { 00315 00316 _outputFormat = MapFormat( format.c_str() ); 00317 } 00318 00319 // File type 00320 std::string fileType = fp.get( "Output file type" ); 00321 00322 if ( ! fileType.empty() ) { 00323 00324 _outputFormat.setFormat( fileType ); 00325 } 00326 00327 // Overwrite output extent with values from mask 00328 const std::string maskFile = ( _nonNativeProjection ) ? 
_outputMask.c_str() : _inputMask.c_str(); 00329 00330 Raster* mask = RasterFactory::instance().create( maskFile ); 00331 00332 Header h = mask->header(); 00333 00334 _outputFormat.setXMin( h.xmin ); 00335 _outputFormat.setYMin( h.ymin ); 00336 _outputFormat.setXMax( h.xmax ); 00337 _outputFormat.setYMax( h.ymax ); 00338 00339 delete mask; 00340 00341 return 1; 00342 } 00343 00344 00345 /***********************/ 00346 /*** set Algorithm ***/ 00347 int 00348 RequestFile::_setAlgorithm( OpenModeller *om, FileParser &fp ) 00349 { 00350 // Find out which model algorithm is to be used. 00351 AlgMetadata const *metadata; 00352 std::string alg_id = fp.get( "Algorithm" ); 00353 00354 // If user provided a serialized model 00355 if ( ! _inputModelFile.empty() ) { 00356 // Warn if unnecessary parameters were specified 00357 if ( ! alg_id.empty() ) 00358 Log::instance()->warn( "'Algorithm' will be ignored since 'Input model' has been specified...\n" ); 00359 00360 return 1; 00361 } 00362 00363 // Note: console tries to get an algorithm from user input 00364 // if it was not specified in the request file. 00365 if ( alg_id.empty() ) 00366 return 0; 00367 00368 // Try to use the algorithm specified in the request file. 00369 // If it cannot be used, return 0. 00370 try { 00371 00372 // An exception here means that the algorithm wasn't found. 00373 metadata = om->algorithmMetadata( alg_id.c_str() ); 00374 } 00375 catch (...) { 00376 00377 Log::instance()->error( "Algorithm '%s' specified in the request file was not found\n", 00378 alg_id.c_str() ); 00379 return 0; 00380 } 00381 00382 // Obtain any model parameter specified in the request file. 00383 // read parameters from file into req_param parameters 00384 std::vector<std::string> req_param = fp.getAll( "Parameter" ); 00385 00386 // For resulting parameters storage. 
00387 int nparam = metadata->nparam; 00388 AlgParameter *param = new AlgParameter[nparam]; 00389 00390 // Read from console the parameters not set by request 00391 // file. Fills 'param' with all 'metadata->nparam' parameters 00392 // set. 00393 _readParameters( param, metadata, req_param ); 00394 00395 // Set the model algorithm to be used by the controller 00396 int resp = om->setAlgorithm( metadata->id, nparam, param ); 00397 00398 if ( resp == 0 ) { 00399 00400 Log::instance()->error( "Could not set the algorithm to be used\n" ); 00401 } 00402 00403 delete[] param; 00404 00405 return resp; 00406 } 00407 00408 /*********************/ 00409 /*** get Presences ***/ 00410 OccurrencesPtr 00411 RequestFile::getPresences( ) 00412 { 00413 if ( ! _presences ) { 00414 00415 Log::instance()->error( "Could not read occurrences from request file. Make sure 'Occurrences source' has been specified.\n" ); 00416 } 00417 00418 return _presences; 00419 } 00420 00421 00422 /*********************/ 00423 /*** get Absences ***/ 00424 OccurrencesPtr 00425 RequestFile::getAbsences( ) 00426 { 00427 return _absences; 00428 } 00429 00430 /***********************/ 00431 /*** read Parameters ***/ 00432 int 00433 RequestFile::_readParameters( AlgParameter *result, 00434 AlgMetadata const *metadata, 00435 std::vector<std::string> str_param ) 00436 { 00437 AlgParamMetadata *param = metadata->param; 00438 AlgParamMetadata *end = param + metadata->nparam; 00439 00440 // For each algorithm parameter metadata... 00441 for ( ; param < end; param++, result++ ) { 00442 00443 // The resulting name is equal the name set in 00444 // algorithm's metadata. 00445 result->setId( param->id ); 00446 00447 // Read the resulting value from str_param. 00448 std::string value = extractParameter( result->id(), str_param ); 00449 00450 // If the parameter is not referenced in the file, set it 00451 // with the default value extracted from the parameter 00452 // metadata. 
00453 if ( value.empty() ) 00454 value = param->typical; 00455 00456 result->setValue( value.c_str() ); 00457 } 00458 00459 return metadata->nparam; 00460 } 00461 00462 00463 /*************************/ 00464 /*** extract Parameter ***/ 00465 std::string 00466 RequestFile::extractParameter( std::string const name, 00467 std::vector<std::string> vet ) 00468 { 00469 int length = name.length(); 00470 std::vector<std::string>::iterator it = vet.begin(); 00471 std::vector<std::string>::iterator end = vet.end(); 00472 00473 while ( it != end ) { 00474 00475 if ( name == (*it).substr( 0, length ) ) { 00476 00477 std::string value = (*it).substr( length ); 00478 00479 // Left trim the value 00480 std::string::size_type pos = value.find_first_not_of(' '); 00481 00482 if ( pos != std::string::npos ) { 00483 00484 value.erase( 0, pos ); 00485 } 00486 00487 return value; 00488 } 00489 00490 ++it; 00491 } 00492 00493 return ""; 00494 } 00495 00496 /****************************/ 00497 /*** requested Projection ***/ 00498 bool 00499 RequestFile::requestedProjection( ) 00500 { 00501 return ! 
_projectionFile.empty(); 00502 } 00503 00504 /******************/ 00505 /*** make Model ***/ 00506 void 00507 RequestFile::makeModel( OpenModeller *om ) 00508 { 00509 // No serialized model - create model with all settings from the request file 00510 if ( _inputModelFile.empty() ) { 00511 00512 // Apply sampler filters if requested by user 00513 if ( _spatiallyUnique ) { 00514 00515 SamplerPtr sampler = om->getSampler(); 00516 00517 if ( sampler ) { 00518 00519 sampler->spatiallyUnique(); 00520 } 00521 else { 00522 00523 Log::instance()->warn( "Cannot set spatially unique filter: no sampler available\n" ); 00524 } 00525 } 00526 00527 if ( _environmentallyUnique ) { 00528 00529 SamplerPtr sampler = om->getSampler(); 00530 00531 if ( sampler ) { 00532 00533 sampler->environmentallyUnique(); 00534 } 00535 else { 00536 00537 Log::instance()->warn( "Cannot set environmentally unique filter: no sampler available\n" ); 00538 } 00539 } 00540 } 00541 // If user provided a serialized model, just load it 00542 else { 00543 00544 Log::instance()->info( "Loading serialized model\n" ); 00545 00546 char* file_name = new char [_inputModelFile.size() + 1]; 00547 strcpy( file_name, _inputModelFile.c_str() ); 00548 00549 ConfigurationPtr conf = Configuration::readXml( file_name ); 00550 00551 om->setModelConfiguration( conf ); 00552 00553 delete[] file_name; 00554 00555 return; 00556 } 00557 00558 // Build model 00559 if ( ! om->createModel() ) { 00560 00561 Log::instance()->error( "Error during model creation: %s\n", om->error() ); 00562 return; 00563 } 00564 00565 if ( calcConfusionMatrix() ) { 00566 00567 // Calculate confusion matrix to store in the serialized model 00568 om->getConfusionMatrix(); 00569 } 00570 00571 if ( calcAuc() ) { 00572 00573 // Calculate ROC curve to store in the serialized model 00574 om->getRocCurve()->getTotalArea(); 00575 } 00576 00577 // Serialize model, if requested 00578 if ( _inputModelFile.empty() && ! 
_outputModelFile.empty() ) { 00579 00580 char* file_name = new char [_outputModelFile.size() + 1]; 00581 strcpy( file_name, _outputModelFile.c_str() ); 00582 00583 ConfigurationPtr cfg = om->getModelConfiguration(); 00584 Configuration::writeXml( cfg, file_name ); 00585 00586 delete[] file_name; 00587 } 00588 } 00589 00590 00591 /***********************/ 00592 /*** make Projection ***/ 00593 void 00594 RequestFile::makeProjection( OpenModeller *om ) 00595 { 00596 if ( _projectionSet == 0 ) { 00597 00598 Log::instance()->error( "Error during projection: Request not properly initialized\n" ); 00599 return; 00600 } 00601 00602 if ( !_nonNativeProjection ) { 00603 00604 om->createMap( _projectionFile.c_str(), _outputFormat ); 00605 } 00606 else { 00607 00608 EnvironmentPtr env = createEnvironment( _projectionCategoricalMap, _projectionMap, _outputMask ); 00609 00610 om->createMap( env, _projectionFile.c_str(), _outputFormat ); 00611 } 00612 }