openModeller  Version 1.4.0
request_file.cpp
Go to the documentation of this file.
00001 
#include "request_file.hh"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#include <string>
#include <vector>

#include <openmodeller/om.hh>
#include <openmodeller/FileParser.hh>
00036 
00037 
00038 /**************************************************************/
00039 /************************ Request File ************************/
00040 
00041 RequestFile::RequestFile() :
00042   _occurrencesSet(0),
00043   _environmentSet(0),
00044   _projectionSet(0),
00045   _presences(),
00046   _absences(),
00047   _nonNativeProjection( false ),
00048   _projectionCategoricalMap(),
00049   _projectionMap(),
00050   _inputMask(),
00051   _outputMask(),
00052   _inputModelFile(),
00053   _outputModelFile(),
00054   _projectionFile(),
00055   _outputFormat(),
00056   _spatiallyUnique( false ),
00057   _environmentallyUnique( false ),
00058   _calcConfusionMatrix( true ),
00059   _calcAuc( true )
00060 { 
00061 }
00062 
00063 RequestFile::~RequestFile() 
00064 {
00065 }
00066 
00067 /*****************/
00068 /*** configure ***/
00069 int
00070 RequestFile::configure( OpenModeller *om, char *request_file )
00071 {
00072   std::string input_file( request_file );
00073   FileParser fp( input_file );
00074 
00075   _inputModelFile = fp.get( "Input model" );
00076 
00077   _occurrencesSet = _setOccurrences( om, fp );
00078   _environmentSet = _setEnvironment( om, fp );
00079 
00080   // Optional sampler filters
00081   std::string spatially_unique = fp.get( "Spatially unique" );
00082 
00083   if ( spatially_unique == "true" ) {
00084 
00085     _spatiallyUnique = true;
00086   }
00087 
00088   std::string environmentally_unique = fp.get( "Environmentally unique" );
00089 
00090   if ( environmentally_unique == "true" ) {
00091 
00092     _environmentallyUnique = true;
00093   }
00094 
00095   // Optional model statistics
00096   std::string confusion_matrix = fp.get( "Confusion matrix" );
00097 
00098   if ( confusion_matrix == "false" ) {
00099 
00100     _calcConfusionMatrix = false;
00101   }
00102 
00103   std::string auc = fp.get( "AUC" );
00104 
00105   if ( auc == "false" ) {
00106 
00107     _calcAuc = false;
00108   }
00109 
00110   _projectionSet  = _setProjection ( om, fp );
00111   _algorithmSet   = _setAlgorithm  ( om, fp );
00112 
00113   _outputModelFile = fp.get( "Output model" );
00114 
00115   // Returns ZERO if all was set correctly.
00116   return 4 - _occurrencesSet - _environmentSet -
00117     _projectionSet - _algorithmSet;
00118 }
00119 
00120 
00121 /***********************/
00122 /*** set Occurrences ***/
00123 int
00124 RequestFile::_setOccurrences( OpenModeller *om, FileParser &fp )
00125 {
00126   // Obtain the Well Known Text string for the localities
00127   // coordinate system.
00128   std::string oc_cs = fp.get( "WKT coord system" );
00129 
00130   // Get the name of the file containing localities
00131   std::string oc_file = fp.get( "Occurrences source" );
00132 
00133   if ( oc_file.empty() ) {
00134 
00135     oc_file = fp.get( "Species file" ); // backwards compatibility
00136   }
00137 
00138   // Get the name of the taxon being modelled!
00139   std::string oc_name = fp.get( "Occurrences group" );
00140 
00141   if ( oc_name.empty() ) {
00142 
00143     oc_name = fp.get( "Species" ); // backwards compatibility
00144   }
00145 
00146   // If user provided a serialized model
00147   if ( ! _inputModelFile.empty() ) {
00148 
00149     // Warn if unnecessary parameters were specified 
00150     if ( ! oc_cs.empty() )
00151       Log::instance()->warn( "'WKT coord system' will be ignored since 'Input model' has been specified...\n" );
00152 
00153     if ( ! oc_file.empty() )
00154       Log::instance()->warn( "'Occurrences source'/'Species file' will be ignored since 'Input model' has been specified...\n" );
00155 
00156     if ( ! oc_name.empty() )
00157       Log::instance()->warn( "'Occurrences group'/'Species' will be ignored since 'Input model' has been specified...\n" );
00158 
00159     return 1;
00160   }
00161 
00162   // When a model needs to be created, 'WKT coord system' and 
00163   // 'Species file' are mandatory parameters
00164   if ( oc_cs.empty() ) {
00165     Log::instance()->error( "'WKT coord system' keyword was not specified in the request file!\n" );
00166     return 0;
00167   }
00168 
00169   if ( oc_file.empty() ) {
00170 
00171     Log::instance()->error( "'Occurrences source' keyword was not specified in the request file!\n" );
00172     return 0;
00173   }
00174 
00175   // Populate the occurences list from the localities file
00176   Log::instance()->info( "Reading occurrences...\r" );
00177 
00178   OccurrencesReader* oc_reader = OccurrencesFactory::instance().create( oc_file.c_str(), oc_cs.c_str() );
00179 
00180   _presences = oc_reader->getPresences( oc_name.c_str() );
00181 
00182   _absences = oc_reader->getAbsences( oc_name.c_str() );
00183 
00184   delete oc_reader;
00185 
00186   if ( _absences )
00187   {
00188     Log::instance()->info( "Reading occurrences...done\n" );
00189 
00190     return om->setOccurrences( _presences, _absences );
00191   }
00192   else if ( _presences ) {
00193 
00194     Log::instance()->info( "Reading occurrences...done\n" );
00195     return om->setOccurrences( _presences );
00196   }
00197   else {
00198 
00199     Log::instance()->error( "Could not read any occurrences!\n" );
00200 
00201     return 0;
00202   }
00203 }
00204 
00205 
00206 /***********************/
00207 /*** set Environment ***/
00208 int
00209 RequestFile::_setEnvironment( OpenModeller *om, FileParser &fp )
00210 {
00211   // Mask to select the desired species occurrence points
00212   _inputMask = fp.get( "Mask" );
00213 
00214   // Initiate the environment with all maps.
00215   std::vector<std::string> cat = fp.getAll( "Categorical map" );
00216   std::vector<std::string> map = fp.getAll( "Map" );
00217 
00218   // If user provided a serialized model
00219   if ( ! _inputModelFile.empty() ) {
00220 
00221     // Warn if unnecessary parameters were specified 
00222     if ( ! _inputMask.empty() )
00223       Log::instance()->warn( "'Mask' will be ignored since 'Input model' has been specified...\n" );
00224 
00225     if ( cat.size() > 0 )
00226       Log::instance()->warn( "'Categorical map' will be ignored since 'Input model' has been specified...\n" );
00227 
00228     if ( map.size() > 0 )
00229       Log::instance()->warn( "'Map' will be ignored since 'Input model' has been specified...\n" );
00230 
00231     return 1;
00232   }
00233 
00234   // When a model needs to be created, there should be at least one input map
00235   if ( ! (cat.size() + map.size()) ) {
00236 
00237     Log::instance()->error( "At least one 'Map' or 'Categorical map' needs to be specified!\n" );
00238     return 0;
00239   }
00240 
00241   // Mask is also mandatory
00242   if ( _inputMask.empty() ) {
00243     Log::instance()->error( "'Mask' was not specified!\n" );
00244     return 0;
00245   }
00246 
00247   // Set input environment
00248   Log::instance()->info( "Reading layers...\r" );
00249   om->setEnvironment( cat, map, _inputMask );
00250   Log::instance()->info( "Reading layers...done\n" );
00251 
00252   return 1;
00253 }
00254 
00255 
00256 /**********************/
00257 /*** set Projection ***/
00258 int
00259 RequestFile::_setProjection( OpenModeller *om, FileParser &fp )
00260 {
00261   _projectionFile = fp.get( "Output file" );
00262 
00263   if ( _projectionFile.empty() ) {
00264 
00265     Log::instance()->warn( "'Output file' was not specified.\n" );
00266     return 1;
00267   }
00268 
00269   // Categorical environmental maps and the number of these maps.
00270   _projectionCategoricalMap = fp.getAll( "Categorical output map" );
00271 
00272   // Continuous environmental maps and the number of these maps.
00273   _projectionMap = fp.getAll( "Output Map" );
00274 
00275   // If user provided a serialized model
00276   if ( !_inputModelFile.empty() ) {
00277 
00278     // note: should we accept native projections using environment from serialized models?
00279     _nonNativeProjection = true;
00280 
00281     // So, assume that in this case projection maps are mandatory.
00282     if ( ! (_projectionCategoricalMap.size() + _projectionMap.size()) ) {
00283 
00284       Log::instance()->error( "At least one 'Output map' or 'Categorical output map' needs to be specified!\n" );
00285       return 0;
00286     }
00287   }
00288   else {
00289 
00290     // It is ok to not set the projection.
00291     if ( ! (_projectionCategoricalMap.size() + _projectionMap.size()) ) {
00292 
00293       Log::instance()->info("Projection not set: using training Environment for projection\n");
00294       _nonNativeProjection = false;
00295     }
00296     else {
00297 
00298       _nonNativeProjection = true;
00299     }
00300   }
00301 
00302   // Get the output mask
00303   _outputMask = fp.get( "Output mask" );
00304 
00305   if ( _nonNativeProjection && _outputMask.empty() ) {
00306 
00307     Log::instance()->error( "'Output mask' was not specified!\n" );
00308     return 0;
00309   }
00310 
00311   // Template header to be used by the generated map
00312   std::string format = fp.get( "Output format" );
00313 
00314   if ( ! format.empty() ) {
00315 
00316     _outputFormat = MapFormat( format.c_str() );
00317   }
00318 
00319   // File type
00320   std::string fileType = fp.get( "Output file type" );
00321 
00322   if ( ! fileType.empty() ) {
00323 
00324     _outputFormat.setFormat( fileType );
00325   }
00326 
00327   // Overwrite output extent with values from mask
00328   const std::string maskFile = ( _nonNativeProjection ) ? _outputMask.c_str() : _inputMask.c_str();
00329 
00330   Raster* mask = RasterFactory::instance().create( maskFile );
00331 
00332   Header h = mask->header();
00333 
00334   _outputFormat.setXMin( h.xmin );
00335   _outputFormat.setYMin( h.ymin );
00336   _outputFormat.setXMax( h.xmax );
00337   _outputFormat.setYMax( h.ymax );
00338 
00339   delete mask;
00340 
00341   return 1;
00342 }
00343 
00344 
00345 /***********************/
00346 /*** set Algorithm ***/
00347 int
00348 RequestFile::_setAlgorithm( OpenModeller *om, FileParser &fp )
00349 {
00350   // Find out which model algorithm is to be used.
00351   AlgMetadata const *metadata;
00352   std::string alg_id = fp.get( "Algorithm" );
00353 
00354   // If user provided a serialized model
00355   if ( ! _inputModelFile.empty() ) {
00356     // Warn if unnecessary parameters were specified 
00357     if ( ! alg_id.empty() )
00358       Log::instance()->warn( "'Algorithm' will be ignored since 'Input model' has been specified...\n" );
00359 
00360     return 1;
00361   }
00362 
00363   // Note: console tries to get an algorithm from user input
00364   // if it was not specified in the request file.
00365   if ( alg_id.empty() )
00366     return 0;
00367 
00368   // Try to use the algorithm specified in the request file.
00369   // If it cannot be used, return 0.
00370   try {
00371 
00372     // An exception here means that the algorithm wasn't found.
00373     metadata = om->algorithmMetadata( alg_id.c_str() );
00374   }
00375   catch (...) {
00376 
00377     Log::instance()->error( "Algorithm '%s' specified in the request file was not found\n", 
00378                  alg_id.c_str() );
00379     return 0;
00380   }
00381 
00382   // Obtain any model parameter specified in the request file.
00383   // read parameters from file into req_param parameters
00384   std::vector<std::string> req_param = fp.getAll( "Parameter" );
00385 
00386   // For resulting parameters storage.
00387   int nparam = metadata->nparam;
00388   AlgParameter *param = new AlgParameter[nparam];
00389 
00390   // Read from console the parameters not set by request
00391   // file. Fills 'param' with all 'metadata->nparam' parameters
00392   // set.
00393   _readParameters( param, metadata, req_param );
00394 
00395   // Set the model algorithm to be used by the controller
00396   int resp = om->setAlgorithm( metadata->id, nparam, param );
00397 
00398   if ( resp == 0 ) {
00399 
00400     Log::instance()->error( "Could not set the algorithm to be used\n" );
00401   }
00402 
00403   delete[] param;
00404 
00405   return resp;
00406 }
00407 
00408 /*********************/
00409 /*** get Presences ***/
00410 OccurrencesPtr
00411 RequestFile::getPresences( )
00412 {
00413   if ( ! _presences ) {
00414 
00415     Log::instance()->error( "Could not read occurrences from request file. Make sure 'Occurrences source' has been specified.\n" );
00416   }
00417 
00418   return _presences;
00419 }
00420 
00421 
00422 /*********************/
00423 /*** get Absences ***/
00424 OccurrencesPtr
00425 RequestFile::getAbsences( )
00426 {
00427   return _absences;
00428 }
00429 
00430 /***********************/
00431 /*** read Parameters ***/
00432 int
00433 RequestFile::_readParameters( AlgParameter *result,
00434                               AlgMetadata const *metadata,
00435                               std::vector<std::string> str_param )
00436 {
00437   AlgParamMetadata *param = metadata->param;
00438   AlgParamMetadata *end   = param + metadata->nparam;
00439 
00440   // For each algorithm parameter metadata...
00441   for ( ; param < end; param++, result++ ) {
00442 
00443     // The resulting name is equal the name set in
00444     // algorithm's metadata.
00445     result->setId( param->id );
00446 
00447     // Read the resulting value from str_param.
00448     std::string value = extractParameter( result->id(), str_param );
00449 
00450     // If the parameter is not referenced in the file, set it
00451     // with the default value extracted from the parameter
00452     // metadata.
00453     if ( value.empty() )
00454       value = param->typical;
00455 
00456     result->setValue( value.c_str() );
00457   }
00458 
00459   return metadata->nparam;
00460 }
00461 
00462 
00463 /*************************/
00464 /*** extract Parameter ***/
00465 std::string
00466 RequestFile::extractParameter( std::string const name, 
00467              std::vector<std::string> vet )
00468 {
00469   int length = name.length();
00470   std::vector<std::string>::iterator it = vet.begin();
00471   std::vector<std::string>::iterator end = vet.end();
00472 
00473   while ( it != end ) {
00474 
00475     if ( name == (*it).substr( 0, length ) ) {
00476 
00477       std::string value = (*it).substr( length );
00478 
00479       // Left trim the value 
00480       std::string::size_type pos = value.find_first_not_of(' ');
00481 
00482       if ( pos != std::string::npos ) {
00483 
00484         value.erase( 0, pos );
00485       }
00486 
00487       return value;
00488     }
00489 
00490     ++it;
00491   }
00492 
00493   return "";
00494 }
00495 
00496 /****************************/
00497 /*** requested Projection ***/
00498 bool
00499 RequestFile::requestedProjection( )
00500 {
00501   return ! _projectionFile.empty();
00502 }
00503 
00504 /******************/
00505 /*** make Model ***/
00506 void
00507 RequestFile::makeModel( OpenModeller *om )
00508 {
00509   // No serialized model - create model with all settings from the request file
00510   if ( _inputModelFile.empty() ) {
00511 
00512     // Apply sampler filters if requested by user
00513     if ( _spatiallyUnique ) {
00514 
00515       SamplerPtr sampler = om->getSampler();
00516 
00517       if ( sampler ) {
00518 
00519         sampler->spatiallyUnique();
00520       }
00521       else {
00522 
00523         Log::instance()->warn( "Cannot set spatially unique filter: no sampler available\n" );
00524       }
00525     }
00526 
00527     if ( _environmentallyUnique ) {
00528 
00529       SamplerPtr sampler = om->getSampler();
00530 
00531       if ( sampler ) {
00532 
00533         sampler->environmentallyUnique();
00534       }
00535       else {
00536 
00537         Log::instance()->warn( "Cannot set environmentally unique filter: no sampler available\n" );
00538       }
00539     }
00540   }
00541   // If user provided a serialized model, just load it
00542   else {
00543 
00544     Log::instance()->info( "Loading serialized model\n" );
00545 
00546     char* file_name = new char [_inputModelFile.size() + 1];
00547     strcpy( file_name, _inputModelFile.c_str() );
00548 
00549     ConfigurationPtr conf = Configuration::readXml( file_name );
00550 
00551     om->setModelConfiguration( conf );
00552 
00553     delete[] file_name;
00554 
00555     return;
00556   }
00557 
00558   // Build model
00559   if ( ! om->createModel() ) {
00560 
00561     Log::instance()->error( "Error during model creation: %s\n", om->error() );
00562     return;
00563   }
00564 
00565   if ( calcConfusionMatrix() ) {
00566 
00567     // Calculate confusion matrix to store in the serialized model
00568     om->getConfusionMatrix();
00569   }
00570 
00571   if ( calcAuc() ) {
00572 
00573     // Calculate ROC curve to store in the serialized model
00574     om->getRocCurve()->getTotalArea();
00575   }
00576 
00577   // Serialize model, if requested
00578   if ( _inputModelFile.empty() && ! _outputModelFile.empty() ) {
00579 
00580     char* file_name = new char [_outputModelFile.size() + 1];
00581     strcpy( file_name, _outputModelFile.c_str() );
00582 
00583     ConfigurationPtr cfg = om->getModelConfiguration();
00584     Configuration::writeXml( cfg, file_name );
00585 
00586     delete[] file_name;
00587   }
00588 }
00589 
00590 
00591 /***********************/
00592 /*** make Projection ***/
00593 void
00594 RequestFile::makeProjection( OpenModeller *om )
00595 {
00596   if ( _projectionSet == 0 ) {
00597 
00598     Log::instance()->error( "Error during projection: Request not properly initialized\n" );
00599     return;
00600   }
00601 
00602   if ( !_nonNativeProjection ) {
00603 
00604     om->createMap( _projectionFile.c_str(), _outputFormat );
00605   }
00606   else {
00607 
00608     EnvironmentPtr env = createEnvironment( _projectionCategoricalMap, _projectionMap, _outputMask );
00609 
00610     om->createMap( env, _projectionFile.c_str(), _outputFormat );
00611   }
00612 }