openModeller  Version 1.4.0
RemoteRaster.cpp
Go to the documentation of this file.
00001 
00027 #include <openmodeller/env_io/RemoteRaster.hh>
00028 #include <openmodeller/Log.hh>
00029 #include <openmodeller/Exceptions.hh>
00030 #include <openmodeller/MapFormat.hh>
00031 #include <openmodeller/CacheManager.hh>
00032 #include <openmodeller/Settings.hh>
00033 
00034 #include <vector>
00035 #include <string.h>
00036 #include <sstream>
00037 #include <algorithm>
00038 
00039 using namespace std;
00040 
00041 #include <curl/curl.h>
00042 
00043 #ifdef MPI_FOUND
00044 #include "mpi.h"
00045 #endif
00046 
00047 /*************************************************************/
00048 /*********************** Remote Raster ***********************/
00049 
00050 /******************/
00051 /*** Destructor ***/
00052 RemoteRaster::~RemoteRaster()
00053 {
00054 }
00055 
00056 /******************************/
00057 /*** create Raster Callback ***/
00058 Raster*
00059 RemoteRaster::CreateRasterCallback()
00060 {
00061   return new RemoteRaster();
00062 }
00063 
00064 /*********************/
00065 /*** create Raster ***/
00066 void 
00067 RemoteRaster::createRaster( const string& str, int categ )
00068 {
00069   string cached_ref = CacheManager::getContentLocationMd5( str, OM_REMOTE_RASTER_SUBDIR );
00070   string cache_id = CacheManager::getContentIdMd5( str );
00071 
00072   // Check if file is being downloaded (presence of lock file)
00073   std::string lock_file = cache_id;
00074   lock_file.append(".lock");
00075 
00076   if ( CacheManager::isCached( lock_file, OM_REMOTE_RASTER_SUBDIR ) ) {
00077 
00078     Log::instance()->debug( "Ongoing concurrent download\n" );
00079     throw RasterException( "Ongoing concurrent download", 1 );
00080   }
00081 
00082   if ( CacheManager::isCachedMd5( str, OM_REMOTE_RASTER_SUBDIR ) ) {
00083 
00084     Log::instance()->debug( "Layer %s already present in local cache (%s)\n", str.c_str(), cache_id.c_str() );
00085 
00086     GdalRaster::createRaster( cached_ref, categ );
00087 
00088     // TODO: send HEADER request to check if remote file has changed.
00089     //       (or always try to retrieve file anyway, but including If-Modified-Since and handling 304 code)
00090   }
00091   else {
00092 
00093     if ( isFromRejectedSource( str ) ) {
00094 
00095       std::string msg = "Untrusted source for remote raster. Aborting operation.\n";
00096       Log::instance()->error( msg.c_str() );
00097       throw RasterException( msg.c_str() );
00098     }
00099 
00100     std::string retries_file = cache_id;
00101     retries_file.append(".tries");
00102 
00103     std::string retries_fullpath = CacheManager::getContentLocation( retries_file, OM_REMOTE_RASTER_SUBDIR );
00104     int num_retries = 0;
00105 
00106     // Check number of previous attempts
00107     if ( CacheManager::isCached( retries_file, OM_REMOTE_RASTER_SUBDIR ) ) {
00108 
00109       fstream fin;
00110       fin.open( retries_fullpath.c_str(), ios::in );
00111 
00112       if ( fin.is_open() ) {
00113 
00114         ostringstream oss;
00115         string line;
00116 
00117         getline( fin, line );
00118         oss << line << endl;
00119 
00120         // Note: if the content is empty, atoi returns 0
00121         num_retries = atoi( oss.str().c_str() );
00122 
00123         fin.close();
00124       }
00125       else {
00126 
00127         throw RasterException( "Could not determine number of previous download retries." );
00128       }
00129 
00130       if ( num_retries > 3 ) {
00131 
00132         CacheManager::eraseCache( retries_file, OM_REMOTE_RASTER_SUBDIR );
00133         throw RasterException( "Too many attempts to fetch raster. Aborting." );
00134       }
00135     }
00136 
00137     // Fetch file
00138     try {
00139 
00140       // Last minute double check
00141       if ( CacheManager::isCached( lock_file, OM_REMOTE_RASTER_SUBDIR ) ) {
00142 
00143         throw RasterException( "Ongoing concurrent download", 1 );
00144       }
00145 
00146       // Create lock file
00147       ostringstream oss (ostringstream::out);
00148       CacheManager::cache( lock_file, oss, OM_REMOTE_RASTER_SUBDIR );
00149 
00150       // Increase number of retries
00151       FILE *p_file = NULL;
00152       p_file = fopen( retries_fullpath.c_str(), "w" );
00153 
00154       if ( p_file == NULL ) {
00155 
00156         // Could not open file
00157         throw RasterException( "Could not store number of download retries." );
00158       }
00159       else {
00160 
00161         ++num_retries;
00162         char buffer[2];
00163         sprintf( buffer, "%d", num_retries );
00164         fputs( buffer, p_file );
00165         fclose( p_file );
00166       }
00167 
00168       // Finally fetch raster
00169       Log::instance()->debug( "Fetching remote raster %s (%s)...\n", str.c_str(), cache_id.c_str() );
00170 
00171       CURL *curl;
00172 
00173       static CacheFile file_data;
00174       file_data.fileName = cached_ref.c_str();
00175       file_data.stream = NULL;
00176 
00177       curl_global_init( CURL_GLOBAL_DEFAULT );
00178 
00179       curl = curl_easy_init();
00180 
00181       if ( curl ) {
00182 
00183         if ( CURLE_OK != curl_easy_setopt( curl, CURLOPT_URL, str.c_str() ) ) {
00184 
00185           std::string msg = "Could not configure remote raster fetcher.\n";
00186           Log::instance()->error( msg.c_str() );
00187           throw RasterException( msg.c_str() );
00188         }
00189 
00190         // Enable following redirections
00191         curl_easy_setopt( curl, CURLOPT_FOLLOWLOCATION, 1 );
00192         curl_easy_setopt( curl, CURLOPT_MAXREDIRS, 5 );
00193 
00194         // Timeout (30s)
00195         curl_easy_setopt( curl, CURLOPT_TIMEOUT, 30 );
00196 
00197         /* NOSIGNAL should be set to true for timeout to work in multithread
00198          * environments on Unix, requires libcurl 7.10 or more recent.
00199          * (this force avoiding the use of signal handlers)
00200          */
00201 #ifdef CURLOPT_NOSIGNAL
00202         curl_easy_setopt( curl, CURLOPT_NOSIGNAL, 1 );
00203 #endif
00204 
00205         curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, &RemoteRaster::_writeData );
00206         curl_easy_setopt( curl, CURLOPT_WRITEDATA, &file_data );
00207         //curl_easy_setopt( curl, CURLOPT_VERBOSE, 1 );
00208 
00209         CURLcode res = curl_easy_perform( curl );
00210 
00211         curl_easy_cleanup( curl );
00212 
00213         if ( file_data.stream ) {
00214 
00215           fclose( file_data.stream );
00216         }
00217 
00218         curl_global_cleanup();
00219 
00220         if ( CURLE_OK != res ) {
00221 
00222           std::string msg = "Could not fetch remote raster.\n";
00223           Log::instance()->error( msg.c_str() );
00224           throw RasterException( msg.c_str() );
00225         }
00226       }
00227       else {
00228 
00229         std::string msg = "Could not initialize remote raster fetcher.\n";
00230         Log::instance()->error( msg.c_str() );
00231         throw RasterException( msg.c_str() );
00232       }
00233 
00234       Log::instance()->debug( "Done!\n", str.c_str(), cache_id.c_str() );
00235 
00236       GdalRaster::createRaster( cached_ref, categ );
00237 
00238       // Erase lock and retries
00239       CacheManager::eraseCache( lock_file, OM_REMOTE_RASTER_SUBDIR );
00240       CacheManager::eraseCache( retries_file, OM_REMOTE_RASTER_SUBDIR );
00241     }
00242     catch ( RasterException& e ) {
00243 
00244       if ( e.getCode() != 1 ) {
00245 
00246         // Erase lock
00247         CacheManager::eraseCache( lock_file, OM_REMOTE_RASTER_SUBDIR );
00248       }
00249 
00250       throw;
00251     }
00252     catch (...) {
00253 
00254       // Erase lock
00255       CacheManager::eraseCache( lock_file, OM_REMOTE_RASTER_SUBDIR );
00256       throw;
00257     }
00258   }
00259 }
00260 
00261 #ifdef MPI_FOUND
00262 void
00263 RemoteRaster::createRaster( const string& output_file, const string& file, const MapFormat& format) {
00264 
00265   std::string msg = "Method createRaster() not available to create writable remote rasters.\n";
00266   Log::instance()->error( msg.c_str() );
00267   throw RasterException( msg.c_str() );
00268 }
00269 
00270 #else
00271 void
00272 RemoteRaster::createRaster( const string& file, const MapFormat& format) {
00273 
00274   std::string msg = "Method createRaster() not available to create writable remote rasters.\n";
00275   Log::instance()->warn( msg.c_str() );
00276   throw RasterException( msg.c_str() );
00277 }
00278 #endif
00279 
00280 /***********/
00281 /*** put ***/
00282 int
00283 RemoteRaster::put( Coord px, Coord py, Scalar val )
00284 {
00285   std::string msg = "Method put() not available for remote rasters.\n";
00286   Log::instance()->warn( msg.c_str() );
00287   throw RasterException( msg.c_str() );
00288 }
00289 
00290 /***********/
00291 /*** put ***/
00292 int
00293 RemoteRaster::put( Coord px, Coord py )
00294 {
00295   std::string msg = "Method put() not available for remote rasters.\n";
00296   Log::instance()->warn( msg.c_str() );
00297   throw RasterException( msg.c_str() );
00298 }
00299 
00300 /**************/
00301 /*** finish ***/
00302 void 
00303 RemoteRaster::finish()
00304 {
00305   std::string msg = "Method finish() not available for remote rasters.\n";
00306   Log::instance()->warn( msg.c_str() );
00307   throw RasterException( msg.c_str() );
00308 }
00309 
00310 /*********************/
00311 /*** delete Raster ***/
00312 int
00313 RemoteRaster::deleteRaster()
00314 {
00315   std::string msg = "Method deleteRaster() not available for remote rasters.\n";
00316   Log::instance()->warn( msg.c_str() );
00317   throw RasterException( msg.c_str() );
00318 }
00319 
00320 /*******************/
00321 /*** _write Data ***/
00322 size_t
00323 RemoteRaster::_writeData( void *buffer, size_t size, size_t nmemb, void *stream ) {
00324 
00325   struct CacheFile *out = reinterpret_cast<CacheFile *>(stream);
00326 
00327   if ( out && !out->stream ) {
00328 
00329     out->stream = fopen( out->fileName, "wb" );
00330 
00331     if ( ! out->stream ) {
00332 
00333       return -1;
00334     }
00335   }
00336 
00337   return fwrite( buffer, size, nmemb, out->stream );
00338 }
00339 
00340 /*******************************/
00341 /*** is From Rejected Source ***/
00342 bool
00343 RemoteRaster::isFromRejectedSource( const std::string& str ) {
00344 
00345   // Remote sources are untrusted by default
00346 
00347   if ( Settings::count( "ALLOW_RASTER_SOURCE" ) > 0 ) {
00348 
00349     // get host from url
00350     string host;
00351     string lower_url;
00352     transform( str.begin(), str.end(), std::back_inserter(lower_url), ::tolower );
00353 
00354     if ( str.size() < 9 ) { // ftp://x.x
00355 
00356       std::string msg = "Invalid identifier for remote raster (1).\n";
00357       Log::instance()->error( msg.c_str() );
00358       throw RasterException( msg.c_str() );
00359     }
00360 
00361     size_t prot_i = str.find("://");
00362 
00363     if ( prot_i == string::npos ) {
00364 
00365       std::string msg = "Missing protocol in remote raster identifier.\n";
00366       Log::instance()->error( msg.c_str() );
00367       throw RasterException( msg.c_str() );
00368     }
00369 
00370     size_t path_i = str.find("/", prot_i+3);
00371 
00372     if ( path_i == string::npos ) {
00373 
00374       // There must be at least one slash in the identifier! 
00375       std::string msg = "Invalid identifier for remote raster (2).\n";
00376       Log::instance()->error( msg.c_str() );
00377       throw RasterException( msg.c_str() );
00378     }
00379 
00380     host = lower_url.substr( prot_i+3, path_i - (prot_i+3) );
00381 
00382     size_t port_i = host.find(":");
00383 
00384     if ( port_i != string::npos ) {
00385 
00386       // Ignore port
00387       host = host.substr( 0, port_i );
00388     }
00389 
00390     vector<string> accepted_sources = Settings::getAll( "ALLOW_RASTER_SOURCE" );
00391 
00392     for( unsigned int i = 0; i < accepted_sources.size(); i++ ) {
00393 
00394       // This is how you can accept any source (don't do that)
00395       if ( accepted_sources[i].compare("*") == 0 ) {
00396 
00397         return false;
00398       }
00399 
00400       size_t pos = host.find( accepted_sources[i] );
00401 
00402       if ( pos == host.size() - accepted_sources[i].size() ) {
00403 
00404         // Configured source must match the end of the host
00405         return false;
00406       }
00407     }
00408   }
00409 
00410   return true;
00411 }