openModeller
Version 1.4.0
|
00001 00027 #include <openmodeller/occ_io/GbifOccurrences.hh> 00028 00029 #include <openmodeller/Occurrences.hh> 00030 00031 #include <openmodeller/Exceptions.hh> 00032 00033 #include <stdlib.h> 00034 00035 //c style include needed for strcmp etc 00036 #include <string.h> 00037 using std::string; 00038 00039 #include <sstream> 00040 using std::ostringstream; 00041 00042 #include <curl/curl.h> 00043 00044 #include <expat.h> 00045 00046 /*****************************************/ 00047 /*** create OccurrencesReader callback ***/ 00048 OccurrencesReader * 00049 GbifOccurrences::CreateOccurrencesReaderCallback( const char *source, const char *coordSystem ) 00050 { 00051 return new GbifOccurrences( source, coordSystem ); 00052 } 00053 00054 /***************************/ 00055 /*** _curl header writer ***/ 00056 size_t 00057 GbifOccurrences::_curl_header_writer( void *ptr, size_t size, size_t nmemb, void *stream ) 00058 { 00059 if ( stream == NULL ) { 00060 00061 return 0; 00062 } 00063 00064 // *stream is actually a string object 00065 std::string& str = *( reinterpret_cast<std::string*>( stream ) ); 00066 00067 str.append( reinterpret_cast<const char*>(ptr), size*nmemb ); 00068 00069 return size*nmemb; 00070 } 00071 00072 /*************************/ 00073 /*** _curl body writer ***/ 00074 size_t 00075 GbifOccurrences::_curl_body_writer( void *ptr, size_t size, size_t nmemb, void *stream ) 00076 { 00077 if ( stream == NULL ) { 00078 00079 return 0; 00080 } 00081 00082 // *stream is actually a string object 00083 std::string& str = *( reinterpret_cast<std::string*>( stream ) ); 00084 00085 str.append( reinterpret_cast<const char*>(ptr), size*nmemb ); 00086 00087 return size*nmemb; 00088 } 00089 00090 /*******************/ 00091 /*** Constructor ***/ 00092 GbifOccurrences::GbifOccurrences( const char *source, const char *coordSystem ) 00093 { 00094 if ( curl_global_init( CURL_GLOBAL_ALL ) != CURLE_OK ) { 00095 00096 throw OccurrencesReaderException( "Could not initialize libcurl" ); 00097 } 00098 00099 _loaded = false; 00100 00101 _source = (char *) source; // endpoint 00102 00103 _coord_system = (char *) coordSystem; 00104 00105 _default_limit = 100; 00106 } 00107 00108 00109 /******************/ 00110 /*** Destructor ***/ 00111 GbifOccurrences::~GbifOccurrences() 00112 { 00113 curl_global_cleanup(); 00114 } 00115 00116 00117 /************/ 00118 /*** load ***/ 00119 bool 00120 GbifOccurrences::load() 00121 { 00122 if ( _loaded ) { 00123 00124 return true; 00125 } 00126 00127 Log::instance()->info( "Checking endpoint using GBIF driver\n" ); 00128 00129 Log::instance()->info( "CURL version is %s\n", curl_version() ); 00130 00131 // Prepare CURL handle 00132 CURL * curl_handle = curl_easy_init(); 00133 00134 if ( curl_handle == NULL ) { 00135 00136 Log::instance()->error( "GbifOccurrences::load - Could not initialize curl handle\n" ); 00137 return false; 00138 } 00139 00140 // Prepare request with no parameters just to check if it's the GBIF service 00141 00142 00143 // Set CURL options 00144 if ( curl_easy_setopt( curl_handle, CURLOPT_URL, _source ) != CURLE_OK ) { 00145 00146 Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_URL\n" ); 00147 curl_easy_cleanup( curl_handle ); 00148 return false; 00149 } 00150 00151 if ( curl_easy_setopt( curl_handle, CURLOPT_HEADERFUNCTION, &GbifOccurrences::_curl_header_writer ) != CURLE_OK ) { 00152 00153 Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_HEADERFUNCTION\n" ); 00154 curl_easy_cleanup( curl_handle ); 00155 return false; 00156 } 00157 00158 std::string header; 00159 00160 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) { 00161 00162 Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_WRITEHEADER\n" ); 00163 curl_easy_cleanup( curl_handle ); 00164 return false; 00165 } 00166 00167 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &GbifOccurrences::_curl_body_writer ) != CURLE_OK ) { 00168 00169 Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_WRITEFUNCTION\n" ); 00170 curl_easy_cleanup( curl_handle ); 00171 return false; 00172 } 00173 00174 std::string response; 00175 00176 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &response ) != CURLE_OK ) { 00177 00178 Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_WRITEDATA\n" ); 00179 curl_easy_cleanup( curl_handle ); 00180 return false; 00181 } 00182 00183 Log::instance()->info( "Getting service info using %s\n", _source ); 00184 00185 // Send request 00186 CURLcode curl_code = curl_easy_perform( curl_handle ); 00187 00188 if ( curl_code != CURLE_OK ) 00189 { 00190 Log::instance()->error( "GbifOccurrences::load - Could not get service info from the specified endpoint (CURL code error: %d)\n", curl_code ); 00191 curl_easy_cleanup( curl_handle ); 00192 return false; 00193 } 00194 00195 curl_easy_cleanup( curl_handle ); 00196 00197 // Check content type returned 00198 if ( header.find( "Content-Type: text/xml" ) == string::npos ) 00199 { 00200 Log::instance()->error( "GbifOccurrences::load - URL does not seem to be a GBIF REST WS endpoint (HTTP Content-Type header is not text/xml)\n" ); 00201 00202 Log::instance()->info( "HEADER: %s\n", header.c_str() ); 00203 00204 return false; 00205 } 00206 00207 // Data structure to store relevant service information 00208 ServiceInfo info; 00209 00210 // Parse response 00211 if ( ! _parseServiceInfo( &response, &info ) ) { 00212 00213 return false; 00214 } 00215 00216 // Check response 00217 if ( ! info._is_gbif ) { 00218 00219 Log::instance()->error( "GbifOccurrences::load - URL does not seem to be a GBIF WS REST endpoint (root element is not gbifResponse)\n" ); 00220 return false; 00221 } 00222 00223 Log::instance()->info( "Finished checking service info\n" ); 00224 00225 _loaded = true; 00226 00227 return true; 00228 } 00229 00230 00231 /**************************/ 00232 /*** parse service info ***/ 00233 bool 00234 GbifOccurrences::_parseServiceInfo( const std::string *xml, ServiceInfo *info ) 00235 { 00236 XML_Parser parser = XML_ParserCreateNS( NULL, '/' ); 00237 00238 if ( ! parser ) { 00239 00240 Log::instance()->error( "Unable to allocate memory for capabilities response parser" ); 00241 return false; 00242 } 00243 00244 info->_parser = parser; 00245 00246 XML_SetElementHandler( parser, &GbifOccurrences::_startServiceInfoElement, NULL ); 00247 00248 XML_SetUserData( parser, info ); 00249 00250 if ( ! XML_Parse( parser, xml->c_str(), xml->size(), 1 ) ) { 00251 00252 XML_Error error_code = XML_GetErrorCode( parser ); 00253 std::ostringstream errormsg; 00254 errormsg << XML_ErrorString( error_code ) 00255 << " at Line " 00256 << XML_GetCurrentLineNumber( parser ) 00257 << " column " 00258 << XML_GetCurrentColumnNumber( parser ) 00259 << std::ends; 00260 00261 Log::instance()->error( "XML Parser fatal error for capabilities response: %s\n", errormsg.str().c_str() ); 00262 XML_ParserFree( parser ); 00263 return false; 00264 } 00265 00266 XML_ParserFree( parser ); 00267 00268 _provider_limit = atoi( info->_max_records.c_str() ); 00269 00270 return true; 00271 } 00272 00273 00274 /**********************************/ 00275 /*** start service info element ***/ 00276 void 00277 GbifOccurrences::_startServiceInfoElement( void *data, const char *el, const char **attr ) 00278 { 00279 ServiceInfo& info = *( reinterpret_cast<ServiceInfo*>( data ) ); 00280 00281 // gbifResponse element 00282 if ( strncmp( el, "http://portal.gbif.org/ws/response/gbif/gbifResponse", 52 ) == 0 ) { 00283 00284 info._is_gbif = true; 00285 } 00286 // parameter element 00287 else if ( strncmp( el, "http://portal.gbif.org/ws/response/gbif/parameter", 49 ) == 0 ) { 00288 00289 bool is_maxresults = false; 00290 00291 std::string value(""); 00292 00293 for ( int i = 0; attr[i]; i += 2 ) { 00294 00295 // name attribute 00296 if ( strncmp( attr[i], "name", 4 ) == 0 ) { 00297 00298 if ( strncmp( attr[i+1], "maxresults", 10 ) == 0 ) { 00299 is_maxresults = true; 00300 } 00301 } 00302 else if ( strncmp( attr[i], "value", 5 ) == 0 ) { 00303 00304 value = attr[i+1]; 00305 } 00306 00307 if ( is_maxresults ) { 00308 00309 info._max_records = value; 00310 00311 // No need to keep parsing 00312 XML_SetElementHandler( info._parser, NULL, NULL ); 00313 } 00314 } 00315 } 00316 } 00317 00318 00319 /*********************/ 00320 /*** get Presences ***/ 00321 OccurrencesPtr 00322 GbifOccurrences::getPresences( const char *groupId ) 00323 { 00324 // If group was not specified, return empty set 00325 if ( ! groupId ) { 00326 00327 return new OccurrencesImpl( 1 ); 00328 } 00329 00330 LstOccurrences::iterator ocs = _presences.begin(); 00331 LstOccurrences::iterator end = _presences.end(); 00332 00333 while ( ocs != end ) { 00334 00335 OccurrencesPtr oc = *ocs; 00336 00337 if ( ! strcasecmp( groupId, oc->label() ) ) { 00338 00339 _presences.erase( ocs ); 00340 00341 return oc; 00342 } 00343 00344 ++ocs; 00345 } 00346 00347 // If not found, create new group and retrieve records from provider 00348 00349 OccurrencesPtr occurrences( new OccurrencesImpl( groupId, _coord_system ) ); 00350 00351 GbifRecordData search_data; 00352 00353 search_data._occurrences = occurrences; 00354 search_data._next = 0; 00355 00356 int limit = _default_limit; 00357 00358 if ( _provider_limit > 0 && _provider_limit < _default_limit ) { 00359 00360 limit = _provider_limit; 00361 } 00362 00363 while ( search_data._next >= 0 ) { 00364 00365 Log::instance()->info( "Fetching records (start %d, limit %d)\n", search_data._next, limit ); 00366 00367 if ( ! _retrieveRecords( &search_data, limit ) ) { 00368 00369 break; 00370 } 00371 } 00372 00373 _presences.push_back( occurrences ); 00374 00375 return occurrences; 00376 } 00377 00378 00379 /************************/ 00380 /*** retrieve Records ***/ 00381 bool 00382 GbifOccurrences::_retrieveRecords( GbifRecordData *data, int limit ) 00383 { 00384 // Prepare CURL handle 00385 CURL * curl_handle = curl_easy_init(); 00386 00387 if ( curl_handle == NULL ) { 00388 00389 Log::instance()->error( "GbifOccurrences::_retrieveRecords - Could not initialize curl handle\n" ); 00390 return false; 00391 } 00392 00393 // Prepare search request 00394 std::string source( _source ); 00395 00396 std::ostringstream search_url; 00397 00398 search_url << source.c_str(); 00399 00400 if ( source.find( "?" ) != string::npos ) { 00401 00402 search_url << "&"; 00403 } 00404 else { 00405 00406 search_url << "?"; 00407 } 00408 00409 search_url << "startindex=" << data->_next; 00410 00411 search_url << "&maxresults=" << limit; 00412 00413 // curl_easy_escape was included in libcurl version 7.15.4 00414 #if LIBCURL_VERSION_NUM >= 0x070f04 00415 search_url << "&scientificname=" << curl_easy_escape( curl_handle, data->_occurrences->label(), 0 ); 00416 #else 00417 search_url << "&scientificname=" << curl_escape( data->_occurrences->label(), 0 ); 00418 #endif 00419 00420 search_url << "&format=brief&coordinatestatus=true&coordinateissues=false"; 00421 00422 // After using next to make the URL, set it to -1 to stop the process in case 00423 // the response does not return the "next" attribute 00424 data->_next = -1; 00425 00426 // Set CURL options 00427 if ( curl_easy_setopt( curl_handle, CURLOPT_URL, search_url.str().c_str() ) != CURLE_OK ) { 00428 00429 Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_URL\n" ); 00430 curl_easy_cleanup( curl_handle ); 00431 return false; 00432 } 00433 00434 if ( curl_easy_setopt( curl_handle, CURLOPT_HEADERFUNCTION, &GbifOccurrences::_curl_header_writer ) != CURLE_OK ) { 00435 00436 Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_HEADERFUNCTION\n" ); 00437 curl_easy_cleanup( curl_handle ); 00438 return false; 00439 } 00440 00441 std::string header; 00442 00443 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) { 00444 00445 Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEHEADER\n" ); 00446 curl_easy_cleanup( curl_handle ); 00447 return false; 00448 } 00449 00450 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &GbifOccurrences::_curl_body_writer ) != CURLE_OK ) { 00451 00452 Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEFUNCTION\n" ); 00453 curl_easy_cleanup( curl_handle ); 00454 return false; 00455 } 00456 00457 std::string search_response; 00458 00459 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &search_response ) != CURLE_OK ) { 00460 00461 Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEDATA\n" ); 00462 curl_easy_cleanup( curl_handle ); 00463 return false; 00464 } 00465 00466 Log::instance()->info( "Searching records using %s\n", search_url.str().c_str() ); 00467 00468 // Send search request 00469 CURLcode curl_code = curl_easy_perform( curl_handle ); 00470 00471 if ( curl_code != CURLE_OK ) 00472 { 00473 const char * error_msg = curl_easy_strerror( curl_code ); 00474 00475 Log::instance()->error( "GbifOccurrences::_retrieveRecords - Could not search records from the specified GBIF endpoint (CURL error: %s)\n", error_msg ); 00476 00477 curl_easy_cleanup( curl_handle ); 00478 return false; 00479 } 00480 00481 curl_easy_cleanup( curl_handle ); 00482 00483 // Check content type returned 00484 if ( header.find( "Content-Type: text/xml" ) == string::npos ) 00485 { 00486 Log::instance()->error( "GbifOccurrences::_retrieveRecords - URL does not seem to be a GBIF WS REST endpoint (HTTP Content-Type header is not text/xml)\n" ); 00487 return false; 00488 } 00489 00490 // Parse search response 00491 XML_Parser parser = XML_ParserCreateNS( NULL, '/' ); 00492 00493 if ( ! parser ) { 00494 00495 Log::instance()->error( "Unable to allocate memory for search response parser" ); 00496 return false; 00497 } 00498 00499 data->_parser = parser; 00500 00501 XML_SetElementHandler( parser, &GbifOccurrences::_startSearchElement, &GbifOccurrences::_endSearchElement ); 00502 00503 XML_SetUserData( parser, data ); 00504 00505 if ( ! XML_Parse( parser, search_response.c_str(), search_response.size(), 1 ) ) { 00506 00507 XML_Error error_code = XML_GetErrorCode( parser ); 00508 std::ostringstream errormsg; 00509 errormsg << XML_ErrorString( error_code ) 00510 << " at Line " 00511 << XML_GetCurrentLineNumber( parser ) 00512 << " column " 00513 << XML_GetCurrentColumnNumber( parser ) 00514 << std::ends; 00515 00516 Log::instance()->error( "XML Parser fatal error for search response: %s\n", errormsg.str().c_str() ); 00517 XML_ParserFree( parser ); 00518 return false; 00519 } 00520 00521 XML_ParserFree( parser ); 00522 00523 return true; 00524 } 00525 00526 00527 /****************************/ 00528 /*** start search element ***/ 00529 void 00530 GbifOccurrences::_startSearchElement( void *data, const char *el, const char **attr ) 00531 { 00532 GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) ); 00533 00534 // summary element 00535 if ( strncmp( el, "http://portal.gbif.org/ws/response/gbif/summary", 47 ) == 0 ) { 00536 00537 for ( int i = 0; attr[i]; i += 2 ) { 00538 00539 // next attribute 00540 if ( strncmp( attr[i], "next", 4 ) == 0 ) { 00541 00542 search_data._next = atoi( attr[i+1] ); 00543 } 00544 } 00545 } 00546 // TaxonOccurrence element 00547 else if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/TaxonOccurrence", 64 ) == 0 ) { 00548 00549 for ( int i = 0; attr[i]; i += 2 ) { 00550 00551 // rdf:about attribute 00552 if ( strncmp( attr[i], "http://www.w3.org/1999/02/22-rdf-syntax-ns#/about", 49 ) == 0 ) { 00553 00554 search_data._last_guid = attr[i+1]; 00555 } 00556 } 00557 } 00558 // decimalLatitude element 00559 else if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/decimalLatitude", 64 ) == 0 ) { 00560 00561 XML_SetCharacterDataHandler( search_data._parser, &GbifOccurrences::_ltDataHandler ); 00562 } 00563 // decimalLongitude element 00564 else if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/decimalLongitude", 65 ) == 0 ) { 00565 00566 XML_SetCharacterDataHandler( search_data._parser, &GbifOccurrences::_lgDataHandler ); 00567 } 00568 } 00569 00570 00571 /**************************/ 00572 /*** end search element ***/ 00573 void 00574 GbifOccurrences::_endSearchElement( void *data, const char *el ) 00575 { 00576 // TaxonOccurrence element 00577 if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/TaxonOccurrence", 64 ) == 0 ) { 00578 00579 GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) ); 00580 00581 search_data._occurrences->createOccurrence( search_data._last_guid.c_str(), search_data._last_lg, search_data._last_lt, 0.0, 1, 0, 0 ); 00582 } 00583 } 00584 00585 00586 /***********************/ 00587 /*** lt data handler ***/ 00588 void 00589 GbifOccurrences::_ltDataHandler( void *data, const char *value, int len ) 00590 { 00591 GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) ); 00592 00593 std::string lt(""); 00594 lt.append( value, len ); 00595 00596 search_data._last_lt = Coord( atof( lt.c_str() ) ); 00597 00598 XML_SetCharacterDataHandler( search_data._parser, NULL ); 00599 } 00600 00601 00602 /***********************/ 00603 /*** lg data handler ***/ 00604 void 00605 GbifOccurrences::_lgDataHandler( void *data, const char *value, int len ) 00606 { 00607 GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) ); 00608 00609 std::string lg(""); 00610 lg.append( value, len ); 00611 00612 search_data._last_lg = Coord( atof( lg.c_str() ) ); 00613 00614 XML_SetCharacterDataHandler( search_data._parser, NULL ); 00615 }