// openModeller
// Version 1.4.0
00001 00027 #include <openmodeller/occ_io/TapirOccurrences.hh> 00028 00029 #include <openmodeller/Occurrences.hh> 00030 00031 #include <openmodeller/Exceptions.hh> 00032 00033 #include <stdlib.h> 00034 00035 #include <string.h> 00036 using std::string; 00037 00038 #include <sstream> 00039 using std::ostringstream; 00040 00041 #include <curl/curl.h> 00042 00043 #include <expat.h> 00044 00045 #define TP_TEMPLATE_LOCATION "http://openmodeller.cria.org.br/xml/tapir/1.0/st_v3.xml" 00046 #define TP_TEMPLATE_STRSIZE 55 00047 #define TP_OUTPUT_MODEL_LOCATION "http://openmodeller.cria.org.br/xml/tapir/1.0/om.xml" 00048 #define TP_OUTPUT_MODEL_STRSIZE 52 00049 00050 /*****************************************/ 00051 /*** create OccurrencesReader callback ***/ 00052 OccurrencesReader * 00053 TapirOccurrences::CreateOccurrencesReaderCallback( const char *source, const char *coordSystem ) 00054 { 00055 return new TapirOccurrences( source, coordSystem ); 00056 } 00057 00058 /***************************/ 00059 /*** _curl header writer ***/ 00060 size_t 00061 TapirOccurrences::_curl_header_writer( void *ptr, size_t size, size_t nmemb, void *stream ) 00062 { 00063 if ( stream == NULL ) { 00064 00065 return 0; 00066 } 00067 00068 // *stream is actually a string object 00069 std::string& str = *( reinterpret_cast<std::string*>( stream ) ); 00070 00071 str.append( reinterpret_cast<const char*>(ptr), size*nmemb ); 00072 00073 return size*nmemb; 00074 } 00075 00076 /*************************/ 00077 /*** _curl body writer ***/ 00078 size_t 00079 TapirOccurrences::_curl_body_writer( void *ptr, size_t size, size_t nmemb, void *stream ) 00080 { 00081 if ( stream == NULL ) { 00082 00083 return 0; 00084 } 00085 00086 // *stream is actually a string object 00087 std::string& str = *( reinterpret_cast<std::string*>( stream ) ); 00088 00089 str.append( reinterpret_cast<const char*>(ptr), size*nmemb ); 00090 00091 return size*nmemb; 00092 } 00093 00094 /*******************/ 00095 /*** Constructor 
***/ 00096 TapirOccurrences::TapirOccurrences( const char *source, const char *coordSystem ) 00097 { 00098 if ( curl_global_init( CURL_GLOBAL_ALL ) != CURLE_OK ) { 00099 00100 throw OccurrencesReaderException( "Could not initialize libcurl" ); 00101 } 00102 00103 _loaded = false; 00104 00105 _source = (char *) source; // TAPIR endpoint 00106 00107 _coord_system = (char *) coordSystem; 00108 00109 _default_limit = 100; 00110 } 00111 00112 00113 /******************/ 00114 /*** Destructor ***/ 00115 TapirOccurrences::~TapirOccurrences() 00116 { 00117 curl_global_cleanup(); 00118 } 00119 00120 00121 /************/ 00122 /*** load ***/ 00123 bool 00124 TapirOccurrences::load() 00125 { 00126 if ( _loaded ) { 00127 00128 return true; 00129 } 00130 00131 Log::instance()->info( "Checking endpoint using TAPIR driver\n" ); 00132 00133 Log::instance()->info( "CURL version is %s\n", curl_version() ); 00134 00135 // Prepare CURL handle 00136 CURL * curl_handle = curl_easy_init(); 00137 00138 if ( curl_handle == NULL ) { 00139 00140 Log::instance()->error( "TapirOccurrences::load - Could not initialize curl handle\n" ); 00141 return false; 00142 } 00143 00144 // Prepare TAPIR capabilities request 00145 std::string capabilities_url( _source ); 00146 00147 if ( capabilities_url.find( "?" ) != string::npos ) { 00148 00149 capabilities_url.append( "&" ); 00150 } 00151 else { 00152 00153 capabilities_url.append( "?" 
); 00154 } 00155 00156 capabilities_url.append( "op=c" ); 00157 00158 // Set CURL options 00159 if ( curl_easy_setopt( curl_handle, CURLOPT_URL, capabilities_url.c_str() ) != CURLE_OK ) { 00160 00161 Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_URL\n" ); 00162 curl_easy_cleanup( curl_handle ); 00163 return false; 00164 } 00165 00166 if ( curl_easy_setopt( curl_handle, CURLOPT_HEADERFUNCTION, &TapirOccurrences::_curl_header_writer ) != CURLE_OK ) { 00167 00168 Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_HEADERFUNCTION\n" ); 00169 curl_easy_cleanup( curl_handle ); 00170 return false; 00171 } 00172 00173 std::string header; 00174 00175 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) { 00176 00177 Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_WRITEHEADER\n" ); 00178 curl_easy_cleanup( curl_handle ); 00179 return false; 00180 } 00181 00182 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &TapirOccurrences::_curl_body_writer ) != CURLE_OK ) { 00183 00184 Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_WRITEFUNCTION\n" ); 00185 curl_easy_cleanup( curl_handle ); 00186 return false; 00187 } 00188 00189 std::string capabilities_response; 00190 00191 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &capabilities_response ) != CURLE_OK ) { 00192 00193 Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_WRITEDATA\n" ); 00194 curl_easy_cleanup( curl_handle ); 00195 return false; 00196 } 00197 00198 Log::instance()->info( "Getting capabilities using %s\n", capabilities_url.c_str() ); 00199 00200 // Send capabilities request 00201 CURLcode curl_code = curl_easy_perform( curl_handle ); 00202 00203 if ( curl_code != CURLE_OK ) 00204 { 00205 Log::instance()->error( "TapirOccurrences::load - Could not get service capabilities from the specified endpoint (CURL code error: %d)\n", curl_code ); 00206 
curl_easy_cleanup( curl_handle ); 00207 return false; 00208 } 00209 00210 curl_easy_cleanup( curl_handle ); 00211 00212 // Check content type returned 00213 if ( header.find( "Content-Type: text/xml" ) == string::npos ) 00214 { 00215 Log::instance()->warn( "TapirOccurrences::load - URL does not seem to be a TAPIR endpoint (HTTP Content-Type header is not text/xml)\n" ); 00216 00217 return false; 00218 } 00219 00220 // Data structure to store relevant capabilities information 00221 CapabilitiesInfo info; 00222 00223 // Parse capabilities response 00224 if ( ! _parseCapabilities( &capabilities_response, &info ) ) { 00225 00226 return false; 00227 } 00228 00229 // Check capabilitites response 00230 if ( ! info._is_tapir ) { 00231 00232 Log::instance()->warn( "TapirOccurrences::load - URL does not seem to be a TAPIR endpoint (no TAPIR response element detected)\n" ); 00233 return false; 00234 } 00235 if ( ! info._has_guid ) { 00236 00237 Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore GlobalUniqueIdentifier concept\n" ); 00238 return false; 00239 } 00240 if ( ! info._has_name ) { 00241 00242 Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore ScientificName concept\n" ); 00243 return false; 00244 } 00245 if ( ! info._has_long ) { 00246 00247 Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore DecimalLongitude concept from the geospatial extension\n" ); 00248 return false; 00249 } 00250 if ( ! info._has_lat ) { 00251 00252 Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore DecimalLatitude concept from the geospatial extension\n" ); 00253 return false; 00254 } 00255 if ( ( ! info._accepts_om_template ) && ( ! info._accepts_any_model ) && ! 
info._accepts_om_model ) { 00256 00257 Log::instance()->error( "TapirOccurrences::load - Provider must accept searches with the openModeller query template or searches with the openModeller output model or searches with any output model\n" ); 00258 return false; 00259 } 00260 00261 Log::instance()->info( "Finished reading capabilities\n" ); 00262 00263 _loaded = true; 00264 00265 return true; 00266 } 00267 00268 00269 /**************************/ 00270 /*** parse capabilities ***/ 00271 bool 00272 TapirOccurrences::_parseCapabilities( const std::string *xml, CapabilitiesInfo *info ) 00273 { 00274 XML_Parser parser = XML_ParserCreateNS( NULL, '/' ); 00275 00276 if ( ! parser ) { 00277 00278 Log::instance()->error( "Unable to allocate memory for capabilities response parser" ); 00279 return false; 00280 } 00281 00282 info->_parser = parser; 00283 00284 XML_SetElementHandler( parser, &TapirOccurrences::_startCapabilitiesElement, NULL ); 00285 00286 XML_SetUserData( parser, info ); 00287 00288 if ( ! 
XML_Parse( parser, xml->c_str(), xml->size(), 1 ) ) { 00289 00290 XML_Error error_code = XML_GetErrorCode( parser ); 00291 std::ostringstream errormsg; 00292 errormsg << XML_ErrorString( error_code ) 00293 << " at Line " 00294 << XML_GetCurrentLineNumber( parser ) 00295 << " column " 00296 << XML_GetCurrentColumnNumber( parser ) 00297 << std::ends; 00298 00299 Log::instance()->error( "XML Parser fatal error for capabilities response: %s\n", errormsg.str().c_str() ); 00300 XML_ParserFree( parser ); 00301 return false; 00302 } 00303 00304 XML_ParserFree( parser ); 00305 00306 _provider_limit = atoi( info->_max_records.c_str() ); 00307 00308 return true; 00309 } 00310 00311 00312 /**********************************/ 00313 /*** start capabilities element ***/ 00314 void 00315 TapirOccurrences::_startCapabilitiesElement( void *data, const char *el, const char **attr ) 00316 { 00317 CapabilitiesInfo& info = *( reinterpret_cast<CapabilitiesInfo*>( data ) ); 00318 00319 // response element 00320 if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/response", 37 ) == 0 ) { 00321 00322 info._is_tapir = true; 00323 } 00324 // mappedConcept element 00325 else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/mappedConcept", 42 ) == 0 ) { 00326 00327 for ( int i = 0; attr[i]; i += 2 ) { 00328 00329 // id attribute (TODO: check "searchable" attribute too) 00330 if ( strncmp( attr[i], "id", 2 ) == 0 ) { 00331 00332 if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/dwcore/GlobalUniqueIdentifier", 52 ) == 0 ) { 00333 info._has_guid = true; 00334 } 00335 else if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/dwcore/ScientificName", 44 ) == 0 ) { 00336 00337 info._has_name = true; 00338 } 00339 else if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/geospatial/DecimalLongitude", 50 ) == 0 ) { 00340 00341 info._has_long = true; 00342 } 00343 else if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/geospatial/DecimalLatitude/", 49 ) == 0 ) { 00344 00345 info._has_lat = true; 00346 } 00347 } 00348 } 
00349 } 00350 // template element 00351 else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/template", 37 ) == 0 ) { 00352 00353 for ( int i = 0; attr[i]; i += 2 ) { 00354 00355 // location attribute 00356 if ( strncmp( attr[i], "location", 8 ) == 0 && 00357 strncmp( attr[i+1], TP_TEMPLATE_LOCATION, TP_TEMPLATE_STRSIZE ) == 0 ) { 00358 00359 info._accepts_om_template = true; 00360 } 00361 } 00362 } 00363 // outputModel element 00364 else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/outputModel", 40 ) == 0 ) { 00365 00366 for ( int i = 0; attr[i]; i += 2 ) { 00367 00368 // location attribute 00369 if ( strncmp( attr[i], "location", 8 ) == 0 && 00370 strncmp( attr[i+1], TP_OUTPUT_MODEL_LOCATION, TP_OUTPUT_MODEL_STRSIZE ) == 0 ) { 00371 00372 info._accepts_om_model = true; 00373 } 00374 } 00375 } 00376 // anyOutputModels element 00377 else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/anyOutputModels", 44 ) == 0 ) { 00378 00379 info._accepts_any_model = true; 00380 } 00381 // maxElementRepetitions element 00382 else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/maxElementRepetitions", 50 ) == 0 ) { 00383 XML_SetCharacterDataHandler( info._parser, &TapirOccurrences::_maxRecordsDataHandler ); 00384 00385 XML_SetElementHandler( info._parser, &TapirOccurrences::_startCapabilitiesElement, 00386 &TapirOccurrences::_endCapabilitiesElement ); 00387 } 00388 } 00389 00390 00391 /********************************/ 00392 /*** max records data handler ***/ 00393 void 00394 TapirOccurrences::_maxRecordsDataHandler( void *data, const char *value, int len ) 00395 { 00396 CapabilitiesInfo& info = *( reinterpret_cast<CapabilitiesInfo*>( data ) ); 00397 00398 info._max_records.append( value, len ); 00399 } 00400 00401 00402 /********************************/ 00403 /*** end capabilities element ***/ 00404 void 00405 TapirOccurrences::_endCapabilitiesElement( void *data, const char *el ) 00406 { 00407 // maxElementRepetitions element 00408 if ( strncmp( el, 
"http://rs.tdwg.org/tapir/1.0/maxElementRepetitions", 50 ) == 0 ) { 00409 00410 CapabilitiesInfo& info = *( reinterpret_cast<CapabilitiesInfo*>( data ) ); 00411 00412 XML_SetCharacterDataHandler( info._parser, NULL ); 00413 XML_SetElementHandler( info._parser, &TapirOccurrences::_startCapabilitiesElement, NULL ); 00414 } 00415 } 00416 00417 00418 /*********************/ 00419 /*** get Presences ***/ 00420 OccurrencesPtr 00421 TapirOccurrences::getPresences( const char *groupId ) 00422 { 00423 // If group was not specified, return empty set 00424 if ( ! groupId ) { 00425 00426 return new OccurrencesImpl( 1 ); 00427 } 00428 00429 LstOccurrences::iterator ocs = _presences.begin(); 00430 LstOccurrences::iterator end = _presences.end(); 00431 00432 while ( ocs != end ) { 00433 00434 OccurrencesPtr oc = *ocs; 00435 00436 if ( ! strcasecmp( groupId, oc->label() ) ) { 00437 00438 _presences.erase( ocs ); 00439 00440 return oc; 00441 } 00442 00443 ++ocs; 00444 } 00445 00446 // If not found, create new group and retrieve records from provider 00447 00448 OccurrencesPtr occurrences( new OccurrencesImpl( groupId, _coord_system ) ); 00449 00450 TapirRecordData search_data; 00451 00452 search_data._occurrences = occurrences; 00453 search_data._next = 0; 00454 00455 int limit = _default_limit; 00456 00457 if ( _provider_limit > 0 && _provider_limit < _default_limit ) { 00458 00459 limit = _provider_limit; 00460 } 00461 00462 while ( search_data._next >= 0 ) { 00463 00464 Log::instance()->info( "Fetching records (start %d, limit %d)\n", search_data._next, limit ); 00465 00466 if ( ! 
_retrieveRecords( &search_data, limit ) ) { 00467 00468 break; 00469 } 00470 } 00471 00472 _presences.push_back( occurrences ); 00473 00474 return occurrences; 00475 } 00476 00477 00478 /************************/ 00479 /*** retrieve Records ***/ 00480 bool 00481 TapirOccurrences::_retrieveRecords( TapirRecordData *data, int limit ) 00482 { 00483 // Prepare CURL handle 00484 CURL * curl_handle = curl_easy_init(); 00485 00486 if ( curl_handle == NULL ) { 00487 00488 Log::instance()->error( "TapirOccurrences::_retrieveRecords - Could not initialize curl handle\n" ); 00489 return false; 00490 } 00491 00492 // Prepare TAPIR search request 00493 std::string source( _source ); 00494 00495 std::ostringstream search_url; 00496 00497 search_url << source.c_str(); 00498 00499 if ( source.find( "?" ) != string::npos ) { 00500 00501 search_url << "&"; 00502 } 00503 else { 00504 00505 search_url << "?"; 00506 } 00507 00508 search_url << "op=s&s=" << data->_next; 00509 00510 search_url << "&l=" << limit; 00511 00512 // curl_easy_escape was included in libcurl version 7.15.4 00513 #if LIBCURL_VERSION_NUM >= 0x070f04 00514 search_url << "&sciname=" << curl_easy_escape( curl_handle, data->_occurrences->label(), 0 ); 00515 search_url << "&t=" << curl_easy_escape( curl_handle, TP_TEMPLATE_LOCATION, 0 ); 00516 #else 00517 search_url << "&sciname=" << curl_escape( data->_occurrences->label(), 0 ); 00518 search_url << "&t=" << curl_escape( TP_TEMPLATE_LOCATION, 0 ); 00519 #endif 00520 00521 // After using next to make the URL, set it to -1 to stop the process in case 00522 // the response does not return the "next" attribute 00523 data->_next = -1; 00524 00525 // Set CURL options 00526 if ( curl_easy_setopt( curl_handle, CURLOPT_URL, search_url.str().c_str() ) != CURLE_OK ) { 00527 00528 Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_URL\n" ); 00529 curl_easy_cleanup( curl_handle ); 00530 return false; 00531 } 00532 00533 if ( curl_easy_setopt( 
curl_handle, CURLOPT_HEADERFUNCTION, &TapirOccurrences::_curl_header_writer ) != CURLE_OK ) { 00534 00535 Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_HEADERFUNCTION\n" ); 00536 curl_easy_cleanup( curl_handle ); 00537 return false; 00538 } 00539 00540 std::string header; 00541 00542 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) { 00543 00544 Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEHEADER\n" ); 00545 curl_easy_cleanup( curl_handle ); 00546 return false; 00547 } 00548 00549 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &TapirOccurrences::_curl_body_writer ) != CURLE_OK ) { 00550 00551 Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEFUNCTION\n" ); 00552 curl_easy_cleanup( curl_handle ); 00553 return false; 00554 } 00555 00556 std::string search_response; 00557 00558 if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &search_response ) != CURLE_OK ) { 00559 00560 Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEDATA\n" ); 00561 curl_easy_cleanup( curl_handle ); 00562 return false; 00563 } 00564 00565 Log::instance()->info( "Searching records using %s\n", search_url.str().c_str() ); 00566 00567 // Send search request 00568 CURLcode curl_code = curl_easy_perform( curl_handle ); 00569 00570 if ( curl_code != CURLE_OK ) 00571 { 00572 const char * error_msg = curl_easy_strerror( curl_code ); 00573 00574 Log::instance()->error( "TapirOccurrences::_retrieveRecords - Could not search records from the specified TAPIR endpoint (CURL error: %s)\n", error_msg ); 00575 00576 curl_easy_cleanup( curl_handle ); 00577 return false; 00578 } 00579 00580 curl_easy_cleanup( curl_handle ); 00581 00582 // Check content type returned 00583 if ( header.find( "Content-Type: text/xml" ) == string::npos ) 00584 { 00585 Log::instance()->error( "TapirOccurrences::_retrieveRecords - 
URL does not seem to be a TAPIR endpoint (HTTP Content-Type header is not text/xml)\n" ); 00586 return false; 00587 } 00588 00589 // Parse search response 00590 XML_Parser parser = XML_ParserCreateNS( NULL, '/' ); 00591 00592 if ( ! parser ) { 00593 00594 Log::instance()->error( "Unable to allocate memory for search response parser" ); 00595 return false; 00596 } 00597 00598 XML_SetElementHandler( parser, &TapirOccurrences::_startSearchElement, NULL ); 00599 00600 XML_SetUserData( parser, data ); 00601 00602 if ( ! XML_Parse( parser, search_response.c_str(), search_response.size(), 1 ) ) { 00603 00604 XML_Error error_code = XML_GetErrorCode( parser ); 00605 std::ostringstream errormsg; 00606 errormsg << XML_ErrorString( error_code ) 00607 << " at Line " 00608 << XML_GetCurrentLineNumber( parser ) 00609 << " column " 00610 << XML_GetCurrentColumnNumber( parser ) 00611 << std::ends; 00612 00613 Log::instance()->error( "XML Parser fatal error for search response: %s\n", errormsg.str().c_str() ); 00614 XML_ParserFree( parser ); 00615 return false; 00616 } 00617 00618 XML_ParserFree( parser ); 00619 00620 return true; 00621 } 00622 00623 00624 /****************************/ 00625 /*** start search element ***/ 00626 void 00627 TapirOccurrences::_startSearchElement( void *data, const char *el, const char **attr ) 00628 { 00629 TapirRecordData& search_data = *( reinterpret_cast<TapirRecordData*>( data ) ); 00630 00631 // occ element 00632 if ( strlen( el ) == 49 && 00633 strncmp( el, "http://openmodeller.cria.org.br/xml/tapir/1.0/occ", 49 ) == 0 ) { 00634 00635 std::string guid(""); 00636 00637 Coord lg = 0; 00638 Coord lt = 0; 00639 00640 for ( int i = 0; attr[i]; i += 2 ) { 00641 00642 // guid attribute 00643 if ( strncmp( attr[i], "guid", 4 ) == 0 ) { 00644 00645 guid = attr[i+1]; 00646 } 00647 // long attribute 00648 else if ( strncmp( attr[i], "long", 4 ) == 0 ) { 00649 00650 lg = Coord( atof( attr[i+1] ) ); 00651 } 00652 // lat attribute 00653 else if ( strncmp( 
attr[i], "lat", 3 ) == 0 ) { 00654 00655 lt = Coord( atof( attr[i+1] ) ); 00656 } 00657 // TODO: get datum and convert coordinates when necessary 00658 } 00659 00660 search_data._occurrences->createOccurrence( guid.c_str(), lg, lt, 0.0, 1, 0, 0 ); 00661 } 00662 // summary element 00663 if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/summary", 36 ) == 0 ) { 00664 00665 for ( int i = 0; attr[i]; i += 2 ) { 00666 00667 // next attribute 00668 if ( strncmp( attr[i], "next", 4 ) == 0 ) { 00669 00670 search_data._next = atoi( attr[i+1] ); 00671 } 00672 } 00673 } 00674 }