openModeller  Version 1.5.0
TapirOccurrences.cpp
Go to the documentation of this file.
1 
28 
30 
32 
33 #include <stdlib.h>
34 
35 #include <string.h>
36 using std::string;
37 
38 #include <sstream>
39 using std::ostringstream;
40 
41 #include <curl/curl.h>
42 
43 #include <expat.h>
44 
45 #define TP_TEMPLATE_LOCATION "http://openmodeller.cria.org.br/xml/tapir/1.0/st_v3.xml"
46 #define TP_TEMPLATE_STRSIZE 55
47 #define TP_OUTPUT_MODEL_LOCATION "http://openmodeller.cria.org.br/xml/tapir/1.0/om.xml"
48 #define TP_OUTPUT_MODEL_STRSIZE 52
49 
50 /*****************************************/
51 /*** create OccurrencesReader callback ***/
53 TapirOccurrences::CreateOccurrencesReaderCallback( const char *source, const char *coordSystem )
54 {
55  return new TapirOccurrences( source, coordSystem );
56 }
57 
58 /***************************/
59 /*** _curl header writer ***/
60 size_t
61 TapirOccurrences::_curl_header_writer( void *ptr, size_t size, size_t nmemb, void *stream )
62 {
63  if ( stream == NULL ) {
64 
65  return 0;
66  }
67 
68  // *stream is actually a string object
69  std::string& str = *( reinterpret_cast<std::string*>( stream ) );
70 
71  str.append( reinterpret_cast<const char*>(ptr), size*nmemb );
72 
73  return size*nmemb;
74 }
75 
76 /*************************/
77 /*** _curl body writer ***/
78 size_t
79 TapirOccurrences::_curl_body_writer( void *ptr, size_t size, size_t nmemb, void *stream )
80 {
81  if ( stream == NULL ) {
82 
83  return 0;
84  }
85 
86  // *stream is actually a string object
87  std::string& str = *( reinterpret_cast<std::string*>( stream ) );
88 
89  str.append( reinterpret_cast<const char*>(ptr), size*nmemb );
90 
91  return size*nmemb;
92 }
93 
94 /*******************/
95 /*** Constructor ***/
96 TapirOccurrences::TapirOccurrences( const char *source, const char *coordSystem )
97 {
98  if ( curl_global_init( CURL_GLOBAL_ALL ) != CURLE_OK ) {
99 
100  throw OccurrencesReaderException( "Could not initialize libcurl" );
101  }
102 
103  _loaded = false;
104 
105  _source = (char *) source; // TAPIR endpoint
106 
107  _coord_system = (char *) coordSystem;
108 
109  _default_limit = 100;
110 }
111 
112 
113 /******************/
114 /*** Destructor ***/
116 {
117  curl_global_cleanup();
118 }
119 
120 
121 /************/
122 /*** load ***/
123 bool
125 {
126  if ( _loaded ) {
127 
128  return true;
129  }
130 
131  Log::instance()->info( "Checking endpoint using TAPIR driver\n" );
132 
133  Log::instance()->info( "CURL version is %s\n", curl_version() );
134 
135  // Prepare CURL handle
136  CURL * curl_handle = curl_easy_init();
137 
138  if ( curl_handle == NULL ) {
139 
140  Log::instance()->error( "TapirOccurrences::load - Could not initialize curl handle\n" );
141  return false;
142  }
143 
144  // Prepare TAPIR capabilities request
145  std::string capabilities_url( _source );
146 
147  if ( capabilities_url.find( "?" ) != string::npos ) {
148 
149  capabilities_url.append( "&" );
150  }
151  else {
152 
153  capabilities_url.append( "?" );
154  }
155 
156  capabilities_url.append( "op=c" );
157 
158  // Set CURL options
159  if ( curl_easy_setopt( curl_handle, CURLOPT_URL, capabilities_url.c_str() ) != CURLE_OK ) {
160 
161  Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_URL\n" );
162  curl_easy_cleanup( curl_handle );
163  return false;
164  }
165 
166  if ( curl_easy_setopt( curl_handle, CURLOPT_HEADERFUNCTION, &TapirOccurrences::_curl_header_writer ) != CURLE_OK ) {
167 
168  Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_HEADERFUNCTION\n" );
169  curl_easy_cleanup( curl_handle );
170  return false;
171  }
172 
173  std::string header;
174 
175  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) {
176 
177  Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_WRITEHEADER\n" );
178  curl_easy_cleanup( curl_handle );
179  return false;
180  }
181 
182  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &TapirOccurrences::_curl_body_writer ) != CURLE_OK ) {
183 
184  Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_WRITEFUNCTION\n" );
185  curl_easy_cleanup( curl_handle );
186  return false;
187  }
188 
189  std::string capabilities_response;
190 
191  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &capabilities_response ) != CURLE_OK ) {
192 
193  Log::instance()->error( "TapirOccurrences::load - Failed to set CURLOPT_WRITEDATA\n" );
194  curl_easy_cleanup( curl_handle );
195  return false;
196  }
197 
198  Log::instance()->info( "Getting capabilities using %s\n", capabilities_url.c_str() );
199 
200  // Send capabilities request
201  CURLcode curl_code = curl_easy_perform( curl_handle );
202 
203  if ( curl_code != CURLE_OK )
204  {
205  Log::instance()->error( "TapirOccurrences::load - Could not get service capabilities from the specified endpoint (CURL code error: %d)\n", curl_code );
206  curl_easy_cleanup( curl_handle );
207  return false;
208  }
209 
210  curl_easy_cleanup( curl_handle );
211 
212  // Check content type returned
213  if ( header.find( "Content-Type: text/xml" ) == string::npos )
214  {
215  Log::instance()->warn( "TapirOccurrences::load - URL does not seem to be a TAPIR endpoint (HTTP Content-Type header is not text/xml)\n" );
216 
217  return false;
218  }
219 
220  // Data structure to store relevant capabilities information
221  CapabilitiesInfo info;
222 
223  // Parse capabilities response
224  if ( ! _parseCapabilities( &capabilities_response, &info ) ) {
225 
226  return false;
227  }
228 
229  // Check capabilitites response
230  if ( ! info._is_tapir ) {
231 
232  Log::instance()->warn( "TapirOccurrences::load - URL does not seem to be a TAPIR endpoint (no TAPIR response element detected)\n" );
233  return false;
234  }
235  if ( ! info._has_guid ) {
236 
237  Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore GlobalUniqueIdentifier concept\n" );
238  return false;
239  }
240  if ( ! info._has_name ) {
241 
242  Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore ScientificName concept\n" );
243  return false;
244  }
245  if ( ! info._has_long ) {
246 
247  Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore DecimalLongitude concept from the geospatial extension\n" );
248  return false;
249  }
250  if ( ! info._has_lat ) {
251 
252  Log::instance()->error( "TapirOccurrences::load - Provider did not map the DarwinCore DecimalLatitude concept from the geospatial extension\n" );
253  return false;
254  }
255  if ( ( ! info._accepts_om_template ) && ( ! info._accepts_any_model ) && ! info._accepts_om_model ) {
256 
257  Log::instance()->error( "TapirOccurrences::load - Provider must accept searches with the openModeller query template or searches with the openModeller output model or searches with any output model\n" );
258  return false;
259  }
260 
261  Log::instance()->info( "Finished reading capabilities\n" );
262 
263  _loaded = true;
264 
265  return true;
266 }
267 
268 
269 /**************************/
270 /*** parse capabilities ***/
271 bool
273 {
274  XML_Parser parser = XML_ParserCreateNS( NULL, '/' );
275 
276  if ( ! parser ) {
277 
278  Log::instance()->error( "Unable to allocate memory for capabilities response parser" );
279  return false;
280  }
281 
282  info->_parser = parser;
283 
284  XML_SetElementHandler( parser, &TapirOccurrences::_startCapabilitiesElement, NULL );
285 
286  XML_SetUserData( parser, info );
287 
288  if ( ! XML_Parse( parser, xml->c_str(), xml->size(), 1 ) ) {
289 
290  XML_Error error_code = XML_GetErrorCode( parser );
291  std::ostringstream errormsg;
292  errormsg << XML_ErrorString( error_code )
293  << " at Line "
294  << XML_GetCurrentLineNumber( parser )
295  << " column "
296  << XML_GetCurrentColumnNumber( parser )
297  << std::ends;
298 
299  Log::instance()->error( "XML Parser fatal error for capabilities response: %s\n", errormsg.str().c_str() );
300  XML_ParserFree( parser );
301  return false;
302  }
303 
304  XML_ParserFree( parser );
305 
306  _provider_limit = atoi( info->_max_records.c_str() );
307 
308  return true;
309 }
310 
311 
312 /**********************************/
313 /*** start capabilities element ***/
314 void
315 TapirOccurrences::_startCapabilitiesElement( void *data, const char *el, const char **attr )
316 {
317  CapabilitiesInfo& info = *( reinterpret_cast<CapabilitiesInfo*>( data ) );
318 
319  // response element
320  if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/response", 37 ) == 0 ) {
321 
322  info._is_tapir = true;
323  }
324  // mappedConcept element
325  else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/mappedConcept", 42 ) == 0 ) {
326 
327  for ( int i = 0; attr[i]; i += 2 ) {
328 
329  // id attribute (TODO: check "searchable" attribute too)
330  if ( strncmp( attr[i], "id", 2 ) == 0 ) {
331 
332  if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/dwcore/GlobalUniqueIdentifier", 52 ) == 0 ) {
333  info._has_guid = true;
334  }
335  else if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/dwcore/ScientificName", 44 ) == 0 ) {
336 
337  info._has_name = true;
338  }
339  else if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/geospatial/DecimalLongitude", 50 ) == 0 ) {
340 
341  info._has_long = true;
342  }
343  else if ( strncmp( attr[i+1], "http://rs.tdwg.org/dwc/geospatial/DecimalLatitude/", 49 ) == 0 ) {
344 
345  info._has_lat = true;
346  }
347  }
348  }
349  }
350  // template element
351  else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/template", 37 ) == 0 ) {
352 
353  for ( int i = 0; attr[i]; i += 2 ) {
354 
355  // location attribute
356  if ( strncmp( attr[i], "location", 8 ) == 0 &&
357  strncmp( attr[i+1], TP_TEMPLATE_LOCATION, TP_TEMPLATE_STRSIZE ) == 0 ) {
358 
359  info._accepts_om_template = true;
360  }
361  }
362  }
363  // outputModel element
364  else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/outputModel", 40 ) == 0 ) {
365 
366  for ( int i = 0; attr[i]; i += 2 ) {
367 
368  // location attribute
369  if ( strncmp( attr[i], "location", 8 ) == 0 &&
370  strncmp( attr[i+1], TP_OUTPUT_MODEL_LOCATION, TP_OUTPUT_MODEL_STRSIZE ) == 0 ) {
371 
372  info._accepts_om_model = true;
373  }
374  }
375  }
376  // anyOutputModels element
377  else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/anyOutputModels", 44 ) == 0 ) {
378 
379  info._accepts_any_model = true;
380  }
381  // maxElementRepetitions element
382  else if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/maxElementRepetitions", 50 ) == 0 ) {
383  XML_SetCharacterDataHandler( info._parser, &TapirOccurrences::_maxRecordsDataHandler );
384 
385  XML_SetElementHandler( info._parser, &TapirOccurrences::_startCapabilitiesElement,
387  }
388 }
389 
390 
391 /********************************/
392 /*** max records data handler ***/
393 void
394 TapirOccurrences::_maxRecordsDataHandler( void *data, const char *value, int len )
395 {
396  CapabilitiesInfo& info = *( reinterpret_cast<CapabilitiesInfo*>( data ) );
397 
398  info._max_records.append( value, len );
399 }
400 
401 
402 /********************************/
403 /*** end capabilities element ***/
404 void
405 TapirOccurrences::_endCapabilitiesElement( void *data, const char *el )
406 {
407  // maxElementRepetitions element
408  if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/maxElementRepetitions", 50 ) == 0 ) {
409 
410  CapabilitiesInfo& info = *( reinterpret_cast<CapabilitiesInfo*>( data ) );
411 
412  XML_SetCharacterDataHandler( info._parser, NULL );
413  XML_SetElementHandler( info._parser, &TapirOccurrences::_startCapabilitiesElement, NULL );
414  }
415 }
416 
417 
418 /*********************/
419 /*** get Presences ***/
// Returns the occurrences of the group identified by groupId.
//  - Without a groupId, a new OccurrencesImpl( 1 ) is returned.
//  - If a group whose label matches groupId (ignoring case) is stored in
//    _presences, it is removed from that list and returned.
//  - Otherwise a new group is created and filled by calling _retrieveRecords
//    until no next record index is reported or a call fails; the group is
//    then stored in _presences and returned.
// NOTE(review): the line carrying the return type is absent from this listing;
// the declaration shown elsewhere in it reads
// "OccurrencesPtr getPresences(const char *groupId)" - confirm.
421 TapirOccurrences::getPresences( const char *groupId )
422 {
423  // If group was not specified, return empty set
424  if ( ! groupId ) {
425 
426  return new OccurrencesImpl( 1 );
427  }
428 
429  LstOccurrences::iterator ocs = _presences.begin();
430  LstOccurrences::iterator end = _presences.end();
431 
// Look for a stored group with the requested label
432  while ( ocs != end ) {
433 
434  OccurrencesPtr oc = *ocs;
435 
// strcasecmp returns 0 when the strings are equal ignoring case
436  if ( ! strcasecmp( groupId, oc->label() ) ) {
437 
// The group leaves the list; returning right away means the erased iterator is not used again
438  _presences.erase( ocs );
439 
440  return oc;
441  }
442 
443  ++ocs;
444  }
445 
446  // If not found, create new group and retrieve records from provider
447 
448  OccurrencesPtr occurrences( new OccurrencesImpl( groupId, _coord_system ) );
449 
450  TapirRecordData search_data;
451 
452  search_data._occurrences = occurrences;
// Index of the first record to be requested
453  search_data._next = 0;
454 
455  int limit = _default_limit;
456 
// NOTE(review): the line opening this conditional (listing line 457) is absent
// from this listing, so the condition under which the provider limit replaces
// the default one cannot be told from here - restore it from the repository.
458 
459  limit = _provider_limit;
460  }
461 
// _retrieveRecords sets _next to -1 before each request, so the loop ends
// unless the response brings the index of a next record
462  while ( search_data._next >= 0 ) {
463 
464  Log::instance()->info( "Fetching records (start %d, limit %d)\n", search_data._next, limit );
465 
466  if ( ! _retrieveRecords( &search_data, limit ) ) {
467 
468  break;
469  }
470  }
471 
// The group is stored even if a retrieval failed half way
472  _presences.push_back( occurrences );
473 
474  return occurrences;
475 }
476 
477 
478 /************************/
479 /*** retrieve Records ***/
480 bool
482 {
483  // Prepare CURL handle
484  CURL * curl_handle = curl_easy_init();
485 
486  if ( curl_handle == NULL ) {
487 
488  Log::instance()->error( "TapirOccurrences::_retrieveRecords - Could not initialize curl handle\n" );
489  return false;
490  }
491 
492  // Prepare TAPIR search request
493  std::string source( _source );
494 
495  std::ostringstream search_url;
496 
497  search_url << source.c_str();
498 
499  if ( source.find( "?" ) != string::npos ) {
500 
501  search_url << "&";
502  }
503  else {
504 
505  search_url << "?";
506  }
507 
508  search_url << "op=s&s=" << data->_next;
509 
510  search_url << "&l=" << limit;
511 
512 // curl_easy_escape was included in libcurl version 7.15.4
513 #if LIBCURL_VERSION_NUM >= 0x070f04
514  search_url << "&sciname=" << curl_easy_escape( curl_handle, data->_occurrences->label(), 0 );
515  search_url << "&t=" << curl_easy_escape( curl_handle, TP_TEMPLATE_LOCATION, 0 );
516 #else
517  search_url << "&sciname=" << curl_escape( data->_occurrences->label(), 0 );
518  search_url << "&t=" << curl_escape( TP_TEMPLATE_LOCATION, 0 );
519 #endif
520 
521  // After using next to make the URL, set it to -1 to stop the process in case
522  // the response does not return the "next" attribute
523  data->_next = -1;
524 
525  // Set CURL options
526  if ( curl_easy_setopt( curl_handle, CURLOPT_URL, search_url.str().c_str() ) != CURLE_OK ) {
527 
528  Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_URL\n" );
529  curl_easy_cleanup( curl_handle );
530  return false;
531  }
532 
533  if ( curl_easy_setopt( curl_handle, CURLOPT_HEADERFUNCTION, &TapirOccurrences::_curl_header_writer ) != CURLE_OK ) {
534 
535  Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_HEADERFUNCTION\n" );
536  curl_easy_cleanup( curl_handle );
537  return false;
538  }
539 
540  std::string header;
541 
542  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) {
543 
544  Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEHEADER\n" );
545  curl_easy_cleanup( curl_handle );
546  return false;
547  }
548 
549  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &TapirOccurrences::_curl_body_writer ) != CURLE_OK ) {
550 
551  Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEFUNCTION\n" );
552  curl_easy_cleanup( curl_handle );
553  return false;
554  }
555 
556  std::string search_response;
557 
558  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &search_response ) != CURLE_OK ) {
559 
560  Log::instance()->error( "TapirOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEDATA\n" );
561  curl_easy_cleanup( curl_handle );
562  return false;
563  }
564 
565  Log::instance()->info( "Searching records using %s\n", search_url.str().c_str() );
566 
567  // Send search request
568  CURLcode curl_code = curl_easy_perform( curl_handle );
569 
570  if ( curl_code != CURLE_OK )
571  {
572  const char * error_msg = curl_easy_strerror( curl_code );
573 
574  Log::instance()->error( "TapirOccurrences::_retrieveRecords - Could not search records from the specified TAPIR endpoint (CURL error: %s)\n", error_msg );
575 
576  curl_easy_cleanup( curl_handle );
577  return false;
578  }
579 
580  curl_easy_cleanup( curl_handle );
581 
582  // Check content type returned
583  if ( header.find( "Content-Type: text/xml" ) == string::npos )
584  {
585  Log::instance()->error( "TapirOccurrences::_retrieveRecords - URL does not seem to be a TAPIR endpoint (HTTP Content-Type header is not text/xml)\n" );
586  return false;
587  }
588 
589  // Parse search response
590  XML_Parser parser = XML_ParserCreateNS( NULL, '/' );
591 
592  if ( ! parser ) {
593 
594  Log::instance()->error( "Unable to allocate memory for search response parser" );
595  return false;
596  }
597 
598  XML_SetElementHandler( parser, &TapirOccurrences::_startSearchElement, NULL );
599 
600  XML_SetUserData( parser, data );
601 
602  if ( ! XML_Parse( parser, search_response.c_str(), search_response.size(), 1 ) ) {
603 
604  XML_Error error_code = XML_GetErrorCode( parser );
605  std::ostringstream errormsg;
606  errormsg << XML_ErrorString( error_code )
607  << " at Line "
608  << XML_GetCurrentLineNumber( parser )
609  << " column "
610  << XML_GetCurrentColumnNumber( parser )
611  << std::ends;
612 
613  Log::instance()->error( "XML Parser fatal error for search response: %s\n", errormsg.str().c_str() );
614  XML_ParserFree( parser );
615  return false;
616  }
617 
618  XML_ParserFree( parser );
619 
620  return true;
621 }
622 
623 
624 /****************************/
625 /*** start search element ***/
626 void
627 TapirOccurrences::_startSearchElement( void *data, const char *el, const char **attr )
628 {
629  TapirRecordData& search_data = *( reinterpret_cast<TapirRecordData*>( data ) );
630 
631  // occ element
632  if ( strlen( el ) == 49 &&
633  strncmp( el, "http://openmodeller.cria.org.br/xml/tapir/1.0/occ", 49 ) == 0 ) {
634 
635  std::string guid("");
636 
637  Coord lg = 0;
638  Coord lt = 0;
639 
640  for ( int i = 0; attr[i]; i += 2 ) {
641 
642  // guid attribute
643  if ( strncmp( attr[i], "guid", 4 ) == 0 ) {
644 
645  guid = attr[i+1];
646  }
647  // long attribute
648  else if ( strncmp( attr[i], "long", 4 ) == 0 ) {
649 
650  lg = Coord( atof( attr[i+1] ) );
651  }
652  // lat attribute
653  else if ( strncmp( attr[i], "lat", 3 ) == 0 ) {
654 
655  lt = Coord( atof( attr[i+1] ) );
656  }
657  // TODO: get datum and convert coordinates when necessary
658  }
659 
660  search_data._occurrences->createOccurrence( guid.c_str(), lg, lt, 0.0, 1, 0, 0 );
661  }
662  // summary element
663  if ( strncmp( el, "http://rs.tdwg.org/tapir/1.0/summary", 36 ) == 0 ) {
664 
665  for ( int i = 0; attr[i]; i += 2 ) {
666 
667  // next attribute
668  if ( strncmp( attr[i], "next", 4 ) == 0 ) {
669 
670  search_data._next = atoi( attr[i+1] );
671  }
672  }
673  }
674 }
LstOccurrences _presences
void warn(const char *format,...)
'Warn' level.
Definition: Log.cpp:273
static OccurrencesReader * CreateOccurrencesReaderCallback(const char *source, const char *coordSystem)
static size_t _curl_header_writer(void *ptr, size_t size, size_t nmemb, void *stream)
#define TP_TEMPLATE_STRSIZE
static void _endCapabilitiesElement(void *data, const char *el)
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
OccurrencesPtr _occurrences
void error(const char *format,...)
'Error' level.
Definition: Log.cpp:290
std::string _max_records
bool _parseCapabilities(const std::string *xml, CapabilitiesInfo *info)
static size_t _curl_body_writer(void *ptr, size_t size, size_t nmemb, void *stream)
#define TP_OUTPUT_MODEL_STRSIZE
#define TP_OUTPUT_MODEL_LOCATION
bool _retrieveRecords(TapirRecordData *data, int limit)
static void _startCapabilitiesElement(void *data, const char *el, const char **attr)
static void _maxRecordsDataHandler(void *data, const char *value, int len)
static void _startSearchElement(void *data, const char *el, const char **attr)
void info(const char *format,...)
'Info' level.
Definition: Log.cpp:256
TapirOccurrences(const char *url, const char *coordSystem)
OccurrencesPtr getPresences(const char *groupId)
double Coord
Type of map coordinates.
Definition: om_defs.hh:38
#define TP_TEMPLATE_LOCATION