openModeller  Version 1.5.0
GbifOccurrences.cpp
Go to the documentation of this file.
1 
28 
30 
32 
33 #include <stdlib.h>
34 
35 //c style include needed for strcmp etc
36 #include <string.h>
37 using std::string;
38 
39 #include <sstream>
40 using std::ostringstream;
41 
42 #include <curl/curl.h>
43 
44 #include <expat.h>
45 
46 /*****************************************/
47 /*** create OccurrencesReader callback ***/
49 GbifOccurrences::CreateOccurrencesReaderCallback( const char *source, const char *coordSystem )
50 {
51  return new GbifOccurrences( source, coordSystem );
52 }
53 
54 /***************************/
55 /*** _curl header writer ***/
56 size_t
57 GbifOccurrences::_curl_header_writer( void *ptr, size_t size, size_t nmemb, void *stream )
58 {
59  if ( stream == NULL ) {
60 
61  return 0;
62  }
63 
64  // *stream is actually a string object
65  std::string& str = *( reinterpret_cast<std::string*>( stream ) );
66 
67  str.append( reinterpret_cast<const char*>(ptr), size*nmemb );
68 
69  return size*nmemb;
70 }
71 
72 /*************************/
73 /*** _curl body writer ***/
74 size_t
75 GbifOccurrences::_curl_body_writer( void *ptr, size_t size, size_t nmemb, void *stream )
76 {
77  if ( stream == NULL ) {
78 
79  return 0;
80  }
81 
82  // *stream is actually a string object
83  std::string& str = *( reinterpret_cast<std::string*>( stream ) );
84 
85  str.append( reinterpret_cast<const char*>(ptr), size*nmemb );
86 
87  return size*nmemb;
88 }
89 
90 /*******************/
91 /*** Constructor ***/
92 GbifOccurrences::GbifOccurrences( const char *source, const char *coordSystem )
93 {
94  if ( curl_global_init( CURL_GLOBAL_ALL ) != CURLE_OK ) {
95 
96  throw OccurrencesReaderException( "Could not initialize libcurl" );
97  }
98 
99  _loaded = false;
100 
101  _source = (char *) source; // endpoint
102 
103  _coord_system = (char *) coordSystem;
104 
105  _default_limit = 100;
106 }
107 
108 
109 /******************/
110 /*** Destructor ***/
112 {
113  curl_global_cleanup();
114 }
115 
116 
117 /************/
118 /*** load ***/
119 bool
121 {
122  if ( _loaded ) {
123 
124  return true;
125  }
126 
127  Log::instance()->info( "Checking endpoint using GBIF driver\n" );
128 
129  Log::instance()->info( "CURL version is %s\n", curl_version() );
130 
131  // Prepare CURL handle
132  CURL * curl_handle = curl_easy_init();
133 
134  if ( curl_handle == NULL ) {
135 
136  Log::instance()->error( "GbifOccurrences::load - Could not initialize curl handle\n" );
137  return false;
138  }
139 
140  // Prepare request with no parameters just to check if it's the GBIF service
141 
142 
143  // Set CURL options
144  if ( curl_easy_setopt( curl_handle, CURLOPT_URL, _source ) != CURLE_OK ) {
145 
146  Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_URL\n" );
147  curl_easy_cleanup( curl_handle );
148  return false;
149  }
150 
151  if ( curl_easy_setopt( curl_handle, CURLOPT_HEADERFUNCTION, &GbifOccurrences::_curl_header_writer ) != CURLE_OK ) {
152 
153  Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_HEADERFUNCTION\n" );
154  curl_easy_cleanup( curl_handle );
155  return false;
156  }
157 
158  std::string header;
159 
160  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) {
161 
162  Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_WRITEHEADER\n" );
163  curl_easy_cleanup( curl_handle );
164  return false;
165  }
166 
167  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &GbifOccurrences::_curl_body_writer ) != CURLE_OK ) {
168 
169  Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_WRITEFUNCTION\n" );
170  curl_easy_cleanup( curl_handle );
171  return false;
172  }
173 
174  std::string response;
175 
176  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &response ) != CURLE_OK ) {
177 
178  Log::instance()->error( "GbifOccurrences::load - Failed to set CURLOPT_WRITEDATA\n" );
179  curl_easy_cleanup( curl_handle );
180  return false;
181  }
182 
183  Log::instance()->info( "Getting service info using %s\n", _source );
184 
185  // Send request
186  CURLcode curl_code = curl_easy_perform( curl_handle );
187 
188  if ( curl_code != CURLE_OK )
189  {
190  Log::instance()->error( "GbifOccurrences::load - Could not get service info from the specified endpoint (CURL code error: %d)\n", curl_code );
191  curl_easy_cleanup( curl_handle );
192  return false;
193  }
194 
195  curl_easy_cleanup( curl_handle );
196 
197  // Check content type returned
198  if ( header.find( "Content-Type: text/xml" ) == string::npos )
199  {
200  Log::instance()->error( "GbifOccurrences::load - URL does not seem to be a GBIF REST WS endpoint (HTTP Content-Type header is not text/xml)\n" );
201 
202  Log::instance()->info( "HEADER: %s\n", header.c_str() );
203 
204  return false;
205  }
206 
207  // Data structure to store relevant service information
208  ServiceInfo info;
209 
210  // Parse response
211  if ( ! _parseServiceInfo( &response, &info ) ) {
212 
213  return false;
214  }
215 
216  // Check response
217  if ( ! info._is_gbif ) {
218 
219  Log::instance()->error( "GbifOccurrences::load - URL does not seem to be a GBIF WS REST endpoint (root element is not gbifResponse)\n" );
220  return false;
221  }
222 
223  Log::instance()->info( "Finished checking service info\n" );
224 
225  _loaded = true;
226 
227  return true;
228 }
229 
230 
231 /**************************/
232 /*** parse service info ***/
233 bool
234 GbifOccurrences::_parseServiceInfo( const std::string *xml, ServiceInfo *info )
235 {
236  XML_Parser parser = XML_ParserCreateNS( NULL, '/' );
237 
238  if ( ! parser ) {
239 
240  Log::instance()->error( "Unable to allocate memory for capabilities response parser" );
241  return false;
242  }
243 
244  info->_parser = parser;
245 
246  XML_SetElementHandler( parser, &GbifOccurrences::_startServiceInfoElement, NULL );
247 
248  XML_SetUserData( parser, info );
249 
250  if ( ! XML_Parse( parser, xml->c_str(), xml->size(), 1 ) ) {
251 
252  XML_Error error_code = XML_GetErrorCode( parser );
253  std::ostringstream errormsg;
254  errormsg << XML_ErrorString( error_code )
255  << " at Line "
256  << XML_GetCurrentLineNumber( parser )
257  << " column "
258  << XML_GetCurrentColumnNumber( parser )
259  << std::ends;
260 
261  Log::instance()->error( "XML Parser fatal error for capabilities response: %s\n", errormsg.str().c_str() );
262  XML_ParserFree( parser );
263  return false;
264  }
265 
266  XML_ParserFree( parser );
267 
268  _provider_limit = atoi( info->_max_records.c_str() );
269 
270  return true;
271 }
272 
273 
274 /**********************************/
275 /*** start service info element ***/
276 void
277 GbifOccurrences::_startServiceInfoElement( void *data, const char *el, const char **attr )
278 {
279  ServiceInfo& info = *( reinterpret_cast<ServiceInfo*>( data ) );
280 
281  // gbifResponse element
282  if ( strncmp( el, "http://portal.gbif.org/ws/response/gbif/gbifResponse", 52 ) == 0 ) {
283 
284  info._is_gbif = true;
285  }
286  // parameter element
287  else if ( strncmp( el, "http://portal.gbif.org/ws/response/gbif/parameter", 49 ) == 0 ) {
288 
289  bool is_maxresults = false;
290 
291  std::string value("");
292 
293  for ( int i = 0; attr[i]; i += 2 ) {
294 
295  // name attribute
296  if ( strncmp( attr[i], "name", 4 ) == 0 ) {
297 
298  if ( strncmp( attr[i+1], "maxresults", 10 ) == 0 ) {
299  is_maxresults = true;
300  }
301  }
302  else if ( strncmp( attr[i], "value", 5 ) == 0 ) {
303 
304  value = attr[i+1];
305  }
306 
307  if ( is_maxresults ) {
308 
309  info._max_records = value;
310 
311  // No need to keep parsing
312  XML_SetElementHandler( info._parser, NULL, NULL );
313  }
314  }
315  }
316 }
317 
318 
319 /*********************/
320 /*** get Presences ***/
322 GbifOccurrences::getPresences( const char *groupId )
323 {
324  // If group was not specified, return empty set
325  if ( ! groupId ) {
326 
327  return new OccurrencesImpl( 1 );
328  }
329 
330  LstOccurrences::iterator ocs = _presences.begin();
331  LstOccurrences::iterator end = _presences.end();
332 
333  while ( ocs != end ) {
334 
335  OccurrencesPtr oc = *ocs;
336 
337  if ( ! strcasecmp( groupId, oc->label() ) ) {
338 
339  _presences.erase( ocs );
340 
341  return oc;
342  }
343 
344  ++ocs;
345  }
346 
347  // If not found, create new group and retrieve records from provider
348 
349  OccurrencesPtr occurrences( new OccurrencesImpl( groupId, _coord_system ) );
350 
351  GbifRecordData search_data;
352 
353  search_data._occurrences = occurrences;
354  search_data._next = 0;
355 
356  int limit = _default_limit;
357 
359 
360  limit = _provider_limit;
361  }
362 
363  while ( search_data._next >= 0 ) {
364 
365  Log::instance()->info( "Fetching records (start %d, limit %d)\n", search_data._next, limit );
366 
367  if ( ! _retrieveRecords( &search_data, limit ) ) {
368 
369  break;
370  }
371  }
372 
373  _presences.push_back( occurrences );
374 
375  return occurrences;
376 }
377 
378 
379 /************************/
380 /*** retrieve Records ***/
381 bool
383 {
384  // Prepare CURL handle
385  CURL * curl_handle = curl_easy_init();
386 
387  if ( curl_handle == NULL ) {
388 
389  Log::instance()->error( "GbifOccurrences::_retrieveRecords - Could not initialize curl handle\n" );
390  return false;
391  }
392 
393  // Prepare search request
394  std::string source( _source );
395 
396  std::ostringstream search_url;
397 
398  search_url << source.c_str();
399 
400  if ( source.find( "?" ) != string::npos ) {
401 
402  search_url << "&";
403  }
404  else {
405 
406  search_url << "?";
407  }
408 
409  search_url << "startindex=" << data->_next;
410 
411  search_url << "&maxresults=" << limit;
412 
413 // curl_easy_escape was included in libcurl version 7.15.4
414 #if LIBCURL_VERSION_NUM >= 0x070f04
415  search_url << "&scientificname=" << curl_easy_escape( curl_handle, data->_occurrences->label(), 0 );
416 #else
417  search_url << "&scientificname=" << curl_escape( data->_occurrences->label(), 0 );
418 #endif
419 
420  search_url << "&format=brief&coordinatestatus=true&coordinateissues=false";
421 
422  // After using next to make the URL, set it to -1 to stop the process in case
423  // the response does not return the "next" attribute
424  data->_next = -1;
425 
426  // Set CURL options
427  if ( curl_easy_setopt( curl_handle, CURLOPT_URL, search_url.str().c_str() ) != CURLE_OK ) {
428 
429  Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_URL\n" );
430  curl_easy_cleanup( curl_handle );
431  return false;
432  }
433 
434  if ( curl_easy_setopt( curl_handle, CURLOPT_HEADERFUNCTION, &GbifOccurrences::_curl_header_writer ) != CURLE_OK ) {
435 
436  Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_HEADERFUNCTION\n" );
437  curl_easy_cleanup( curl_handle );
438  return false;
439  }
440 
441  std::string header;
442 
443  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEHEADER, &header ) != CURLE_OK ) {
444 
445  Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEHEADER\n" );
446  curl_easy_cleanup( curl_handle );
447  return false;
448  }
449 
450  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEFUNCTION, &GbifOccurrences::_curl_body_writer ) != CURLE_OK ) {
451 
452  Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEFUNCTION\n" );
453  curl_easy_cleanup( curl_handle );
454  return false;
455  }
456 
457  std::string search_response;
458 
459  if ( curl_easy_setopt( curl_handle, CURLOPT_WRITEDATA, &search_response ) != CURLE_OK ) {
460 
461  Log::instance()->error( "GbifOccurrences::_retrieveRecords - Failed to set CURLOPT_WRITEDATA\n" );
462  curl_easy_cleanup( curl_handle );
463  return false;
464  }
465 
466  Log::instance()->info( "Searching records using %s\n", search_url.str().c_str() );
467 
468  // Send search request
469  CURLcode curl_code = curl_easy_perform( curl_handle );
470 
471  if ( curl_code != CURLE_OK )
472  {
473  const char * error_msg = curl_easy_strerror( curl_code );
474 
475  Log::instance()->error( "GbifOccurrences::_retrieveRecords - Could not search records from the specified GBIF endpoint (CURL error: %s)\n", error_msg );
476 
477  curl_easy_cleanup( curl_handle );
478  return false;
479  }
480 
481  curl_easy_cleanup( curl_handle );
482 
483  // Check content type returned
484  if ( header.find( "Content-Type: text/xml" ) == string::npos )
485  {
486  Log::instance()->error( "GbifOccurrences::_retrieveRecords - URL does not seem to be a GBIF WS REST endpoint (HTTP Content-Type header is not text/xml)\n" );
487  return false;
488  }
489 
490  // Parse search response
491  XML_Parser parser = XML_ParserCreateNS( NULL, '/' );
492 
493  if ( ! parser ) {
494 
495  Log::instance()->error( "Unable to allocate memory for search response parser" );
496  return false;
497  }
498 
499  data->_parser = parser;
500 
502 
503  XML_SetUserData( parser, data );
504 
505  if ( ! XML_Parse( parser, search_response.c_str(), search_response.size(), 1 ) ) {
506 
507  XML_Error error_code = XML_GetErrorCode( parser );
508  std::ostringstream errormsg;
509  errormsg << XML_ErrorString( error_code )
510  << " at Line "
511  << XML_GetCurrentLineNumber( parser )
512  << " column "
513  << XML_GetCurrentColumnNumber( parser )
514  << std::ends;
515 
516  Log::instance()->error( "XML Parser fatal error for search response: %s\n", errormsg.str().c_str() );
517  XML_ParserFree( parser );
518  return false;
519  }
520 
521  XML_ParserFree( parser );
522 
523  return true;
524 }
525 
526 
527 /****************************/
528 /*** start search element ***/
529 void
530 GbifOccurrences::_startSearchElement( void *data, const char *el, const char **attr )
531 {
532  GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) );
533 
534  // summary element
535  if ( strncmp( el, "http://portal.gbif.org/ws/response/gbif/summary", 47 ) == 0 ) {
536 
537  for ( int i = 0; attr[i]; i += 2 ) {
538 
539  // next attribute
540  if ( strncmp( attr[i], "next", 4 ) == 0 ) {
541 
542  search_data._next = atoi( attr[i+1] );
543  }
544  }
545  }
546  // TaxonOccurrence element
547  else if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/TaxonOccurrence", 64 ) == 0 ) {
548 
549  for ( int i = 0; attr[i]; i += 2 ) {
550 
551  // rdf:about attribute
552  if ( strncmp( attr[i], "http://www.w3.org/1999/02/22-rdf-syntax-ns#/about", 49 ) == 0 ) {
553 
554  search_data._last_guid = attr[i+1];
555  }
556  }
557  }
558  // decimalLatitude element
559  else if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/decimalLatitude", 64 ) == 0 ) {
560 
561  XML_SetCharacterDataHandler( search_data._parser, &GbifOccurrences::_ltDataHandler );
562  }
563  // decimalLongitude element
564  else if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/decimalLongitude", 65 ) == 0 ) {
565 
566  XML_SetCharacterDataHandler( search_data._parser, &GbifOccurrences::_lgDataHandler );
567  }
568 }
569 
570 
571 /**************************/
572 /*** end search element ***/
573 void
574 GbifOccurrences::_endSearchElement( void *data, const char *el )
575 {
576  // TaxonOccurrence element
577  if ( strncmp( el, "http://rs.tdwg.org/ontology/voc/TaxonOccurrence#/TaxonOccurrence", 64 ) == 0 ) {
578 
579  GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) );
580 
581  search_data._occurrences->createOccurrence( search_data._last_guid.c_str(), search_data._last_lg, search_data._last_lt, 0.0, 1, 0, 0 );
582  }
583 }
584 
585 
586 /***********************/
587 /*** lt data handler ***/
588 void
589 GbifOccurrences::_ltDataHandler( void *data, const char *value, int len )
590 {
591  GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) );
592 
593  std::string lt("");
594  lt.append( value, len );
595 
596  search_data._last_lt = Coord( atof( lt.c_str() ) );
597 
598  XML_SetCharacterDataHandler( search_data._parser, NULL );
599 }
600 
601 
602 /***********************/
603 /*** lg data handler ***/
604 void
605 GbifOccurrences::_lgDataHandler( void *data, const char *value, int len )
606 {
607  GbifRecordData& search_data = *( reinterpret_cast<GbifRecordData*>( data ) );
608 
609  std::string lg("");
610  lg.append( value, len );
611 
612  search_data._last_lg = Coord( atof( lg.c_str() ) );
613 
614  XML_SetCharacterDataHandler( search_data._parser, NULL );
615 }
static void _endSearchElement(void *data, const char *el)
static OccurrencesReader * CreateOccurrencesReaderCallback(const char *source, const char *coordSystem)
LstOccurrences _presences
static size_t _curl_header_writer(void *ptr, size_t size, size_t nmemb, void *stream)
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
static size_t _curl_body_writer(void *ptr, size_t size, size_t nmemb, void *stream)
std::string _max_records
static void _startServiceInfoElement(void *data, const char *el, const char **attr)
std::string _last_guid
void error(const char *format,...)
'Error' level.
Definition: Log.cpp:290
static void _lgDataHandler(void *data, const char *value, int len)
static void _ltDataHandler(void *data, const char *value, int len)
OccurrencesPtr getPresences(const char *groupId)
bool _retrieveRecords(GbifRecordData *data, int limit)
GbifOccurrences(const char *url, const char *coordSystem)
bool _parseServiceInfo(const std::string *xml, ServiceInfo *info)
void info(const char *format,...)
'Info' level.
Definition: Log.cpp:256
double Coord
Type of map coordinates.
Definition: om_defs.hh:38
static void _startSearchElement(void *data, const char *el, const char **attr)
XML_Parser _parser
XML_Parser _parser
OccurrencesPtr _occurrences