Main Page | Modules | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

omgscrapergbifrest.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2005 by Tim Sutton   *
00003  *   tim@linfiniti.com   *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU General Public License as published by  *
00007  *   the Free Software Foundation; either version 2 of the License, or     *
00008  *   (at your option) any later version.                                   *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU General Public License     *
00016  *   along with this program; if not, write to the                         *
00017  *   Free Software Foundation, Inc.,                                       *
00018  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00019  ***************************************************************************/
00020 
00021 #include "omgscrapergbifrest.h"
00022 #include "omgwebpagefetcher.h"
00023 #include "omgui.h"
00024 
00025 //QT Includes
00026 #include <QTimer>
00027 #include <QDomDocument>
00028 #include <QDomElement>
00029 #include <QRegExp>
00030 #include <QDebug>
00031 #include <QFileInfo>
00032 #include <QFile>
00033 #include <QTextStream>
00034 #include <QSettings>
00035 //needed for Q_EXPORT_PLUGIN macro at the end of this file
00036 #include <QtPlugin> 
00037 OmgScraperGbifRest::OmgScraperGbifRest(QObject * parent) : 
00038   QObject(parent),
00039   OmgScraperPluginInterface()
00040 {
00041   qDebug("OmgScraperGbifRest constructor called...");
00042 }
00043 
00044 
00045 OmgScraperGbifRest::~OmgScraperGbifRest()
00046 {
00047 }
00048 
00049 const QString  OmgScraperGbifRest::getName() 
00050 { 
00051   return QString("GBIF REST Web Service Plugin"); 
00052 }
00053 
00054 const QString OmgScraperGbifRest::getLicense()
00055 {
00056   QFile myQFile( ":/gbif_terms.txt" );
00057   QString myString;
00058   if ( myQFile.open( QIODevice::ReadOnly ) ) 
00059   {
00060     //now we parse the loc file, checking each line for its taxon
00061     QTextStream myStream( &myQFile );
00062     myString = myStream.readAll();
00063     myQFile.close();
00064   }
00065   else
00066   {
00067     myString="Terms and conditions document for GBIF could not be retrieved.";
00068   }
00069   return myString; 
00070 }
00071 
00072 bool OmgScraperGbifRest::search(QString theTaxonName, QString theFileName)
00073 {
00074 
00075   mTaxonName = theTaxonName;
00076   mFileName = theFileName;
00077 
00078   if (mTaxonName.isEmpty())
00079   {
00080     mMessenger.emitError("Taxon name is empty!");
00081     return false;
00082   }
00083 
00084   if (mFileName.isEmpty())
00085   {
00086     mMessenger.emitError("File name is empty!");
00087     return false;
00088   }
00089 
00090   mTaxonName = theTaxonName;
00091   mFileName = theFileName;
00092   QString mySearchName=theTaxonName.simplified();
00093   mySearchName=mySearchName.replace(" ","+");
00094   QSettings mySettings;
00095   QString myGbifUrl=mySettings.value("openModeller/gbifPlugin/url", "data.gbif.org").toString();
00096 
00097 
00098   //
00099   // Now we do a list query that allows us
00100   // to get the actual recs page by page
00101   //
00102   //
00103   
00104   //maximum allowed by GBIF
00105   const int myPageSize = 1000; 
00106   // the number of pages in the dataset
00107   // we will update this when we get our first query back
00108   int myPageCount = 0; 
00109   //useable recs only
00110   int myRecordCount=0; 
00111   // a flag to indicate whether we should try to get 
00112   // the next page of data
00113   bool myMoreDataFlag=true;
00114   while (myMoreDataFlag==true)
00115   {
00116     QString myStartString = QString::number(myPageCount * myPageSize);
00117     // An example search url:
00118     // http://data.gbif.org/ws/rest/occurrence/list?coordinateissues=false
00119     // &maxresults=1000&startindex=1000&mode=raw&coordinatestatus=true
00120     // &scientificname=Trifolium+repens&format=brief
00121     // Where:
00122     // &coordinatestatus=true  should limit responses to those with coordinates.  
00123     // &coordinateissues=false to exclude records where the coordinates 
00124     //                         do not match the supplied country name.
00125     // 
00126     // &maxresults=1000        page size to return
00127     // &startindex=1000        where in the recordset the page should start
00128     QString myUrl="http://" + myGbifUrl  + "/ws/rest/occurrence/list?scientificName=" 
00129       + mySearchName + "&format=brief&coordinatestatus=true&coordinateissues=false"
00130       + "&maxresults=1000&startindex=" + myStartString;
00131     qDebug ("GBIFRest scraper plugin URL for Search string = " + myUrl.toLocal8Bit());
00132     OmgWebPageFetcher myWebPageFetcher;
00133     connect(&myWebPageFetcher, SIGNAL(statusChanged(QString)),
00134         this, SLOT(setStatus(QString)));
00135     QString myResult = myWebPageFetcher.getPage(myUrl);
00136     QDomDocument myDocument("mydocument");
00137     myDocument.setContent(myResult);
00138     //
00139     // Check for the presence of gbif:nextRequestUrl element
00140     // which indicates there is another page to be fetched
00141     // after this one...
00142     //
00143     QDomNodeList myNextPageList= myDocument.elementsByTagName("gbif:nextRequestUrl");
00144     if (myNextPageList.length()<1)
00145     {
00146       //there are no more pages after this 
00147       myMoreDataFlag=false;
00148     }
00149     
00150     /*
00151        <gbif:occurrenceRecords>
00152         <to:TaxonOccurrence gbifKey="3985067" rdf:about="http://data.gbif.org/ws/rest/occurrence/get/3985067">
00153          <to:catalogNumber>72336</to:catalogNumber>
00154          <to:country>Chile</to:country>
00155          <to:decimalLatitude>-31.6666667</to:decimalLatitude>
00156          <to:decimalLongitude>-71.2166667</to:decimalLongitude>
00157          <to:earliestDateCollected>1989-02-04</to:earliestDateCollected>
00158          <to:identifiedTo>
00159           <to:Identification>
00160            <to:taxon>
00161              <tc:TaxonConcept gbifKey="5369461" rdf:about="http://data.gbif.org/ws/rest/taxon/get/5369461">
00162               <tc:hasName>
00163                 <tn:TaxonName>
00164                  <tn:nameComplete>Acacia saligna (Labill.) H. H. Wendl. or Wendl. f.</tn:nameComplete>
00165                  <tn:genusPart>Acacia</tn:genusPart>
00166                  <tn:specificEpithet>saligna</tn:specificEpithet>
00167                  <tn:authorship>(Labill.) H. H. Wendl. or Wendl. f.</tn:authorship>
00168                  <tn:scientific>true</tn:scientific>
00169                 </tn:TaxonName>
00170                </tc:hasName>
00171               </tc:TaxonConcept>
00172              </to:taxon>
00173             <to:taxonName>Acacia saligna (Labill.) H. H. Wendl. or Wendl. f.</to:taxonName>
00174            </to:Identification>
00175           </to:identifiedTo>
00176          <to:latestDateCollected>1989-02-04</to:latestDateCollected>
00177         </to:TaxonOccurrence>
00178        <gbif:occurrenceRecords>
00179        */
00180     //now loop through the occurrence records underneath that
00181 
00182     QDomNodeList myOccurrenceList = myDocument.elementsByTagName("to:TaxonOccurrence");
00183     for (int myCounter=0; myCounter < myOccurrenceList.size(); myCounter++) 
00184     {
00185       QDomNode myRecNode = myOccurrenceList.item(myCounter);
00186       QDomElement myRecElement = myRecNode.toElement();
00187       OmgLocality myLocality;
00188       QString myId = myRecElement.attribute("gbifKey");
00189       myLocality.setId(myId);
00190       QString myTaxonName = myRecElement.
00191         firstChildElement("to:identifiedTo").
00192         firstChildElement("to:Identification").
00193         firstChildElement("to:taxon").
00194         firstChildElement("tc:TaxonConcept").
00195         firstChildElement("tc:hasName").
00196         firstChildElement("tn:TaxonName").
00197         firstChildElement("tn:genusPart").
00198         text();
00199       QString mySpecificEpithet = myRecElement.
00200         firstChildElement("to:identifiedTo").
00201         firstChildElement("to:Identification").
00202         firstChildElement("to:taxon").
00203         firstChildElement("tc:TaxonConcept").
00204         firstChildElement("tc:hasName").
00205         firstChildElement("tn:TaxonName").
00206         firstChildElement("tn:specificEpithet").
00207         text();
00208       QString myReturnedName = QString(myTaxonName + " " + mySpecificEpithet).simplified();
00209       if (myReturnedName.isEmpty())
00210       {
00211         //fall back to the search string the user used
00212         myLocality.setLabel(theTaxonName.simplified());
00213       }
00214       else
00215       {
00216          myLocality.setLabel(myReturnedName);
00217       }
00218       QString myLatitude = myRecElement.firstChildElement("to:decimalLatitude").text();
00219       myLocality.setLatitude(myLatitude.toFloat());
00220       QString myLongitude = myRecElement.firstChildElement("to:decimalLongitude").text();
00221       myLocality.setLongitude(myLongitude.toFloat());
00222       if (!myLocality.isValid())
00223       {
00224         continue;
00225       }
00226       mLocalityVector.push_back(myLocality);
00227       ++myRecordCount;
00228     }
00229     ++myPageCount;
00230   }
00231   qDebug() <<   myRecordCount << " useable records found" ;
00232   //
00233   // Now build the shapefile
00234   //
00235   QString myTextFileName = createTextFile(mFileName);
00236   if (myTextFileName.isEmpty())
00237   {
00238     mMessenger.emitFileNotWritten(mTaxonName);
00239   }
00240   else
00241   {
00242     createShapefile(mFileName);
00243     mMessenger.emitFileWritten(mFileName, myTextFileName,mTaxonName,myRecordCount);
00244   }
00246   mLocalityVector.clear();
00247   return true;
00248 }
00249 
00250 
00251 void OmgScraperGbifRest::setStatus(QString theStatus)
00252 {
00253   mMessenger.emitMessage(theStatus); 
00254 }
00255 
00256 Q_EXPORT_PLUGIN2(gbifrest_scraper_plugin, OmgScraperGbifRest );

Generated on Mon Apr 28 15:09:34 2008 for openModellerDesktop by  doxygen 1.4.1-20050210