00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "omgscrapergbifrest.h"
00022 #include "omgwebpagefetcher.h"
00023 #include "omgui.h"
00024
00025
00026 #include <QTimer>
00027 #include <QDomDocument>
00028 #include <QDomElement>
00029 #include <QRegExp>
00030 #include <QDebug>
00031 #include <QFileInfo>
00032 #include <QFile>
00033 #include <QTextStream>
00034 #include <QSettings>
00035
00036 #include <QtPlugin>
00037 OmgScraperGbifRest::OmgScraperGbifRest(QObject * parent) :
00038 QObject(parent),
00039 OmgScraperPluginInterface()
00040 {
00041 qDebug("OmgScraperGbifRest constructor called...");
00042 }
00043
00044
00045 OmgScraperGbifRest::~OmgScraperGbifRest()
00046 {
00047 }
00048
00049 const QString OmgScraperGbifRest::getName()
00050 {
00051 return QString("GBIF REST Web Service Plugin");
00052 }
00053
00054 const QString OmgScraperGbifRest::getLicense()
00055 {
00056 QFile myQFile( ":/gbif_terms.txt" );
00057 QString myString;
00058 if ( myQFile.open( QIODevice::ReadOnly ) )
00059 {
00060
00061 QTextStream myStream( &myQFile );
00062 myString = myStream.readAll();
00063 myQFile.close();
00064 }
00065 else
00066 {
00067 myString="Terms and conditions document for GBIF could not be retrieved.";
00068 }
00069 return myString;
00070 }
00071
00072 bool OmgScraperGbifRest::search(QString theTaxonName, QString theFileName)
00073 {
00074
00075 mTaxonName = theTaxonName;
00076 mFileName = theFileName;
00077
00078 if (mTaxonName.isEmpty())
00079 {
00080 mMessenger.emitError("Taxon name is empty!");
00081 return false;
00082 }
00083
00084 if (mFileName.isEmpty())
00085 {
00086 mMessenger.emitError("File name is empty!");
00087 return false;
00088 }
00089
00090 mTaxonName = theTaxonName;
00091 mFileName = theFileName;
00092 QString mySearchName=theTaxonName.simplified();
00093 mySearchName=mySearchName.replace(" ","+");
00094 QSettings mySettings;
00095 QString myGbifUrl=mySettings.value("openModeller/gbifPlugin/url", "data.gbif.org").toString();
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105 const int myPageSize = 1000;
00106
00107
00108 int myPageCount = 0;
00109
00110 int myRecordCount=0;
00111
00112
00113 bool myMoreDataFlag=true;
00114 while (myMoreDataFlag==true)
00115 {
00116 QString myStartString = QString::number(myPageCount * myPageSize);
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128 QString myUrl="http://" + myGbifUrl + "/ws/rest/occurrence/list?scientificName="
00129 + mySearchName + "&format=brief&coordinatestatus=true&coordinateissues=false"
00130 + "&maxresults=1000&startindex=" + myStartString;
00131 qDebug ("GBIFRest scraper plugin URL for Search string = " + myUrl.toLocal8Bit());
00132 OmgWebPageFetcher myWebPageFetcher;
00133 connect(&myWebPageFetcher, SIGNAL(statusChanged(QString)),
00134 this, SLOT(setStatus(QString)));
00135 QString myResult = myWebPageFetcher.getPage(myUrl);
00136 QDomDocument myDocument("mydocument");
00137 myDocument.setContent(myResult);
00138
00139
00140
00141
00142
00143 QDomNodeList myNextPageList= myDocument.elementsByTagName("gbif:nextRequestUrl");
00144 if (myNextPageList.length()<1)
00145 {
00146
00147 myMoreDataFlag=false;
00148 }
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182 QDomNodeList myOccurrenceList = myDocument.elementsByTagName("to:TaxonOccurrence");
00183 for (int myCounter=0; myCounter < myOccurrenceList.size(); myCounter++)
00184 {
00185 QDomNode myRecNode = myOccurrenceList.item(myCounter);
00186 QDomElement myRecElement = myRecNode.toElement();
00187 OmgLocality myLocality;
00188 QString myId = myRecElement.attribute("gbifKey");
00189 myLocality.setId(myId);
00190 QString myTaxonName = myRecElement.
00191 firstChildElement("to:identifiedTo").
00192 firstChildElement("to:Identification").
00193 firstChildElement("to:taxon").
00194 firstChildElement("tc:TaxonConcept").
00195 firstChildElement("tc:hasName").
00196 firstChildElement("tn:TaxonName").
00197 firstChildElement("tn:genusPart").
00198 text();
00199 QString mySpecificEpithet = myRecElement.
00200 firstChildElement("to:identifiedTo").
00201 firstChildElement("to:Identification").
00202 firstChildElement("to:taxon").
00203 firstChildElement("tc:TaxonConcept").
00204 firstChildElement("tc:hasName").
00205 firstChildElement("tn:TaxonName").
00206 firstChildElement("tn:specificEpithet").
00207 text();
00208 QString myReturnedName = QString(myTaxonName + " " + mySpecificEpithet).simplified();
00209 if (myReturnedName.isEmpty())
00210 {
00211
00212 myLocality.setLabel(theTaxonName.simplified());
00213 }
00214 else
00215 {
00216 myLocality.setLabel(myReturnedName);
00217 }
00218 QString myLatitude = myRecElement.firstChildElement("to:decimalLatitude").text();
00219 myLocality.setLatitude(myLatitude.toFloat());
00220 QString myLongitude = myRecElement.firstChildElement("to:decimalLongitude").text();
00221 myLocality.setLongitude(myLongitude.toFloat());
00222 if (!myLocality.isValid())
00223 {
00224 continue;
00225 }
00226 mLocalityVector.push_back(myLocality);
00227 ++myRecordCount;
00228 }
00229 ++myPageCount;
00230 }
00231 qDebug() << myRecordCount << " useable records found" ;
00232
00233
00234
00235 QString myTextFileName = createTextFile(mFileName);
00236 if (myTextFileName.isEmpty())
00237 {
00238 mMessenger.emitFileNotWritten(mTaxonName);
00239 }
00240 else
00241 {
00242 createShapefile(mFileName);
00243 mMessenger.emitFileWritten(mFileName, myTextFileName,mTaxonName,myRecordCount);
00244 }
00246 mLocalityVector.clear();
00247 return true;
00248 }
00249
00250
00251 void OmgScraperGbifRest::setStatus(QString theStatus)
00252 {
00253 mMessenger.emitMessage(theStatus);
00254 }
00255
// Qt 4 plugin export: names OmgScraperGbifRest as the plugin class for the
// plugin target gbifrest_scraper_plugin.
00256 Q_EXPORT_PLUGIN2(gbifrest_scraper_plugin, OmgScraperGbifRest );