Main Page | Modules | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

omgtextfilesplitter.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2005 by Tim Sutton   *
00003  *   aps02ts@macbuntu   *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU General Public License as published by  *
00007  *   the Free Software Foundation; either version 2 of the License, or     *
00008  *   (at your option) any later version.                                   *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU General Public License     *
00016  *   along with this program; if not, write to the                         *
00017  *   Free Software Foundation, Inc.,                                       *
00018  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00019  ***************************************************************************/
00020 #include "omgtextfilesplitter.h"
00021 #include <qfile.h>
00022 #include <qtextstream.h>
00023 #include <qregexp.h>
00024 #include <qstring.h>
00025 #include <qlineedit.h>
00026 #include <qfiledialog.h>
00027 #include <qfileinfo.h>
00028 #include <qsettings.h>
00029 #include <qapplication.h>
00030 #include <qcursor.h>
00031 #include <QRadioButton>
00032 #include <QMessageBox>
00033 
00034 OmgTextFileSplitter::OmgTextFileSplitter() : QDialog()
00035 {
00036   setupUi(this);
00037   QSettings myQSettings;
00038   leFileName->setText(myQSettings.value("/TextFileSplitter/leFileName").toString());
00039   leDirName->setText(myQSettings.value("/TextFileSplitter/leDirName").toString());
00040 }
00041 
00042 
00043 OmgTextFileSplitter::~OmgTextFileSplitter()
00044 {}
00045 
00046 void OmgTextFileSplitter::split(QString theFileNameString, QString theOutputDirString, InputType theInputType=Default)
00047 {
00048   //
00049   // Now that we have the localities text file, we need to parse it
00050   // and split it into seperate files
00051   //
00052   //first build a regex to match text at the beginning of the line
00053   QRegExp myQRegExp;
00054   if (theInputType==Default)
00055   {
00056     qDebug( "Genus species headers file" );
00057     myQRegExp =  QRegExp("^[^#][a-zA-Z][ a-zA-Z\t]*") ; //second caret means 'not'
00058   }
00059   else 
00060   {
00061     qDebug("Comma Delimited File or openModeller format file");
00062     myQRegExp=QRegExp("");
00063   }
00064   QString myTaxonName;
00065   QFile myInputFile (theFileNameString);
00066   // open the output file initially to the input file
00067   // the first time a taxon is encountered the open file will
00068   // be changed to an output file for that taxon
00069   QFile myOutputFile (theFileNameString);
00070   QTextStream myOutputTextStream( &myOutputFile );
00071   if ( myInputFile.open( QIODevice::ReadOnly ) )
00072   {
00073     // set the cursor to hour glass
00074     QApplication::setOverrideCursor( QCursor(Qt::WaitCursor) );
00075 
00076     //now we parse the loc file, checking each line for its taxon
00077     QTextStream myInputTextStream( &myInputFile );
00078     QString myCurrentLineQString;
00079     QString myLastTaxonName = "";
00080     int myFileCount=0; //store how many files we output
00081     while ( !myInputTextStream.atEnd() )
00082     {
00083       QString   myLong, myLat;
00084       QStringList myList; 
00085 
00086       myCurrentLineQString = myInputTextStream.readLine(); // line of text excluding '\n'
00087       // if line is just blank dont bother doing any work
00088       if (myCurrentLineQString.simplified().isEmpty())
00089       {
00090         continue;
00091       }
00092 
00093       //
00094       // Clean up the line a litle
00095       //
00096       QString myLine = myCurrentLineQString;
00097       myLine = myLine.simplified().replace("\t"," ");
00098       myLine = myLine.trimmed();
00099 
00100       //
00101       //see if this line contains a taxon
00102       //
00103       if (theInputType==Default)
00104       {
00105         //qDebug("Input type default");
00106         myList = myLine.split(" ");
00107         if (myQRegExp.lastIndexIn(myCurrentLineQString) != -1)
00108         {
00109           //            qDebug("Found name");
00110           QStringList myMatchesQStringList = myQRegExp.capturedTexts();
00111           QStringList::Iterator myIterator = myMatchesQStringList.begin();
00112           myTaxonName=*myIterator;
00113 
00114           myTaxonName=myTaxonName.simplified();
00115           if (myTaxonName != "" && myMatchesQStringList.size() > 0)
00116           {
00117             //make sure there are not tabs separating words.
00118             myTaxonName=myTaxonName.replace( QRegExp("\t"), " " );
00119             //make sure there are only single spaces separating words.
00120             myTaxonName=myTaxonName.replace( QRegExp(" {2,}"), " " );
00121             //get rid of latitude and longitude if they exist.
00122             myTaxonName=myTaxonName.replace( QRegExp("Latitude"), "" );
00123             myTaxonName=myTaxonName.replace( QRegExp("Longitude"), "" );
00124             myTaxonName=myTaxonName.simplified();
00125           }
00126         }
00127       }
00128       else if (theInputType==OpenModeller) //openmodeller  delimted format
00129       {
00130         myList = myLine.split(" ");
00131         if (myList.size() < 4)
00132         {
00133           continue;
00134         }
00135         myTaxonName=myList.at(0).simplified() + " " + myList.at(1).simplified();
00136       }
00137       else //comma delimted
00138       {
00139         myList = myLine.split(",");
00140         if (myList.size() < 4)
00141         {
00142           continue;
00143         }
00144         myTaxonName=myList.at(0).simplified() + " " + myList.at(1).simplified();
00145       }
00146       //qDebug(myLastTaxonName.toAscii() + " " + myTaxonName.toAscii());
00147       if (myLastTaxonName!=myTaxonName)
00148       {
00149         ++myFileCount;
00150         //Make a filename from the species name to output its associated localities into
00151         QString myOutputFileName = myTaxonName;
00152         myOutputFileName.replace(" ","_");
00153         myOutputFileName = myOutputFileName+".txt";
00154         //qDebug("Found Taxon Name: " + myTaxonName + "...Saving its localities into: " + theOutputDirString + myOutputFileName );
00155         // create the output file for this taxon
00156         myOutputFile.close();
00157         myOutputFile.setFileName(theOutputDirString + QDir::separator() + myOutputFileName);
00158         //note file is not appended to but overwritten!
00159         myOutputFile.open(QIODevice::WriteOnly ); 
00160         myOutputTextStream.setDevice(&myOutputFile);
00161       }  //line is a new taxon name
00162 
00163       myLastTaxonName=myTaxonName;
00164       if (theInputType==Default)        
00165       {
00166         if (myList.size() < 2 )
00167         {
00168           continue;
00169         }       
00170         if (radLongitudeFirst->isChecked())
00171         {
00172           myLong = myList.at(0);
00173           myLat = myList.at(1);
00174         }
00175         else
00176         {
00177           myLong = myList.at(1);
00178           myLat = myList.at(0);
00179         }
00180       }
00181       else //comma or openmodeller delim
00182       {
00183         if (myList.size() < 4 )
00184         {
00185           continue;
00186         }       
00187         if (radLongitudeFirst->isChecked())
00188         {
00189           //  qDebug("Getteing lat and long");
00190           myLong = myList.at(2);
00191           myLat = myList.at(3);
00192         }
00193         else
00194         {
00195           myLong = myList.at(3);
00196           myLat = myList.at(2);
00197         }
00198 
00199       }
00200       //qDebug( myTaxonName.toAscii()); 
00201       myOutputTextStream << myTaxonName.toLocal8Bit() << " " <<  myLong << " " << myLat << "\r\n";
00202     }
00203     myInputFile.close();
00204     myOutputFile.close();
00205     QApplication::restoreOverrideCursor();
00206     QMessageBox::information( this,tr("openModeller Desktop"),tr("File splitter completed successfully. ") +
00207             QString::number(myFileCount) + tr(" new files were created.") );
00208   }
00209 }
00210 
00211 /*
00212    Although it is easy to implement a custom slot in the dialog and connect it in the constructor, we could instead use uic's auto-connection facilities to connect the OK button's clicked() signal to a slot in our subclass. To do this, we only need to declare and implement a slot with a name that follows a standard convention:
00213 
00214    void on_<widget name>_<signal name>(<signal parameters>);
00215    */
00216 
00217 void OmgTextFileSplitter::on_pbnFileSelector_clicked()
00218 {
00219   qDebug("File Selector Button clicked");
00220   QFileInfo myFileInfo(leFileName->text());
00221   QString myDirPath = myFileInfo.dir().dirName();
00222   QString myFileName = QFileDialog::getOpenFileName(
00223       this,
00224       "Choose a file containing occurrence data", 
00225       myDirPath,
00226       "Text Files (*.txt *.csv *.asc)");
00227   if (!myFileName.isEmpty())
00228   {
00229     leFileName->setText(myFileName);
00230   }
00231 
00232 }
00233 /*
00234    Although it is easy to implement a custom slot in the dialog and connect it in the constructor, we could instead use uic's auto-connection facilities to connect the OK button's clicked() signal to a slot in our subclass. To do this, we only need to declare and implement a slot with a name that follows a standard convention:
00235 
00236    void on_<widget name>_<signal name>(<signal parameters>);
00237    */
00238 void OmgTextFileSplitter::on_pbnDirectorySelector_clicked()
00239 {
00240   qDebug("Directory Selector Button clicked");
00241   QString myDirPath = QFileDialog::getExistingDirectory(
00242       this,
00243       "Choose a directory for the split files",
00244       leDirName->text()
00245       );
00246   if (!myDirPath.isEmpty())
00247   {
00248     leDirName->setText(myDirPath);
00249   }
00250 }
00251 void OmgTextFileSplitter::accept()
00252 {
00253   QSettings myQSettings;
00254   myQSettings.setValue("/TextFileSplitter/leFileName",leFileName->text());
00255   myQSettings.setValue("/TextFileSplitter/leDirName",leDirName->text());
00256   if (radCommaDelimited->isChecked())
00257   {
00258     split(leFileName->text(),leDirName->text(),CommaDelimited);
00259   }
00260   else if (radOpenModeller->isChecked())
00261   {
00262     split(leFileName->text(),leDirName->text(),OpenModeller);
00263   }
00264   else //genus species headers
00265   {
00266     split(leFileName->text(),leDirName->text(),Default);
00267   }
00268   close();
00269 }

Generated on Mon Apr 28 15:07:44 2008 for openModellerDesktop by  doxygen 1.4.1-20050210