openModeller  Version 1.4.0
ConfusionMatrix.cpp
Go to the documentation of this file.
00001 
00028 #include <stdio.h>
00029 
00030 #include <openmodeller/ConfusionMatrix.hh>
00031 #include <openmodeller/Sampler.hh>
00032 #include <openmodeller/Algorithm.hh>
00033 #include <openmodeller/Occurrences.hh>
00034 #include <openmodeller/Environment.hh>
00035 #include <openmodeller/Configuration.hh>
00036 #include <openmodeller/Log.hh>
00037 
00038 #include <string.h>
00039 
00040 ConfusionMatrix::ConfusionMatrix(Scalar predictionThreshold, bool ignoreAbsences)
00041 {
00042   reset(predictionThreshold, ignoreAbsences);
00043 }
00044 
00045 
00046 ConfusionMatrix::~ConfusionMatrix()
00047 {
00048 }
00049 
00050 void ConfusionMatrix::reset(Scalar predictionThreshold, bool ignoreAbsences)
00051 {
00052   _ready = false;
00053   _predictionThreshold = predictionThreshold;
00054   _ignoreAbsences = ignoreAbsences;
00055   memset(_confMatrix, 0, sizeof(int) * 4);
00056 }
00057 
00058 void ConfusionMatrix::setLowestTrainingThreshold(const Model& model, const SamplerPtr& sampler)
00059 {
00060   Log::instance()->debug( "Determining lowest training threshold\n" );
00061 
00062   model->setNormalization( sampler );
00063 
00064   OccurrencesPtr presences = sampler->getPresences();
00065 
00066   OccurrencesImpl::const_iterator it = presences->begin();
00067   OccurrencesImpl::const_iterator fin = presences->end();
00068 
00069   _predictionThreshold = 2.0;
00070 
00071   Scalar predictionValue;
00072 
00073   while( it != fin ) {
00074 
00075     Sample sample = (*it)->environment();
00076 
00077     if ( sample.size() > 0 ) {
00078 
00079       predictionValue = model->getValue( sample );
00080 
00081       if ( predictionValue > 0.0 && predictionValue < _predictionThreshold ) {
00082 
00083         _predictionThreshold = predictionValue;
00084       }
00085     }
00086 
00087     ++it;
00088   }
00089 
00090   if ( _predictionThreshold > 1.0 ) {
00091 
00092     // Reset to default value
00093     _predictionThreshold = CONF_MATRIX_DEFAULT_THRESHOLD;
00094 
00095     Log::instance()->warn( "Could not find any valid threshold among all training points. Resetting confusion matrix threshold to the default value (%f)\n", CONF_MATRIX_DEFAULT_THRESHOLD );
00096   }
00097   else {
00098 
00099     Log::instance()->debug( "Lowest training threshold is %f\n", _predictionThreshold );
00100   }
00101 }
00102 
00103 /* 
00104  * Confusion Matrix:
00105  *  1st row is predicted absence  (index [0][x])
00106  *  2nd row is predicted presence (index [1][x])
00107  *  1st column is actual absence  (index [y][0])
00108  *  2nd column is actual presence (index [y][1])
00109  */
00110 
00111 void ConfusionMatrix::calculate(const EnvironmentPtr & env,
00112         const Model& model,
00113         const OccurrencesPtr& presences, 
00114         const OccurrencesPtr& absences)
00115 {
00116   Log::instance()->debug( "Calculating confusion matrix\n" );
00117 
00118   int i;
00119   int predictionIndex, actualIndex;
00120   Scalar predictionValue;
00121 
00122   reset(_predictionThreshold,_ignoreAbsences);
00123 
00124   OccurrencesImpl::const_iterator it = presences->begin();
00125   OccurrencesImpl::const_iterator fin = presences->end();
00126 
00127   Log::instance()->debug( "Testing presences\n" );
00128 
00129   i = 0;
00130   while( it != fin ) {
00131 
00132     Sample sample; 
00133 
00134     if ( (*it)->hasEnvironment() ) {
00135 
00136       sample = (*it)->environment();
00137     }
00138     else if ( env ) {
00139 
00140       sample = env->get( (*it)->x(), (*it)->y() );
00141     }
00142 
00143     if ( sample.size() > 0 ) {
00144 
00145       ++i;
00146 
00147       predictionValue = model->getValue( sample );
00148       predictionIndex = (predictionValue >= _predictionThreshold);
00149 
00150       actualIndex = 1; //data.isPresence(i);
00151       _confMatrix[predictionIndex][actualIndex]++;
00152 
00153       Log::instance()->debug( "Probability for point %s (%f,%f): %f\n", 
00154                    ((*it)->id()).c_str(), (*it)->x(), (*it)->y(), predictionValue );
00155     }
00156     else {
00157 
00158       Log::instance()->warn( "Skipping point (%s) with no environmental data!\n", 
00159                    ((*it)->id()).c_str() );
00160     }
00161 
00162     ++it;
00163   }
00164 
00165   Log::instance()->debug( "Tested %u presence point(s)\n", i );
00166 
00167   if ( _ignoreAbsences ) {
00168 
00169     Log::instance()->debug( "Ignoring absence points\n" );
00170   }
00171   else {
00172 
00173     Log::instance()->debug( "Testing absences\n" );
00174 
00175     i = 0;
00176 
00177     if ( absences && ! absences->isEmpty() ) {
00178 
00179       it = absences->begin();
00180       fin = absences->end();
00181 
00182       while( it != fin ) {
00183 
00184         Sample sample;
00185 
00186         if ( (*it)->hasEnvironment() ) {
00187 
00188     sample = (*it)->environment();
00189         }
00190         else if ( env ) {
00191 
00192     sample = env->get( (*it)->x(), (*it)->y() );
00193         }
00194 
00195         if ( sample.size() > 0 ) {
00196 
00197           ++i;
00198 
00199     predictionValue = model->getValue( sample );
00200     predictionIndex = (predictionValue >= _predictionThreshold);
00201     actualIndex = 0; //data.isAbsence(i);
00202           _confMatrix[predictionIndex][actualIndex]++;
00203 
00204           Log::instance()->debug( "Probability for point %s (%f,%f): %f\n", 
00205                        ((*it)->id()).c_str(), (*it)->x(), (*it)->y(), predictionValue );
00206         }
00207         else {
00208 
00209           Log::instance()->warn( "Skipping point (%s) with no environmental data!\n", 
00210                        ((*it)->id()).c_str() );
00211         }
00212 
00213         ++it;
00214       }
00215     }
00216 
00217     Log::instance()->debug( "Tested %u absence point(s)\n", i );
00218   }
00219 
00220   _ready = true;
00221 }
00222 
00223 void ConfusionMatrix::calculate(const Model& model, const SamplerPtr& sampler)
00224 {
00225   model->setNormalization( sampler );
00226 
00227   calculate(sampler->getEnvironment(), model, sampler->getPresences(), sampler->getAbsences() );
00228 }
00229 
00230 
00231 int ConfusionMatrix::getValue(Scalar predictionValue, 
00232             Scalar actualValue) const
00233 {
00234   int predictedIndex, actualIndex;
00235 
00236   predictedIndex = (predictionValue >= _predictionThreshold);
00237   actualIndex    = (actualValue     >= _predictionThreshold);
00238 
00239   return _confMatrix[predictedIndex][actualIndex];
00240 }
00241 
00242 
00243 double ConfusionMatrix::getAccuracy() const
00244 {
00245   Scalar total = 
00246     _confMatrix[0][0] + _confMatrix[0][1] + 
00247     _confMatrix[1][0] + _confMatrix[1][1];
00248   
00249   if (_ready && total)
00250     return ( _confMatrix[0][0] + _confMatrix[1][1] ) / total;
00251   else
00252     return -1.0;
00253 }
00254 
00255 
00256 double ConfusionMatrix::getCommissionError() const
00257 {
00258   Scalar total = _confMatrix[1][0] + _confMatrix[0][0];
00259   if (_ready && total)
00260     return _confMatrix[1][0] / total;
00261   else
00262     return -1.0;
00263 }
00264 
00265 
00266 double ConfusionMatrix::getOmissionError() const
00267 {
00268   Scalar total = _confMatrix[0][1] + _confMatrix[1][1];
00269   if (_ready && total)
00270     return _confMatrix[0][1] / total;
00271   else
00272     return -1.0;
00273 }
00274 
00275 ConfigurationPtr 
00276 ConfusionMatrix::getConfiguration() const
00277 {
00278   ConfigurationPtr config( new ConfigurationImpl("ConfusionMatrix") );
00279 
00280   config->addNameValue( "Threshold", getThreshold() );
00281   config->addNameValue( "Accuracy", getAccuracy() * 100 );
00282   config->addNameValue( "OmissionError", getOmissionError() * 100 );
00283   config->addNameValue( "CommissionError", getCommissionError() * 100 );
00284   config->addNameValue( "TruePositives", getValue( 1, 1 ) );
00285   config->addNameValue( "FalsePositives", getValue( 0, 1 ) );
00286   config->addNameValue( "TrueNegatives", getValue( 0, 0 ) );
00287   config->addNameValue( "FalseNegatives", getValue( 1, 0 ) );
00288 
00289   return config;
00290 }