openModeller
Version 1.4.0
|
00001 00028 #include <stdio.h> 00029 00030 #include <openmodeller/ConfusionMatrix.hh> 00031 #include <openmodeller/Sampler.hh> 00032 #include <openmodeller/Algorithm.hh> 00033 #include <openmodeller/Occurrences.hh> 00034 #include <openmodeller/Environment.hh> 00035 #include <openmodeller/Configuration.hh> 00036 #include <openmodeller/Log.hh> 00037 00038 #include <string.h> 00039 00040 ConfusionMatrix::ConfusionMatrix(Scalar predictionThreshold, bool ignoreAbsences) 00041 { 00042 reset(predictionThreshold, ignoreAbsences); 00043 } 00044 00045 00046 ConfusionMatrix::~ConfusionMatrix() 00047 { 00048 } 00049 00050 void ConfusionMatrix::reset(Scalar predictionThreshold, bool ignoreAbsences) 00051 { 00052 _ready = false; 00053 _predictionThreshold = predictionThreshold; 00054 _ignoreAbsences = ignoreAbsences; 00055 memset(_confMatrix, 0, sizeof(int) * 4); 00056 } 00057 00058 void ConfusionMatrix::setLowestTrainingThreshold(const Model& model, const SamplerPtr& sampler) 00059 { 00060 Log::instance()->debug( "Determining lowest training threshold\n" ); 00061 00062 model->setNormalization( sampler ); 00063 00064 OccurrencesPtr presences = sampler->getPresences(); 00065 00066 OccurrencesImpl::const_iterator it = presences->begin(); 00067 OccurrencesImpl::const_iterator fin = presences->end(); 00068 00069 _predictionThreshold = 2.0; 00070 00071 Scalar predictionValue; 00072 00073 while( it != fin ) { 00074 00075 Sample sample = (*it)->environment(); 00076 00077 if ( sample.size() > 0 ) { 00078 00079 predictionValue = model->getValue( sample ); 00080 00081 if ( predictionValue > 0.0 && predictionValue < _predictionThreshold ) { 00082 00083 _predictionThreshold = predictionValue; 00084 } 00085 } 00086 00087 ++it; 00088 } 00089 00090 if ( _predictionThreshold > 1.0 ) { 00091 00092 // Reset to default value 00093 _predictionThreshold = CONF_MATRIX_DEFAULT_THRESHOLD; 00094 00095 Log::instance()->warn( "Could not find any valid threshold among all training points. Resetting confusion matrix threshold to the default value (%f)\n", CONF_MATRIX_DEFAULT_THRESHOLD ); 00096 } 00097 else { 00098 00099 Log::instance()->debug( "Lowest training threshold is %f\n", _predictionThreshold ); 00100 } 00101 } 00102 00103 /* 00104 * Confusion Matrix: 00105 * 1st row is predicted absence (index [0][x]) 00106 * 2nd row is predicted presence (index [1][x]) 00107 * 1st column is actual absence (index [y][0]) 00108 * 2nd column is actual presence (index [y][1]) 00109 */ 00110 00111 void ConfusionMatrix::calculate(const EnvironmentPtr & env, 00112 const Model& model, 00113 const OccurrencesPtr& presences, 00114 const OccurrencesPtr& absences) 00115 { 00116 Log::instance()->debug( "Calculating confusion matrix\n" ); 00117 00118 int i; 00119 int predictionIndex, actualIndex; 00120 Scalar predictionValue; 00121 00122 reset(_predictionThreshold,_ignoreAbsences); 00123 00124 OccurrencesImpl::const_iterator it = presences->begin(); 00125 OccurrencesImpl::const_iterator fin = presences->end(); 00126 00127 Log::instance()->debug( "Testing presences\n" ); 00128 00129 i = 0; 00130 while( it != fin ) { 00131 00132 Sample sample; 00133 00134 if ( (*it)->hasEnvironment() ) { 00135 00136 sample = (*it)->environment(); 00137 } 00138 else if ( env ) { 00139 00140 sample = env->get( (*it)->x(), (*it)->y() ); 00141 } 00142 00143 if ( sample.size() > 0 ) { 00144 00145 ++i; 00146 00147 predictionValue = model->getValue( sample ); 00148 predictionIndex = (predictionValue >= _predictionThreshold); 00149 00150 actualIndex = 1; //data.isPresence(i); 00151 _confMatrix[predictionIndex][actualIndex]++; 00152 00153 Log::instance()->debug( "Probability for point %s (%f,%f): %f\n", 00154 ((*it)->id()).c_str(), (*it)->x(), (*it)->y(), predictionValue ); 00155 } 00156 else { 00157 00158 Log::instance()->warn( "Skipping point (%s) with no environmental data!\n", 00159 ((*it)->id()).c_str() ); 00160 } 00161 00162 ++it; 00163 } 00164 00165 Log::instance()->debug( "Tested %u presence point(s)\n", i ); 00166 00167 if ( _ignoreAbsences ) { 00168 00169 Log::instance()->debug( "Ignoring absence points\n" ); 00170 } 00171 else { 00172 00173 Log::instance()->debug( "Testing absences\n" ); 00174 00175 i = 0; 00176 00177 if ( absences && ! absences->isEmpty() ) { 00178 00179 it = absences->begin(); 00180 fin = absences->end(); 00181 00182 while( it != fin ) { 00183 00184 Sample sample; 00185 00186 if ( (*it)->hasEnvironment() ) { 00187 00188 sample = (*it)->environment(); 00189 } 00190 else if ( env ) { 00191 00192 sample = env->get( (*it)->x(), (*it)->y() ); 00193 } 00194 00195 if ( sample.size() > 0 ) { 00196 00197 ++i; 00198 00199 predictionValue = model->getValue( sample ); 00200 predictionIndex = (predictionValue >= _predictionThreshold); 00201 actualIndex = 0; //data.isAbsence(i); 00202 _confMatrix[predictionIndex][actualIndex]++; 00203 00204 Log::instance()->debug( "Probability for point %s (%f,%f): %f\n", 00205 ((*it)->id()).c_str(), (*it)->x(), (*it)->y(), predictionValue ); 00206 } 00207 else { 00208 00209 Log::instance()->warn( "Skipping point (%s) with no environmental data!\n", 00210 ((*it)->id()).c_str() ); 00211 } 00212 00213 ++it; 00214 } 00215 } 00216 00217 Log::instance()->debug( "Tested %u absence point(s)\n", i ); 00218 } 00219 00220 _ready = true; 00221 } 00222 00223 void ConfusionMatrix::calculate(const Model& model, const SamplerPtr& sampler) 00224 { 00225 model->setNormalization( sampler ); 00226 00227 calculate(sampler->getEnvironment(), model, sampler->getPresences(), sampler->getAbsences() ); 00228 } 00229 00230 00231 int ConfusionMatrix::getValue(Scalar predictionValue, 00232 Scalar actualValue) const 00233 { 00234 int predictedIndex, actualIndex; 00235 00236 predictedIndex = (predictionValue >= _predictionThreshold); 00237 actualIndex = (actualValue >= _predictionThreshold); 00238 00239 return _confMatrix[predictedIndex][actualIndex]; 00240 } 00241 00242 00243 double ConfusionMatrix::getAccuracy() const 00244 { 00245 Scalar total = 00246 _confMatrix[0][0] + _confMatrix[0][1] + 00247 _confMatrix[1][0] + _confMatrix[1][1]; 00248 00249 if (_ready && total) 00250 return ( _confMatrix[0][0] + _confMatrix[1][1] ) / total; 00251 else 00252 return -1.0; 00253 } 00254 00255 00256 double ConfusionMatrix::getCommissionError() const 00257 { 00258 Scalar total = _confMatrix[1][0] + _confMatrix[0][0]; 00259 if (_ready && total) 00260 return _confMatrix[1][0] / total; 00261 else 00262 return -1.0; 00263 } 00264 00265 00266 double ConfusionMatrix::getOmissionError() const 00267 { 00268 Scalar total = _confMatrix[0][1] + _confMatrix[1][1]; 00269 if (_ready && total) 00270 return _confMatrix[0][1] / total; 00271 else 00272 return -1.0; 00273 } 00274 00275 ConfigurationPtr 00276 ConfusionMatrix::getConfiguration() const 00277 { 00278 ConfigurationPtr config( new ConfigurationImpl("ConfusionMatrix") ); 00279 00280 config->addNameValue( "Threshold", getThreshold() ); 00281 config->addNameValue( "Accuracy", getAccuracy() * 100 ); 00282 config->addNameValue( "OmissionError", getOmissionError() * 100 ); 00283 config->addNameValue( "CommissionError", getCommissionError() * 100 ); 00284 config->addNameValue( "TruePositives", getValue( 1, 1 ) ); 00285 config->addNameValue( "FalsePositives", getValue( 0, 1 ) ); 00286 config->addNameValue( "TrueNegatives", getValue( 0, 0 ) ); 00287 config->addNameValue( "FalseNegatives", getValue( 1, 0 ) ); 00288 00289 return config; 00290 }