openModeller  Version 1.5.0
ConfusionMatrix.cpp
Go to the documentation of this file.
1 
28 #include <stdio.h>
29 
31 #include <openmodeller/Sampler.hh>
36 #include <openmodeller/Log.hh>
37 
38 #include <string.h>
39 
40 ConfusionMatrix::ConfusionMatrix(Scalar predictionThreshold, bool ignoreAbsences)
41 {
42  reset(predictionThreshold, ignoreAbsences);
43 }
44 
45 
47 {
48 }
49 
50 void ConfusionMatrix::reset(Scalar predictionThreshold, bool ignoreAbsences)
51 {
52  _ready = false;
53  _predictionThreshold = predictionThreshold;
54  _ignoreAbsences = ignoreAbsences;
55  memset(_confMatrix, 0, sizeof(int) * 4);
56 }
57 
59 {
60  Log::instance()->debug( "Determining lowest training threshold\n" );
61 
62  model->setNormalization( sampler );
63 
64  OccurrencesPtr presences = sampler->getPresences();
65 
66  OccurrencesImpl::const_iterator it = presences->begin();
67  OccurrencesImpl::const_iterator fin = presences->end();
68 
70 
71  Scalar predictionValue;
72 
73  while( it != fin ) {
74 
75  Sample sample = (*it)->environment();
76 
77  if ( sample.size() > 0 ) {
78 
79  predictionValue = model->getValue( sample );
80 
81  if ( predictionValue > 0.0 && predictionValue < _predictionThreshold ) {
82 
83  _predictionThreshold = predictionValue;
84  }
85  }
86 
87  ++it;
88  }
89 
90  if ( _predictionThreshold > 1.0 ) {
91 
92  // Reset to default value
94 
95  Log::instance()->warn( "Could not find any valid threshold among all training points. Resetting confusion matrix threshold to the default value (%f)\n", CONF_MATRIX_DEFAULT_THRESHOLD );
96  }
97  else {
98 
99  Log::instance()->debug( "Lowest training threshold is %f\n", _predictionThreshold );
100  }
101 }
102 
103 /*
104  * Confusion Matrix:
105  * 1st row is predicted absence (index [0][x])
106  * 2nd row is predicted presence (index [1][x])
107  * 1st column is actual absence (index [y][0])
108  * 2nd column is actual presence (index [y][1])
109  */
110 
112  const Model& model,
113  const OccurrencesPtr& presences,
114  const OccurrencesPtr& absences)
115 {
116  Log::instance()->debug( "Calculating confusion matrix\n" );
117 
118  int i;
119  int predictionIndex, actualIndex;
120  Scalar predictionValue;
121 
123 
124  OccurrencesImpl::const_iterator it = presences->begin();
125  OccurrencesImpl::const_iterator fin = presences->end();
126 
127  Log::instance()->debug( "Testing presences\n" );
128 
129  i = 0;
130  while( it != fin ) {
131 
132  Sample sample;
133 
134  if ( (*it)->hasEnvironment() ) {
135 
136  sample = (*it)->environment();
137  }
138  else if ( env ) {
139 
140  sample = env->get( (*it)->x(), (*it)->y() );
141  }
142 
143  if ( sample.size() > 0 ) {
144 
145  ++i;
146 
147  predictionValue = model->getValue( sample );
148  predictionIndex = (predictionValue >= _predictionThreshold);
149 
150  actualIndex = 1; //data.isPresence(i);
151  _confMatrix[predictionIndex][actualIndex]++;
152 
153  Log::instance()->debug( "Probability for point %s (%f,%f): %f\n",
154  ((*it)->id()).c_str(), (*it)->x(), (*it)->y(), predictionValue );
155  }
156  else {
157 
158  Log::instance()->warn( "Skipping point (%s) with no environmental data!\n",
159  ((*it)->id()).c_str() );
160  }
161 
162  ++it;
163  }
164 
165  Log::instance()->debug( "Tested %u presence point(s)\n", i );
166 
167  if ( _ignoreAbsences ) {
168 
169  Log::instance()->debug( "Ignoring absence points\n" );
170  }
171  else {
172 
173  Log::instance()->debug( "Testing absences\n" );
174 
175  i = 0;
176 
177  if ( absences && ! absences->isEmpty() ) {
178 
179  it = absences->begin();
180  fin = absences->end();
181 
182  while( it != fin ) {
183 
184  Sample sample;
185 
186  if ( (*it)->hasEnvironment() ) {
187 
188  sample = (*it)->environment();
189  }
190  else if ( env ) {
191 
192  sample = env->get( (*it)->x(), (*it)->y() );
193  }
194 
195  if ( sample.size() > 0 ) {
196 
197  ++i;
198 
199  predictionValue = model->getValue( sample );
200  predictionIndex = (predictionValue >= _predictionThreshold);
201  actualIndex = 0; //data.isAbsence(i);
202  _confMatrix[predictionIndex][actualIndex]++;
203 
204  Log::instance()->debug( "Probability for point %s (%f,%f): %f\n",
205  ((*it)->id()).c_str(), (*it)->x(), (*it)->y(), predictionValue );
206  }
207  else {
208 
209  Log::instance()->warn( "Skipping point (%s) with no environmental data!\n",
210  ((*it)->id()).c_str() );
211  }
212 
213  ++it;
214  }
215  }
216 
217  Log::instance()->debug( "Tested %u absence point(s)\n", i );
218  }
219 
220  _ready = true;
221 }
222 
223 void ConfusionMatrix::calculate(const Model& model, const SamplerPtr& sampler)
224 {
225  model->setNormalization( sampler );
226 
227  calculate(sampler->getEnvironment(), model, sampler->getPresences(), sampler->getAbsences() );
228 }
229 
230 
231 int ConfusionMatrix::getValue(Scalar predictionValue,
232  Scalar actualValue) const
233 {
234  int predictedIndex, actualIndex;
235 
236  predictedIndex = (predictionValue >= _predictionThreshold);
237  actualIndex = (actualValue >= _predictionThreshold);
238 
239  return _confMatrix[predictedIndex][actualIndex];
240 }
241 
242 
244 {
245  Scalar total =
246  _confMatrix[0][0] + _confMatrix[0][1] +
247  _confMatrix[1][0] + _confMatrix[1][1];
248 
249  if (_ready && total)
250  return ( _confMatrix[0][0] + _confMatrix[1][1] ) / total;
251  else
252  return -1.0;
253 }
254 
255 
257 {
258  Scalar total = _confMatrix[1][0] + _confMatrix[0][0];
259  if (_ready && total)
260  return _confMatrix[1][0] / total;
261  else
262  return -1.0;
263 }
264 
265 
267 {
268  Scalar total = _confMatrix[0][1] + _confMatrix[1][1];
269  if (_ready && total)
270  return _confMatrix[0][1] / total;
271  else
272  return -1.0;
273 }
274 
277 {
278  ConfigurationPtr config( new ConfigurationImpl("ConfusionMatrix") );
279 
280  config->addNameValue( "Threshold", getThreshold() );
281  config->addNameValue( "Accuracy", getAccuracy() * 100 );
282  config->addNameValue( "OmissionError", getOmissionError() * 100 );
283  config->addNameValue( "CommissionError", getCommissionError() * 100 );
284  config->addNameValue( "TruePositives", getValue( 1, 1 ) );
285  config->addNameValue( "FalsePositives", getValue( 0, 1 ) );
286  config->addNameValue( "TrueNegatives", getValue( 0, 0 ) );
287  config->addNameValue( "FalseNegatives", getValue( 1, 0 ) );
288 
289  return config;
290 }
void reset(Scalar predictionThreshold=CONF_MATRIX_DEFAULT_THRESHOLD, bool ignoreAbsences=false)
void warn(const char *format,...)
'Warn' level.
Definition: Log.cpp:273
double Scalar
Type of map values.
Definition: om_defs.hh:39
double getAccuracy() const
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
int getValue(Scalar predictionValue, Scalar actualValue) const
double getCommissionError() const
void calculate(const EnvironmentPtr &env, const Model &model, const OccurrencesPtr &presences, const OccurrencesPtr &absences=OccurrencesPtr())
std::size_t size() const
Definition: Sample.hh:70
void setLowestTrainingThreshold(const Model &model, const SamplerPtr &sampler)
double getOmissionError() const
double getThreshold() const
std::vector< OccurrencePtr >::const_iterator const_iterator
Definition: Occurrences.hh:85
ConfusionMatrix(Scalar predictionThreshold=CONF_MATRIX_DEFAULT_THRESHOLD, bool ignoreAbsences=false)
ConfigurationPtr getConfiguration() const
#define CONF_MATRIX_DEFAULT_THRESHOLD
void debug(const char *format,...)
'Debug' level.
Definition: Log.cpp:237
Definition: Sample.hh:25