openModeller
Version 1.4.0
|
00001 00026 #ifndef _ROCCURVEHH_ 00027 #define _ROCCURVEHH_ 00028 00029 #include <openmodeller/om_defs.hh> 00030 #include <openmodeller/Configuration.hh> 00031 00032 #include <openmodeller/Model.hh> 00033 00034 #include <openmodeller/Occurrences.hh> 00035 #include <openmodeller/Environment.hh> 00036 #include <openmodeller/Sampler.hh> 00037 00038 #include <map> 00039 00040 #define ROC_DEFAULT_RESOLUTION 15 00041 #define ROC_DEFAULT_BACKGROUND_POINTS 10000 00042 00046 class dllexp RocCurve 00047 { 00048 public: 00052 RocCurve(); 00053 00057 ~RocCurve(); 00058 00066 void initialize( int resolution=ROC_DEFAULT_RESOLUTION ); 00067 00075 void initialize( int resolution, int num_background_points ); 00076 00086 void initialize( int resolution, bool use_absences_as_background ); 00087 00091 void reset(); 00092 00100 void calculate( const Model& model, const SamplerPtr& sampler ); 00101 00106 int numPoints() const { return _data.size(); } 00107 00115 double getX( int point_index ) const { return _data[point_index][0]; } 00116 00123 double getY( int point_index ) const { return _data[point_index][1]; } 00124 00129 double getTotalArea(); 00130 00139 double getPartialAreaRatio( double e=1.0 ); 00140 00144 bool ready() const { return _ready; } 00145 00149 ConfigurationPtr getConfiguration() const; 00150 00151 private: 00152 00156 struct VectorCompare { 00157 00158 bool operator () ( const std::vector<Scalar> &a, const std::vector<Scalar> &b ) const { 00159 00160 if ( a[0] != b[0] ) { 00161 00162 return a[0] < b[0]; // Compare 1 - specificity. 00163 } 00164 else { 00165 00166 return a[1] < b[1]; // Compare sensitivity (1 - specificity values are equal). 00167 } 00168 } 00169 }; 00170 00176 void _loadPredictions( const Model& model, const SamplerPtr& sampler ); 00177 00181 void _calculateGraphPoints(); 00182 00186 bool _calculateTotalArea(); 00187 00188 std::vector<int> _category; // 0=absence, 1=presence 00189 std::vector<Scalar> _prediction; // associated probabilities 00190 00191 std::vector< std::vector<Scalar> > _data; // Main data structure to store all points 00192 00193 int _resolution; // Number of points on the curve 00194 00195 int _approach; // Approach to be used: 0=undefined, 1=traditional (presence x absence), 2=proportional area 00196 00197 int _num_background_points; // Number of background points to be generated when there are no absences. Only for proportional area approach. 00198 bool _use_absences_as_background; // Indicates if absence points should be used as background points. Only for proportional area approach. 00199 00200 int _true_negatives; // Number of true negatives (binarized) 00201 int _true_positives; // Number of true positives (binarized) 00202 00203 double _auc; // Area under the curve. Need to store this to avoid recalculating in serialization. 00204 00205 std::map<double, double> _ratios; // Ratios calculated via getPartialAreaRatio (max omission <=> ratio). Ratios are stored here to be used during serialization. 00206 00207 std::vector<Scalar> _thresholds; // Thresholds in ascending order 00208 00209 std::vector<Scalar> _proportions; // Proportional area for each point 00210 00211 bool _ready; 00212 }; 00213 00214 #endif