openModeller  Version 1.4.0
RocCurve.hh
Go to the documentation of this file.
00001 
00026 #ifndef _ROCCURVEHH_
00027 #define _ROCCURVEHH_
00028 
00029 #include <openmodeller/om_defs.hh>
00030 #include <openmodeller/Configuration.hh>
00031 
00032 #include <openmodeller/Model.hh>
00033 
00034 #include <openmodeller/Occurrences.hh>
00035 #include <openmodeller/Environment.hh>
00036 #include <openmodeller/Sampler.hh>
00037 
00038 #include <map>
00039 
// Default value of the `resolution` argument of RocCurve::initialize(int),
// declared below. Presumably the number of points on the curve (see the
// comment on RocCurve::_resolution) -- confirm against the implementation.
00040 #define ROC_DEFAULT_RESOLUTION 15
// Default number of background points.
// NOTE(review): not referenced anywhere in this header; presumably the default
// for RocCurve::_num_background_points -- confirm against the implementation.
00041 #define ROC_DEFAULT_BACKGROUND_POINTS 10000
00042 
00046 class dllexp RocCurve
00047 {
00048 public:
00052     RocCurve();
00053 
00057   ~RocCurve();
00058 
00066   void initialize( int resolution=ROC_DEFAULT_RESOLUTION );
00067 
00075   void initialize( int resolution, int num_background_points );
00076 
00086   void initialize( int resolution, bool use_absences_as_background );
00087 
00091   void reset();
00092 
00100   void calculate( const Model& model, const SamplerPtr& sampler );
00101   
00106   int numPoints() const { return _data.size(); }
00107 
00115   double getX( int point_index ) const { return _data[point_index][0]; }
00116 
00123   double getY( int point_index ) const { return _data[point_index][1]; }
00124 
00129   double getTotalArea();
00130 
00139   double getPartialAreaRatio( double e=1.0 );
00140 
00144   bool ready() const { return _ready; }
00145 
00149   ConfigurationPtr getConfiguration() const;
00150 
00151 private:
00152 
00156   struct VectorCompare {
00157 
00158     bool operator () ( const std::vector<Scalar> &a, const std::vector<Scalar> &b ) const {
00159 
00160       if ( a[0] != b[0] ) {
00161 
00162         return a[0] < b[0]; // Compare 1 - specificity.
00163       }
00164       else {
00165 
00166         return a[1] < b[1]; // Compare sensitivity (1 - specificity values are equal).
00167       }
00168     }
00169   };
00170 
00176   void _loadPredictions( const Model& model, const SamplerPtr& sampler );
00177  
00181   void _calculateGraphPoints(); 
00182 
00186   bool _calculateTotalArea(); 
00187   
00188   std::vector<int> _category;      // 0=absence, 1=presence
00189   std::vector<Scalar> _prediction; // associated probabilities
00190 
00191   std::vector< std::vector<Scalar> > _data; // Main data structure to store all points
00192   
00193   int _resolution; // Number of points on the curve
00194 
00195   int _approach; // Approach to be used: 0=undefined, 1=traditional (presence x absence), 2=proportional area
00196 
00197   int _num_background_points; // Number of background points to be generated when there are no absences. Only for proportional area approach.
00198   bool _use_absences_as_background; // Indicates if absence points should be used as background points. Only for proportional area approach.
00199 
00200   int _true_negatives; // Number of true negatives (binarized)
00201   int _true_positives; // Number of true positives (binarized)
00202 
00203   double _auc; // Area under the curve. Need to store this to avoid recalculating in serialization.
00204 
00205   std::map<double, double> _ratios; // Ratios calculated via getPartialAreaRatio (max omission <=> ratio). Ratios are stored here to be used during serialization.
00206 
00207   std::vector<Scalar> _thresholds; // Thresholds in ascending order
00208 
00209   std::vector<Scalar> _proportions; // Proportional area for each point
00210 
00211   bool _ready;
00212 };
00213 
00214 #endif