openModeller  Version 1.4.0
svm_alg.cpp
Go to the documentation of this file.
00001 
00027 #include "svm_alg.hh"
00028 #include "svm.h"
00029 #include <openmodeller/MeanVarianceNormalizer.hh>
00030 #include <openmodeller/Sampler.hh>
00031 #include <openmodeller/Exceptions.hh>
00032 
00033 #include <string.h>
00034 #include <stdio.h>
00035 #include <stdlib.h>
00036 
00037 //debug
00038 #include <iostream>
00039 
00040 using namespace std;
00041 
00042 /****************************************************************/
00043 /********************** Algorithm's Metadata ********************/
00044 
// Number of entries in the algorithm's parameter table.
#define NUM_PARAM 9

// Identifiers used to declare each parameter and to fetch its value.
// They are kept as macros because they are pasted into adjacent string
// literals when building log messages.
#define SVMTYPE_ID "SvmType"
#define KERNELTYPE_ID "KernelType"
#define DEGREE_ID "Degree"
#define GAMMA_ID "Gamma"
#define COEF0_ID "Coef0"
#define C_ID "C"
#define NU_ID "Nu"
#define PROB_ID "ProbabilisticOutput"
#define PSEUDO_ID "NumberOfPseudoAbsences"

// Prefix prepended to the log messages emitted by this algorithm.
#define SVM_LOG_PREFIX "SvmAlgorithm: "
00058 
00059 /******************************/
00060 /*** Algorithm's parameters ***/
00061 
00062 static AlgParamMetadata parameters[NUM_PARAM] = {
00063 
00064   // SVM type
00065   {
00066     SVMTYPE_ID,      // Id.
00067     "SVM type",      // Name.
00068     Integer,         // Type.
00069     "Type of SVM: 0 = C-SVC, 1 = Nu-SVC, 2 = one-class SVM",   // Overview
00070     "Type of SVM: 0 = C-SVC, 1 = Nu-SVC, 2 = one-class SVM", // Description.
00071     1,         // Not zero if the parameter has lower limit.
00072     0,         // Parameter's lower limit.
00073     1,         // Not zero if the parameter has upper limit.
00074     2,         // Parameter's upper limit.
00075     "0"        // Parameter's typical (default) value.
00076   },
00077   // Kernel type
00078   {
00079     KERNELTYPE_ID,     // Id.
00080     "Kernel type",     // Name.
00081     Integer,           // Type.
00082     "Type of kernel function: 0 = linear: u'*v , 1 = polynomial: (gamma*u'*v + coef0)^degree , 2 = radial basis function: exp(-gamma*|u-v|^2)", // Overview
00083     "Type of kernel function: 0 = linear: u'*v , 1 = polynomial: (gamma*u'*v + coef0)^degree , 2 = radial basis function: exp(-gamma*|u-v|^2)", // Description.
00084     1,         // Not zero if the parameter has lower limit.
00085     0,         // Parameter's lower limit.
00086     1,         // Not zero if the parameter has upper limit.
00087     4,         // Parameter's upper limit.
00088     "2"        // Parameter's typical (default) value.
00089   },
00090   // Degree
00091   {
00092     DEGREE_ID,         // Id.
00093     "Degree",          // Name.
00094     Integer,           // Type.
00095     "Degree in kernel function (only for polynomial kernels).", // Overview
00096     "Degree in kernel function (only for polynomial kernels).", // Description.
00097     1,         // Not zero if the parameter has lower limit.
00098     0,         // Parameter's lower limit.
00099     0,         // Not zero if the parameter has upper limit.
00100     0,         // Parameter's upper limit.
00101     "3"        // Parameter's typical (default) value.
00102   },
00103   // Gamma
00104   {
00105     GAMMA_ID,         // Id.
00106     "Gamma",          // Name.
00107     Real,             // Type.
00108     "Gamma in kernel function (only for polynomial and radial basis kernels). When set to zero, the default value will actually be 1/k, where k is the number of layers.", // Overview
00109     "Gamma in kernel function (only for polynomial and radial basis kernels). When set to zero, the default value will actually be 1/k, where k is the number of layers.", // Description.
00110     0,         // Not zero if the parameter has lower limit.
00111     0,         // Parameter's lower limit.
00112     0,         // Not zero if the parameter has upper limit.
00113     0,         // Parameter's upper limit.
00114     "0"        // Parameter's typical (default) value.
00115   },
00116   // Coef0
00117   {
00118     COEF0_ID,         // Id.
00119     "Coef0",          // Name.
00120     Real,             // Type.
00121     "Coef0 in kernel function (only for polynomial kernels).", // Overview
00122     "Coef0 in kernel function (only for polynomial kernels).", // Description.
00123     0,         // Not zero if the parameter has lower limit.
00124     0,         // Parameter's lower limit.
00125     0,         // Not zero if the parameter has upper limit.
00126     0,         // Parameter's upper limit.
00127     "0"        // Parameter's typical (default) value.
00128   },
00129   // C
00130   {
00131     C_ID,         // Id.
00132     "Cost",       // Name.
00133     Real,         // Type.
00134     "Cost (only for C-SVC types).", // Overview
00135     "Cost (only for C-SVC types).", // Description.
00136     1,         // Not zero if the parameter has lower limit.
00137     0.001,     // Parameter's lower limit.
00138     0,         // Not zero if the parameter has upper limit.
00139     0,         // Parameter's upper limit.
00140     "1"        // Parameter's typical (default) value.
00141   },
00142   // Nu
00143   {
00144     NU_ID,         // Id.
00145     "Nu",          // Name.
00146     Real,          // Type.
00147     "Nu (only for Nu-SVC and one-class SVM).", // Overview
00148     "Nu (only for Nu-SVC and one-class SVM).", // Description.
00149     1,         // Not zero if the parameter has lower limit.
00150     0.001,     // Parameter's lower limit.
00151     1,         // Not zero if the parameter has upper limit.
00152     1,         // Parameter's upper limit.
00153     "0.5"      // Parameter's typical (default) value.
00154   },
00155   // Probabilistic output
00156   {
00157     PROB_ID,                 // Id.
00158     "Probabilistic output",  // Name.
00159     Integer,                 // Type.
00160     "Indicates if the output should be a probability instead of a binary response (only available for C-SVC and Nu-SVC).", // Overview
00161     "Indicates if the output should be a probability instead of a binary response (only available for C-SVC and Nu-SVC).", // Description.
00162     1,         // Not zero if the parameter has lower limit.
00163     0,         // Parameter's lower limit.
00164     1,         // Not zero if the parameter has upper limit.
00165     1,         // Parameter's upper limit.
00166     "1"        // Parameter's typical (default) value.
00167   },
00168   // Number of pseudo absences to be generated
00169   {
00170     PSEUDO_ID,                   // Id.
00171     "Number of pseudo-absences", // Name.
00172     Integer,                     // Type.
00173     "Number of pseudo-absences to be generated (only for C-SVC and Nu-SVC when no absences have been provided). When absences are needed, a zero parameter will default to the same number of presences.", // Overview
00174     "Number of pseudo-absences to be generated (only for C-SVC and Nu-SVC when no absences have been provided). When absences are needed, a zero parameter will default to the same number of presences.", // Description.
00175     1,         // Not zero if the parameter has lower limit.
00176     0,         // Parameter's lower limit.
00177     0,         // Not zero if the parameter has upper limit.
00178     0,         // Parameter's upper limit.
00179     "0"        // Parameter's typical (default) value.
00180   },
00181 };
00182 
00183 /************************************/
00184 /*** Algorithm's general metadata ***/
00185 
00186 static AlgMetadata metadata = {
00187 
00188   "SVM",                     // Id.
00189   "SVM (Support Vector Machines)", // Name.
00190   "0.5",                           // Version.
00191 
00192   // Overview
00193   "Support vector machines (SVMs) are a set of related supervised learning methods that belong to a family of generalized linear classifiers. They can also be considered a special case of Tikhonov regularization. A special property of SVMs is that they simultaneously minimize the empirical classification error and maximize the geometric margin; hence they are also known as maximum margin classifiers. Content retrieved from Wikipedia on the 13th of June, 2007: http://en.wikipedia.org/w/index.php?title=Support_vector_machine&oldid=136646498.",
00194 
00195   // Description.
00196   "Support vector machines map input vectors to a higher dimensional space where a maximal separating hyperplane is constructed. Two parallel hyperplanes are constructed on each side of the hyperplane that separates the data. The separating hyperplane is the hyperplane that maximises the distance between the two parallel hyperplanes. An assumption is made that the larger the margin or distance between these parallel hyperplanes the better the generalisation error of the classifier will be. The model produced by support vector classification only depends on a subset of the training data, because the cost function for building the model does not care about training points that lie beyond the margin. Content retrieved from Wikipedia on the 13th of June, 2007: http://en.wikipedia.org/w/index.php?title=Support_vector_machine&oldid=136646498. The openModeller implementation of SVMs makes use of the libsvm library version 2.85: Chih-Chung Chang and Chih-Jen Lin, LIBSVM: a library for support vector machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm.\n\nRelease history:\n version 0.1: initial release\n version 0.2: New parameter to specify the number of pseudo-absences to be generated; upgraded to libsvm 2.85; fixed memory leaks\n version 0.3: when absences are needed and the number of pseudo absences to be generated is zero, it will default to the same number of presences\n version 0.4: included missing serialization of C\n version 0.5: the indication if the algorithm needed normalized environmental data was not working when the algorithm was loaded from an existing model.",
00197 
00198   "Vladimir N. Vapnik", // Algorithm author.
00199   "1) Vapnik, V. (1995) The Nature of Statistical Learning Theory. SpringerVerlag. 2) Schölkopf, B., Smola, A., Williamson, R. and Bartlett, P.L.(2000). New support vector algorithms. Neural Computation, 12, 1207-1245. 3) Schölkopf, B., Platt, J.C., Shawe-Taylor, J., Smola A.J. and Williamson, R.C. (2001). Estimating the support of a high-dimensional distribution. Neural Computation, 13, 1443-1471. 4) Cristianini, N. & Shawe-Taylor, J. (2000). An Introduction to Support Vector Machines and other kernel-based learning methods. Cambridge University Press.", // Bibliography.
00200 
00201   "Renato De Giovanni in collaboration with Ana Carolina Lorena", // Code author.
00202   "renato [at] cria . org . br", // Code author's contact.
00203 
00204   0, // Does not accept categorical data.
00205   0, // Does not need (pseudo)absence points.
00206 
00207   NUM_PARAM, // Algorithm's parameters.
00208   parameters
00209 };
00210 
00211 // Note: I needed to copy this structure definition from svm.cpp, otherwise
00212 // our custom serialization would not compile. If there's any problem with this 
00213 // approach, then I suggest removing this definition and then moving the svm_model
00214 // definition from svm.cpp to svm.h
00215 struct svm_model
00216 {
00217   svm_parameter param;// parameter
00218   int nr_class;       // number of classes, = 2 in regression/one class svm
00219   int l;              // total #SV
00220   svm_node **SV;      // SVs (SV[l])
00221   double **sv_coef;   // coefficients for SVs in decision functions (sv_coef[k-1][l])
00222   double *rho;        // constants in decision functions (rho[k*(k-1)/2])
00223   double *probA;      // parwise probability information
00224   double *probB;
00225 
00226   // for classification only
00227 
00228   int *label; // label of each class (label[k])
00229   int *nSV;   // number of SVs for each class (nSV[k])
00230               // nSV[0] + nSV[1] + ... + nSV[k-1] = l
00231   // XXX
00232   int free_sv; // 1 if svm_model is created by svm_load_model
00233                // 0 if svm_model is created by svm_train
00234 };
00235 
00236 /****************************************************************/
00237 /****************** Algorithm's factory function ****************/
00238 
00239 OM_ALG_DLL_EXPORT
00240 AlgorithmImpl *
00241 algorithmFactory()
00242 {
00243   return new SvmAlgorithm();
00244 }
00245 
00246 OM_ALG_DLL_EXPORT
00247 AlgMetadata const *
00248 algorithmMetadata()
00249 {
00250   return &metadata;
00251 }
00252 
00253 
00254 /*********************************************/
00255 /************** SVM algorithm ****************/
00256 
00257 /*******************/
00258 /*** constructor ***/
00259 
00260 SvmAlgorithm::SvmAlgorithm() :
00261   AlgorithmImpl( &metadata ),
00262   _done( false ),
00263   _num_layers( 0 ),
00264   _svm_model( 0 ),
00265   _presence_index( -1 )
00266 {
00267   _normalizerPtr = new MeanVarianceNormalizer();
00268 
00269   // Needs to be initialized (see destructor)
00270   _svm_model = 0;
00271 
00272   _svm_problem.l = 0;
00273 }
00274 
00275 
00276 /******************/
00277 /*** destructor ***/
00278 
00279 SvmAlgorithm::~SvmAlgorithm()
00280 {
00281   if ( _svm_model ) {
00282 
00283     svm_destroy_model( _svm_model );
00284   }
00285 
00286   if ( _svm_problem.l > 0 ) {
00287 
00288     delete[] _svm_problem.y;
00289 
00290     for ( int i = 0; i < _svm_problem.l; ++i ) {
00291 
00292       delete _svm_problem.x[i];
00293     }
00294 
00295     delete[] _svm_problem.x;
00296   }
00297 }
00298 
00299 /**************************/
00300 /*** need Normalization ***/
00301 int SvmAlgorithm::needNormalization()
00302 {
00303   int svm_type;
00304 
00305   if ( done() ) {
00306 
00307     if ( ! _normalizerPtr ) {
00308       
00309       return 0;
00310     }
00311   }
00312   else {
00313     
00314     if ( getParameter( SVMTYPE_ID, &svm_type ) && svm_type != 2 && _samp->numAbsence() == 0 ) {
00315 
00316       // It will be necessary to generate pseudo absences, so do not waste
00317       // time normalizing things because normalization should ideally consider
00318       // all trainning points (including pseudo-absences). In this specific case, 
00319       // normalization will take place in initialize().
00320       return 0;
00321     }
00322   }
00323 
00324   return 1;
00325 }
00326 
00327 /******************/
00328 /*** initialize ***/
00329 int
00330 SvmAlgorithm::initialize()
00331 {
00332   // SVM type
00333   if ( ! getParameter( SVMTYPE_ID, &_svm_parameter.svm_type ) ) {
00334 
00335     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" SVMTYPE_ID "' not passed.\n" );
00336     return 0;
00337   }
00338 
00339   // Need to check SVM type because some types from the svm library will not be supported
00340   if ( _svm_parameter.svm_type != 0 && 
00341        _svm_parameter.svm_type != 1 && 
00342        _svm_parameter.svm_type != 2  ) {
00343 
00344     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" SVMTYPE_ID "' not set properly. It must be 0, 1 or 2.\n" );
00345     return 0;
00346   }
00347 
00348   // Kernel type
00349   if ( ! getParameter( KERNELTYPE_ID, &_svm_parameter.kernel_type ) ) {
00350 
00351     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" KERNELTYPE_ID "' not passed.\n" );
00352     return 0;
00353   }
00354 
00355   // Need to check Kernel type because some kernels from the svm library will not be supported
00356   if ( _svm_parameter.kernel_type != 0 && 
00357        _svm_parameter.kernel_type != 1 && 
00358        _svm_parameter.kernel_type != 2  ) {
00359 
00360     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" KERNELTYPE_ID "' not set properly. It must be 0, 1 or 2.\n" );
00361     return 0;
00362   }
00363 
00364   // Degree
00365   if ( ! getParameter( DEGREE_ID, &_svm_parameter.degree ) ) {
00366 
00367     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" DEGREE_ID "' not passed.\n" );
00368     return 0;
00369   }
00370 
00371   // Gamma
00372   if ( ! getParameter( GAMMA_ID, &_svm_parameter.gamma ) ) {
00373 
00374     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" GAMMA_ID "' not passed.\n" );
00375     return 0;
00376   }
00377 
00378   _num_layers = _samp->numIndependent();
00379 
00380   if ( _svm_parameter.gamma == 0 ) {
00381 
00382     _svm_parameter.gamma = 1.0/_num_layers;
00383   }
00384 
00385   // Coef0
00386   if ( ! getParameter( COEF0_ID, &_svm_parameter.coef0 ) ) {
00387 
00388     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" COEF0_ID "' not passed.\n" );
00389     return 0;
00390   }
00391 
00392   // C
00393   if ( ! getParameter( C_ID, &_svm_parameter.C ) ) {
00394 
00395     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" C_ID "' not passed.\n" );
00396     return 0;
00397   }
00398 
00399   // Nu
00400   if ( ! getParameter( NU_ID, &_svm_parameter.nu ) ) {
00401 
00402     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" NU_ID "' not passed.\n" );
00403     return 0;
00404   }
00405 
00406   // Probabilistic output
00407   if ( ! getParameter( PROB_ID, &_svm_parameter.probability ) ) {
00408 
00409     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" PROB_ID "' not passed.\n" );
00410     return 0;
00411   }
00412 
00413   // Check if probabilistic output is 0 or 1
00414   if ( _svm_parameter.probability != 0 && 
00415        _svm_parameter.probability != 1 ) {
00416 
00417     Log::instance()->error( SVM_LOG_PREFIX "Parameter '" PROB_ID "' not set properly. It must be 0 or 1.\n" );
00418     return 0;
00419   }
00420 
00421   // Probability estimates are not available for one-class SVM
00422   if ( _svm_parameter.svm_type == 2  ) {
00423 
00424     Log::instance()->warn( SVM_LOG_PREFIX "Probability estimates are not available for one-class SVM. Ignoring parameter.\n" );
00425     _svm_parameter.probability = 0;
00426   }
00427 
00428   // Is this necessary?
00429   _svm_parameter.cache_size = 100;
00430   _svm_parameter.eps = 1e-3;
00431   _svm_parameter.p = 0.1;
00432   _svm_parameter.shrinking = 1;
00433   _svm_parameter.nr_weight = 0;
00434   _svm_parameter.weight_label = NULL;
00435   _svm_parameter.weight = NULL;
00436 
00437   // Remove redundant entries
00438   //_samp->environmentallyUnique();
00439 
00440   // Check the number of presences
00441   int num_presences = _samp->numPresence();
00442 
00443   if ( num_presences == 0 ) {
00444 
00445     Log::instance()->warn( SVM_LOG_PREFIX "No presence points inside the mask!\n" );
00446     return 0;
00447   }
00448 
00449   int num_absences = _samp->numAbsence();
00450 
00451   bool generate_pseudo_absences = false;
00452 
00453   // All types of SVM will need absences, except one-class SVM
00454   if ( num_absences <= 0 && _svm_parameter.svm_type != 2 ) {
00455 
00456     Log::instance()->warn( SVM_LOG_PREFIX "No absence points available.\n" );
00457 
00458     // Pseudo-absences will be generated later
00459     if ( ! getParameter( PSEUDO_ID, &num_absences ) ) {
00460 
00461       Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences unspecified. Default will be %d (same number of presences).\n", num_presences );
00462 
00463       num_absences = num_presences;
00464     }
00465     else if ( num_absences == 0 ) {
00466 
00467       Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences will be %d (same number of presences).\n", num_presences );
00468 
00469       num_absences = num_presences;
00470     }
00471     else if ( num_absences < 0 ) {
00472 
00473       Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences must be a positive number.\n" );
00474       return 0;
00475     }
00476 
00477     generate_pseudo_absences = true;
00478   }
00479 
00480   int num_points = num_presences;
00481 
00482   // Sum absence points if not dealing with one-class SVM
00483   if ( _svm_parameter.svm_type != 2 ) {
00484 
00485     num_points += num_absences;
00486   }
00487 
00488   _svm_problem.l = num_points;
00489 
00490   _svm_problem.y = new double[num_points];
00491   _svm_problem.x = new svm_node*[num_points];
00492 
00493   // Load SVM problem with samples
00494 
00495   OccurrencesPtr presences = _samp->getPresences();
00496 
00497   OccurrencesImpl::const_iterator p_iterator;
00498   OccurrencesImpl::const_iterator p_end;
00499 
00500   int i = 0; // shared counter
00501 
00502   // Absences
00503 
00504   if ( _svm_parameter.svm_type != 2 ) {
00505 
00506     OccurrencesPtr absences;
00507 
00508     if ( generate_pseudo_absences ) {
00509 
00510       Log::instance()->info( SVM_LOG_PREFIX "Generating pseudo-absences.\n" );
00511 
00512       absences = new OccurrencesImpl( presences->label(), presences->coordSystem() );
00513 
00514       for ( int i = 0; i < num_absences; ++i ) {
00515 
00516         OccurrencePtr oc = _samp->getPseudoAbsence();
00517         absences->insert( oc ); 
00518       }
00519 
00520       // Compute normalization with all points
00521       SamplerPtr mySamplerPtr = createSampler( _samp->getEnvironment(), presences, absences );
00522 
00523       _normalizerPtr->computeNormalization( mySamplerPtr );
00524 
00525       setNormalization( _samp );
00526 
00527       absences->normalize( _normalizerPtr );
00528     }
00529     else {
00530 
00531       // should be normalized already
00532       absences = _samp->getAbsences();
00533     }
00534 
00535     p_iterator = absences->begin();
00536     p_end = absences->end();
00537 
00538     while ( p_iterator != p_end ) {
00539 
00540       Sample point = (*p_iterator)->environment();
00541 
00542       _svm_problem.y[i] = -1; // absence
00543 
00544       _svm_problem.x[i] = new svm_node[_num_layers+1];
00545 
00546       _getNode( _svm_problem.x[i], point );
00547     
00548       ++p_iterator;
00549       ++i;
00550     }
00551   }
00552 
00553   // Presences (should be normalized already, in one way or another)
00554 
00555   p_iterator = presences->begin();
00556   p_end = presences->end();
00557 
00558   while ( p_iterator != p_end ) {
00559 
00560     Sample point = (*p_iterator)->environment();
00561 
00562     _svm_problem.y[i] = +1; // presence
00563 
00564     _svm_problem.x[i] = new svm_node[_num_layers+1];
00565 
00566     _getNode( _svm_problem.x[i], point );
00567     
00568     ++p_iterator;
00569     ++i;
00570   }
00571 
00572   // Check parameters using svm library logic
00573   const char *error_msg;
00574   error_msg = svm_check_parameter( &_svm_problem, &_svm_parameter );
00575 
00576   if ( error_msg ) {
00577 
00578     Log::instance()->error( error_msg );
00579     return 0;
00580   }
00581 
00582   return 1;
00583 }
00584 
00585 
00586 /***************/
00587 /*** iterate ***/
00588 int
00589 SvmAlgorithm::iterate()
00590 {
00591   _svm_model = svm_train( &_svm_problem, &_svm_parameter );
00592 
00593   if ( _svm_parameter.probability == 1 && svm_check_probability_model( _svm_model ) == 0 ){
00594 
00595     Log::instance()->error( SVM_LOG_PREFIX "Generated model cannot return probability estimates.\n" );
00596     return 0;
00597   }
00598 
00599   int * labels = new int[2];
00600 
00601   svm_get_labels( _svm_model, labels );
00602 
00603   _presence_index = ( labels[0] == +1 ) ? 0 : 1;
00604 
00605   delete labels;
00606 
00607   _done = true;
00608 
00609   // debug
00610   //svm_save_model( "model.svm", _svm_model );
00611 
00612   return 1;
00613 }
00614 
00615 
00616 /************/
00617 /*** done ***/
00618 int
00619 SvmAlgorithm::done() const
00620 {
00621   return _done;
00622 }
00623 
00624 /*****************/
00625 /*** get Value ***/
00626 Scalar
00627 SvmAlgorithm::getValue( const Sample& x ) const
00628 {
00629   svm_node * node = new svm_node[_num_layers+1];
00630 
00631   _getNode( node, x );
00632 
00633   double prob;
00634 
00635   if ( _svm_parameter.probability == 1 ) {
00636 
00637     // Probability output
00638 
00639     double * estimates = new double[2];
00640 
00641     svm_predict_probability( _svm_model, node, estimates );
00642 
00643     prob = estimates[_presence_index];
00644 
00645     delete[] estimates;
00646   }
00647   else {
00648 
00649     // Binary output
00650 
00651     double class_predicted = svm_predict( _svm_model, node );
00652 
00653     prob = ( class_predicted < 0.0 ) ? 0 : 1;
00654   }
00655 
00656   delete[] node;
00657 
00658   return prob;
00659 }
00660 
00661 /***********************/
00662 /*** get Convergence ***/
00663 int
00664 SvmAlgorithm::getConvergence( Scalar * const val ) const
00665 {
00666   *val = 1.0;
00667   return 1;
00668 }
00669 
00670 /****************/
00671 /*** get Node ***/
00672 void
00673 SvmAlgorithm::_getNode( svm_node * node, const Sample& sample ) const
00674 {
00675   for ( int j = 0; j < _num_layers; ++j ) {
00676 
00677     node[j].index = j+1;  // attr index (must start with 1!)
00678     node[j].value = sample[j]; // attr value
00679   }
00680 
00681   node[_num_layers].index = -1; // end of array
00682   node[_num_layers].value = 0;  // end of array
00683 }
00684 
00685 /****************************************************************/
00686 /****************** configuration *******************************/
00687 void
00688 SvmAlgorithm::_getConfiguration( ConfigurationPtr& config ) const
00689 {
00690   if ( ! _done )
00691     return;
00692 
00693   ConfigurationPtr model_config( new ConfigurationImpl("Svm") );
00694   config->addSubsection( model_config );
00695 
00696   model_config->addNameValue( "NumLayers", _num_layers );
00697   model_config->addNameValue( "Type", _svm_parameter.svm_type );
00698   model_config->addNameValue( "KernelType", _svm_parameter.kernel_type );
00699   model_config->addNameValue( "Probabilistic", _svm_parameter.probability );
00700   model_config->addNameValue( "Degree", _svm_parameter.degree );
00701   model_config->addNameValue( "Gamma", _svm_parameter.gamma );
00702   model_config->addNameValue( "Coef0", _svm_parameter.coef0 );
00703   model_config->addNameValue( "C", _svm_parameter.C );
00704   model_config->addNameValue( "Rho", _svm_model->rho[0] ); // assuming always one
00705 
00706   if ( _svm_parameter.probability == 1 ) {
00707 
00708       model_config->addNameValue( "ProbA", _svm_model->probA[0] );
00709       model_config->addNameValue( "ProbB", _svm_model->probB[0] );
00710   }
00711 
00712   if ( _svm_parameter.svm_type != 2 ) {
00713 
00714       model_config->addNameValue( "NrSv", _svm_model->nSV, 2 );
00715   }
00716 
00717   ConfigurationPtr vectors_config( new ConfigurationImpl("Vectors") );
00718   model_config->addSubsection( vectors_config );
00719 
00720   vectors_config->addNameValue( "Total", _svm_model->l );
00721 
00722   // Labels
00723   int * labels = new int[2];
00724 
00725   svm_get_labels( _svm_model, labels );
00726 
00727   model_config->addNameValue( "Labels", labels, 2 );
00728 
00729   delete labels;
00730 
00731   // Vectors
00732   const double * const *sv_coef = _svm_model->sv_coef;
00733   const svm_node * const *SV = _svm_model->SV;
00734 
00735   for ( int i = 0; i < _svm_model->l; i++ ) {
00736 
00737     ConfigurationPtr vector_config( new ConfigurationImpl("Vector") );
00738     vectors_config->addSubsection( vector_config );
00739 
00740     vector_config->addNameValue( "Coef", sv_coef[0][i] );
00741 
00742     const svm_node *p = SV[i];
00743 
00744     while ( p->index != -1 ) {
00745 
00746       ConfigurationPtr node_config( new ConfigurationImpl("Node") );
00747       vector_config->addSubsection( node_config );
00748 
00749       node_config->addNameValue( "Index", p->index );
00750       node_config->addNameValue( "Value", p->value );
00751 
00752       p++;
00753     }
00754   }
00755 }
00756 
00757 void
00758 SvmAlgorithm::_setConfiguration( const ConstConfigurationPtr& config )
00759 {
00760   ConstConfigurationPtr model_config = config->getSubsection( "Svm", false );
00761 
00762   if ( ! model_config )
00763     return;
00764 
00765   _svm_model = (svm_model *)malloc( (1)*sizeof( svm_model ) );
00766 
00767   _svm_model->probA = NULL;
00768   _svm_model->probB = NULL;
00769   _svm_model->label = NULL;
00770   _svm_model->nSV   = NULL;
00771 
00772   _num_layers = model_config->getAttributeAsInt( "NumLayers", 0 );
00773   _svm_parameter.svm_type = model_config->getAttributeAsInt( "Type", 0 );
00774   _svm_parameter.kernel_type = model_config->getAttributeAsInt( "KernelType", 2 );
00775   _svm_parameter.probability = model_config->getAttributeAsInt( "Probabilistic", 1 );
00776   _svm_parameter.degree = model_config->getAttributeAsInt( "Degree", 3 );
00777   _svm_parameter.gamma = model_config->getAttributeAsDouble( "Gamma", 0 );
00778   _svm_parameter.coef0 = model_config->getAttributeAsDouble( "Coef0", 0 );
00779 
00780   try {
00781 
00782     // Serialization of "C" was added in version 0.4 of this algorithm
00783     _svm_parameter.C = model_config->getAttributeAsDouble( "C", 1 );
00784   }
00785   catch ( AttributeNotFound& e ) {
00786 
00787     UNUSED( e );
00788   }
00789 
00790   _svm_parameter.cache_size = 100;
00791   _svm_parameter.eps = 1e-3;
00792   _svm_parameter.p = 0.1;
00793   _svm_parameter.shrinking = 1;
00794   _svm_parameter.nr_weight = 0;
00795   _svm_parameter.weight_label = NULL;
00796   _svm_parameter.weight = NULL;
00797 
00798   _svm_model->nr_class = 2; // presence or absence
00799 
00800   _svm_model->rho = new double[1];
00801 
00802   _svm_model->rho[0] = model_config->getAttributeAsDouble( "Rho", 0.0 ); // assuming always one
00803 
00804   if ( _svm_parameter.probability == 1 ) {
00805 
00806       _svm_model->probA = new double[1];
00807       _svm_model->probB = new double[1];
00808 
00809       _svm_model->probA[0] = model_config->getAttributeAsDouble( "ProbA", 0.0 );
00810       _svm_model->probB[0] = model_config->getAttributeAsDouble( "ProbB", 0.0 );
00811   }
00812 
00813   if ( _svm_parameter.svm_type != 2 ) {
00814 
00815       int size;
00816       model_config->getAttributeAsIntArray( "NrSv", &_svm_model->nSV, &size );
00817   }
00818 
00819   ConstConfigurationPtr vectors_config = model_config->getSubsection( "Vectors", false );
00820 
00821   _svm_model->l = vectors_config->getAttributeAsInt( "Total", 0 );
00822 
00823   Configuration::subsection_list vectors = vectors_config->getAllSubsections();
00824 
00825   Configuration::subsection_list::iterator vec = vectors.begin();
00826   Configuration::subsection_list::iterator last_vec = vectors.end();
00827 
00828   _svm_model->sv_coef = new double*[_svm_model->nr_class - 1];
00829   _svm_model->sv_coef[0] = new double[_svm_model->l];
00830 
00831   _svm_model->SV = new svm_node*[_svm_model->l];
00832 
00833   int i = 0;
00834 
00835   for ( ; vec != last_vec; ++vec ) {
00836 
00837     if ( (*vec)->getName() != "Vector" ) {
00838 
00839       continue;
00840     }
00841 
00842     _svm_model->sv_coef[0][i] = (*vec)->getAttributeAsDouble( "Coef", 0.0 );
00843 
00844     _svm_model->SV[i] = new svm_node[_num_layers + 1];
00845 
00846     Configuration::subsection_list nodes = (*vec)->getAllSubsections();
00847 
00848     Configuration::subsection_list::iterator node = nodes.begin();
00849     Configuration::subsection_list::iterator last_node = nodes.end();
00850 
00851     int j = 0;
00852 
00853     for ( ; node != last_node; ++node ) {
00854 
00855       if ( (*node)->getName() != "Node" ) {
00856 
00857         continue;
00858       }
00859 
00860       _svm_model->SV[i][j].index = (*node)->getAttributeAsInt( "Index", 0 );
00861       _svm_model->SV[i][j].value = (*node)->getAttributeAsDouble( "Value", 0.0 );
00862 
00863       ++j;
00864     }
00865 
00866     _svm_model->SV[i][j].index = -1;
00867     _svm_model->SV[i][j].value = 0.0;
00868 
00869     ++i;
00870   }
00871 
00872   // Labels
00873   int size;
00874   model_config->getAttributeAsIntArray( "Labels", &_svm_model->label, &size );
00875 
00876   _presence_index = ( _svm_model->label[0] == +1 ) ? 0 : 1;
00877 
00878   _svm_model->param = _svm_parameter;
00879 
00880   _svm_model->free_sv = 1;
00881 
00882   _done = true;
00883 }