openModeller
Version 1.4.0
|
00001 00027 #include "svm_alg.hh" 00028 #include "svm.h" 00029 #include <openmodeller/MeanVarianceNormalizer.hh> 00030 #include <openmodeller/Sampler.hh> 00031 #include <openmodeller/Exceptions.hh> 00032 00033 #include <string.h> 00034 #include <stdio.h> 00035 #include <stdlib.h> 00036 00037 //debug 00038 #include <iostream> 00039 00040 using namespace std; 00041 00042 /****************************************************************/ 00043 /********************** Algorithm's Metadata ********************/ 00044 00045 #define NUM_PARAM 9 00046 00047 #define SVMTYPE_ID "SvmType" 00048 #define KERNELTYPE_ID "KernelType" 00049 #define DEGREE_ID "Degree" 00050 #define GAMMA_ID "Gamma" 00051 #define COEF0_ID "Coef0" 00052 #define C_ID "C" 00053 #define NU_ID "Nu" 00054 #define PROB_ID "ProbabilisticOutput" 00055 #define PSEUDO_ID "NumberOfPseudoAbsences" 00056 00057 #define SVM_LOG_PREFIX "SvmAlgorithm: " 00058 00059 /******************************/ 00060 /*** Algorithm's parameters ***/ 00061 00062 static AlgParamMetadata parameters[NUM_PARAM] = { 00063 00064 // SVM type 00065 { 00066 SVMTYPE_ID, // Id. 00067 "SVM type", // Name. 00068 Integer, // Type. 00069 "Type of SVM: 0 = C-SVC, 1 = Nu-SVC, 2 = one-class SVM", // Overview 00070 "Type of SVM: 0 = C-SVC, 1 = Nu-SVC, 2 = one-class SVM", // Description. 00071 1, // Not zero if the parameter has lower limit. 00072 0, // Parameter's lower limit. 00073 1, // Not zero if the parameter has upper limit. 00074 2, // Parameter's upper limit. 00075 "0" // Parameter's typical (default) value. 00076 }, 00077 // Kernel type 00078 { 00079 KERNELTYPE_ID, // Id. 00080 "Kernel type", // Name. 00081 Integer, // Type. 00082 "Type of kernel function: 0 = linear: u'*v , 1 = polynomial: (gamma*u'*v + coef0)^degree , 2 = radial basis function: exp(-gamma*|u-v|^2)", // Overview 00083 "Type of kernel function: 0 = linear: u'*v , 1 = polynomial: (gamma*u'*v + coef0)^degree , 2 = radial basis function: exp(-gamma*|u-v|^2)", // Description. 00084 1, // Not zero if the parameter has lower limit. 00085 0, // Parameter's lower limit. 00086 1, // Not zero if the parameter has upper limit. 00087 4, // Parameter's upper limit. 00088 "2" // Parameter's typical (default) value. 00089 }, 00090 // Degree 00091 { 00092 DEGREE_ID, // Id. 00093 "Degree", // Name. 00094 Integer, // Type. 00095 "Degree in kernel function (only for polynomial kernels).", // Overview 00096 "Degree in kernel function (only for polynomial kernels).", // Description. 00097 1, // Not zero if the parameter has lower limit. 00098 0, // Parameter's lower limit. 00099 0, // Not zero if the parameter has upper limit. 00100 0, // Parameter's upper limit. 00101 "3" // Parameter's typical (default) value. 00102 }, 00103 // Gamma 00104 { 00105 GAMMA_ID, // Id. 00106 "Gamma", // Name. 00107 Real, // Type. 00108 "Gamma in kernel function (only for polynomial and radial basis kernels). When set to zero, the default value will actually be 1/k, where k is the number of layers.", // Overview 00109 "Gamma in kernel function (only for polynomial and radial basis kernels). When set to zero, the default value will actually be 1/k, where k is the number of layers.", // Description. 00110 0, // Not zero if the parameter has lower limit. 00111 0, // Parameter's lower limit. 00112 0, // Not zero if the parameter has upper limit. 00113 0, // Parameter's upper limit. 00114 "0" // Parameter's typical (default) value. 00115 }, 00116 // Coef0 00117 { 00118 COEF0_ID, // Id. 00119 "Coef0", // Name. 00120 Real, // Type. 00121 "Coef0 in kernel function (only for polynomial kernels).", // Overview 00122 "Coef0 in kernel function (only for polynomial kernels).", // Description. 00123 0, // Not zero if the parameter has lower limit. 00124 0, // Parameter's lower limit. 00125 0, // Not zero if the parameter has upper limit. 00126 0, // Parameter's upper limit. 00127 "0" // Parameter's typical (default) value. 00128 }, 00129 // C 00130 { 00131 C_ID, // Id. 00132 "Cost", // Name. 00133 Real, // Type. 00134 "Cost (only for C-SVC types).", // Overview 00135 "Cost (only for C-SVC types).", // Description. 00136 1, // Not zero if the parameter has lower limit. 00137 0.001, // Parameter's lower limit. 00138 0, // Not zero if the parameter has upper limit. 00139 0, // Parameter's upper limit. 00140 "1" // Parameter's typical (default) value. 00141 }, 00142 // Nu 00143 { 00144 NU_ID, // Id. 00145 "Nu", // Name. 00146 Real, // Type. 00147 "Nu (only for Nu-SVC and one-class SVM).", // Overview 00148 "Nu (only for Nu-SVC and one-class SVM).", // Description. 00149 1, // Not zero if the parameter has lower limit. 00150 0.001, // Parameter's lower limit. 00151 1, // Not zero if the parameter has upper limit. 00152 1, // Parameter's upper limit. 00153 "0.5" // Parameter's typical (default) value. 00154 }, 00155 // Probabilistic output 00156 { 00157 PROB_ID, // Id. 00158 "Probabilistic output", // Name. 00159 Integer, // Type. 00160 "Indicates if the output should be a probability instead of a binary response (only available for C-SVC and Nu-SVC).", // Overview 00161 "Indicates if the output should be a probability instead of a binary response (only available for C-SVC and Nu-SVC).", // Description. 00162 1, // Not zero if the parameter has lower limit. 00163 0, // Parameter's lower limit. 00164 1, // Not zero if the parameter has upper limit. 00165 1, // Parameter's upper limit. 00166 "1" // Parameter's typical (default) value. 00167 }, 00168 // Number of pseudo absences to be generated 00169 { 00170 PSEUDO_ID, // Id. 00171 "Number of pseudo-absences", // Name. 00172 Integer, // Type. 00173 "Number of pseudo-absences to be generated (only for C-SVC and Nu-SVC when no absences have been provided). When absences are needed, a zero parameter will default to the same number of presences.", // Overview 00174 "Number of pseudo-absences to be generated (only for C-SVC and Nu-SVC when no absences have been provided). When absences are needed, a zero parameter will default to the same number of presences.", // Description. 00175 1, // Not zero if the parameter has lower limit. 00176 0, // Parameter's lower limit. 00177 0, // Not zero if the parameter has upper limit. 00178 0, // Parameter's upper limit. 00179 "0" // Parameter's typical (default) value. 00180 }, 00181 }; 00182 00183 /************************************/ 00184 /*** Algorithm's general metadata ***/ 00185 00186 static AlgMetadata metadata = { 00187 00188 "SVM", // Id. 00189 "SVM (Support Vector Machines)", // Name. 00190 "0.5", // Version. 00191 00192 // Overview 00193 "Support vector machines (SVMs) are a set of related supervised learning methods that belong to a family of generalized linear classifiers. They can also be considered a special case of Tikhonov regularization. A special property of SVMs is that they simultaneously minimize the empirical classification error and maximize the geometric margin; hence they are also known as maximum margin classifiers. Content retrieved from Wikipedia on the 13th of June, 2007: http://en.wikipedia.org/w/index.php?title=Support_vector_machine&oldid=136646498.", 00194 00195 // Description. 00196 "Support vector machines map input vectors to a higher dimensional space where a maximal separating hyperplane is constructed. Two parallel hyperplanes are constructed on each side of the hyperplane that separates the data. The separating hyperplane is the hyperplane that maximises the distance between the two parallel hyperplanes. An assumption is made that the larger the margin or distance between these parallel hyperplanes the better the generalisation error of the classifier will be. The model produced by support vector classification only depends on a subset of the training data, because the cost function for building the model does not care about training points that lie beyond the margin. Content retrieved from Wikipedia on the 13th of June, 2007: http://en.wikipedia.org/w/index.php?title=Support_vector_machine&oldid=136646498. The openModeller implementation of SVMs makes use of the libsvm library version 2.85: Chih-Chung Chang and Chih-Jen Lin, LIBSVM: a library for support vector machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm.\n\nRelease history:\n version 0.1: initial release\n version 0.2: New parameter to specify the number of pseudo-absences to be generated; upgraded to libsvm 2.85; fixed memory leaks\n version 0.3: when absences are needed and the number of pseudo absences to be generated is zero, it will default to the same number of presences\n version 0.4: included missing serialization of C\n version 0.5: the indication if the algorithm needed normalized environmental data was not working when the algorithm was loaded from an existing model.", 00197 00198 "Vladimir N. Vapnik", // Algorithm author. 00199 "1) Vapnik, V. (1995) The Nature of Statistical Learning Theory. SpringerVerlag. 2) Schölkopf, B., Smola, A., Williamson, R. and Bartlett, P.L.(2000). New support vector algorithms. Neural Computation, 12, 1207-1245. 3) Schölkopf, B., Platt, J.C., Shawe-Taylor, J., Smola A.J. and Williamson, R.C. (2001). Estimating the support of a high-dimensional distribution. Neural Computation, 13, 1443-1471. 4) Cristianini, N. & Shawe-Taylor, J. (2000). An Introduction to Support Vector Machines and other kernel-based learning methods. Cambridge University Press.", // Bibliography. 00200 00201 "Renato De Giovanni in collaboration with Ana Carolina Lorena", // Code author. 00202 "renato [at] cria . org . br", // Code author's contact. 00203 00204 0, // Does not accept categorical data. 00205 0, // Does not need (pseudo)absence points. 00206 00207 NUM_PARAM, // Algorithm's parameters. 00208 parameters 00209 }; 00210 00211 // Note: I needed to copy this structure definition from svm.cpp, otherwise 00212 // our custom serialization would not compile. If there's any problem with this 00213 // approach, then I suggest removing this definition and then moving the svm_model 00214 // definition from svm.cpp to svm.h 00215 struct svm_model 00216 { 00217 svm_parameter param;// parameter 00218 int nr_class; // number of classes, = 2 in regression/one class svm 00219 int l; // total #SV 00220 svm_node **SV; // SVs (SV[l]) 00221 double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l]) 00222 double *rho; // constants in decision functions (rho[k*(k-1)/2]) 00223 double *probA; // parwise probability information 00224 double *probB; 00225 00226 // for classification only 00227 00228 int *label; // label of each class (label[k]) 00229 int *nSV; // number of SVs for each class (nSV[k]) 00230 // nSV[0] + nSV[1] + ... + nSV[k-1] = l 00231 // XXX 00232 int free_sv; // 1 if svm_model is created by svm_load_model 00233 // 0 if svm_model is created by svm_train 00234 }; 00235 00236 /****************************************************************/ 00237 /****************** Algorithm's factory function ****************/ 00238 00239 OM_ALG_DLL_EXPORT 00240 AlgorithmImpl * 00241 algorithmFactory() 00242 { 00243 return new SvmAlgorithm(); 00244 } 00245 00246 OM_ALG_DLL_EXPORT 00247 AlgMetadata const * 00248 algorithmMetadata() 00249 { 00250 return &metadata; 00251 } 00252 00253 00254 /*********************************************/ 00255 /************** SVM algorithm ****************/ 00256 00257 /*******************/ 00258 /*** constructor ***/ 00259 00260 SvmAlgorithm::SvmAlgorithm() : 00261 AlgorithmImpl( &metadata ), 00262 _done( false ), 00263 _num_layers( 0 ), 00264 _svm_model( 0 ), 00265 _presence_index( -1 ) 00266 { 00267 _normalizerPtr = new MeanVarianceNormalizer(); 00268 00269 // Needs to be initialized (see destructor) 00270 _svm_model = 0; 00271 00272 _svm_problem.l = 0; 00273 } 00274 00275 00276 /******************/ 00277 /*** destructor ***/ 00278 00279 SvmAlgorithm::~SvmAlgorithm() 00280 { 00281 if ( _svm_model ) { 00282 00283 svm_destroy_model( _svm_model ); 00284 } 00285 00286 if ( _svm_problem.l > 0 ) { 00287 00288 delete[] _svm_problem.y; 00289 00290 for ( int i = 0; i < _svm_problem.l; ++i ) { 00291 00292 delete _svm_problem.x[i]; 00293 } 00294 00295 delete[] _svm_problem.x; 00296 } 00297 } 00298 00299 /**************************/ 00300 /*** need Normalization ***/ 00301 int SvmAlgorithm::needNormalization() 00302 { 00303 int svm_type; 00304 00305 if ( done() ) { 00306 00307 if ( ! _normalizerPtr ) { 00308 00309 return 0; 00310 } 00311 } 00312 else { 00313 00314 if ( getParameter( SVMTYPE_ID, &svm_type ) && svm_type != 2 && _samp->numAbsence() == 0 ) { 00315 00316 // It will be necessary to generate pseudo absences, so do not waste 00317 // time normalizing things because normalization should ideally consider 00318 // all trainning points (including pseudo-absences). In this specific case, 00319 // normalization will take place in initialize(). 00320 return 0; 00321 } 00322 } 00323 00324 return 1; 00325 } 00326 00327 /******************/ 00328 /*** initialize ***/ 00329 int 00330 SvmAlgorithm::initialize() 00331 { 00332 // SVM type 00333 if ( ! getParameter( SVMTYPE_ID, &_svm_parameter.svm_type ) ) { 00334 00335 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" SVMTYPE_ID "' not passed.\n" ); 00336 return 0; 00337 } 00338 00339 // Need to check SVM type because some types from the svm library will not be supported 00340 if ( _svm_parameter.svm_type != 0 && 00341 _svm_parameter.svm_type != 1 && 00342 _svm_parameter.svm_type != 2 ) { 00343 00344 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" SVMTYPE_ID "' not set properly. It must be 0, 1 or 2.\n" ); 00345 return 0; 00346 } 00347 00348 // Kernel type 00349 if ( ! getParameter( KERNELTYPE_ID, &_svm_parameter.kernel_type ) ) { 00350 00351 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" KERNELTYPE_ID "' not passed.\n" ); 00352 return 0; 00353 } 00354 00355 // Need to check Kernel type because some kernels from the svm library will not be supported 00356 if ( _svm_parameter.kernel_type != 0 && 00357 _svm_parameter.kernel_type != 1 && 00358 _svm_parameter.kernel_type != 2 ) { 00359 00360 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" KERNELTYPE_ID "' not set properly. It must be 0, 1 or 2.\n" ); 00361 return 0; 00362 } 00363 00364 // Degree 00365 if ( ! getParameter( DEGREE_ID, &_svm_parameter.degree ) ) { 00366 00367 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" DEGREE_ID "' not passed.\n" ); 00368 return 0; 00369 } 00370 00371 // Gamma 00372 if ( ! getParameter( GAMMA_ID, &_svm_parameter.gamma ) ) { 00373 00374 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" GAMMA_ID "' not passed.\n" ); 00375 return 0; 00376 } 00377 00378 _num_layers = _samp->numIndependent(); 00379 00380 if ( _svm_parameter.gamma == 0 ) { 00381 00382 _svm_parameter.gamma = 1.0/_num_layers; 00383 } 00384 00385 // Coef0 00386 if ( ! getParameter( COEF0_ID, &_svm_parameter.coef0 ) ) { 00387 00388 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" COEF0_ID "' not passed.\n" ); 00389 return 0; 00390 } 00391 00392 // C 00393 if ( ! getParameter( C_ID, &_svm_parameter.C ) ) { 00394 00395 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" C_ID "' not passed.\n" ); 00396 return 0; 00397 } 00398 00399 // Nu 00400 if ( ! getParameter( NU_ID, &_svm_parameter.nu ) ) { 00401 00402 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" NU_ID "' not passed.\n" ); 00403 return 0; 00404 } 00405 00406 // Probabilistic output 00407 if ( ! getParameter( PROB_ID, &_svm_parameter.probability ) ) { 00408 00409 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" PROB_ID "' not passed.\n" ); 00410 return 0; 00411 } 00412 00413 // Check if probabilistic output is 0 or 1 00414 if ( _svm_parameter.probability != 0 && 00415 _svm_parameter.probability != 1 ) { 00416 00417 Log::instance()->error( SVM_LOG_PREFIX "Parameter '" PROB_ID "' not set properly. It must be 0 or 1.\n" ); 00418 return 0; 00419 } 00420 00421 // Probability estimates are not available for one-class SVM 00422 if ( _svm_parameter.svm_type == 2 ) { 00423 00424 Log::instance()->warn( SVM_LOG_PREFIX "Probability estimates are not available for one-class SVM. Ignoring parameter.\n" ); 00425 _svm_parameter.probability = 0; 00426 } 00427 00428 // Is this necessary? 00429 _svm_parameter.cache_size = 100; 00430 _svm_parameter.eps = 1e-3; 00431 _svm_parameter.p = 0.1; 00432 _svm_parameter.shrinking = 1; 00433 _svm_parameter.nr_weight = 0; 00434 _svm_parameter.weight_label = NULL; 00435 _svm_parameter.weight = NULL; 00436 00437 // Remove redundant entries 00438 //_samp->environmentallyUnique(); 00439 00440 // Check the number of presences 00441 int num_presences = _samp->numPresence(); 00442 00443 if ( num_presences == 0 ) { 00444 00445 Log::instance()->warn( SVM_LOG_PREFIX "No presence points inside the mask!\n" ); 00446 return 0; 00447 } 00448 00449 int num_absences = _samp->numAbsence(); 00450 00451 bool generate_pseudo_absences = false; 00452 00453 // All types of SVM will need absences, except one-class SVM 00454 if ( num_absences <= 0 && _svm_parameter.svm_type != 2 ) { 00455 00456 Log::instance()->warn( SVM_LOG_PREFIX "No absence points available.\n" ); 00457 00458 // Pseudo-absences will be generated later 00459 if ( ! getParameter( PSEUDO_ID, &num_absences ) ) { 00460 00461 Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences unspecified. Default will be %d (same number of presences).\n", num_presences ); 00462 00463 num_absences = num_presences; 00464 } 00465 else if ( num_absences == 0 ) { 00466 00467 Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences will be %d (same number of presences).\n", num_presences ); 00468 00469 num_absences = num_presences; 00470 } 00471 else if ( num_absences < 0 ) { 00472 00473 Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences must be a positive number.\n" ); 00474 return 0; 00475 } 00476 00477 generate_pseudo_absences = true; 00478 } 00479 00480 int num_points = num_presences; 00481 00482 // Sum absence points if not dealing with one-class SVM 00483 if ( _svm_parameter.svm_type != 2 ) { 00484 00485 num_points += num_absences; 00486 } 00487 00488 _svm_problem.l = num_points; 00489 00490 _svm_problem.y = new double[num_points]; 00491 _svm_problem.x = new svm_node*[num_points]; 00492 00493 // Load SVM problem with samples 00494 00495 OccurrencesPtr presences = _samp->getPresences(); 00496 00497 OccurrencesImpl::const_iterator p_iterator; 00498 OccurrencesImpl::const_iterator p_end; 00499 00500 int i = 0; // shared counter 00501 00502 // Absences 00503 00504 if ( _svm_parameter.svm_type != 2 ) { 00505 00506 OccurrencesPtr absences; 00507 00508 if ( generate_pseudo_absences ) { 00509 00510 Log::instance()->info( SVM_LOG_PREFIX "Generating pseudo-absences.\n" ); 00511 00512 absences = new OccurrencesImpl( presences->label(), presences->coordSystem() ); 00513 00514 for ( int i = 0; i < num_absences; ++i ) { 00515 00516 OccurrencePtr oc = _samp->getPseudoAbsence(); 00517 absences->insert( oc ); 00518 } 00519 00520 // Compute normalization with all points 00521 SamplerPtr mySamplerPtr = createSampler( _samp->getEnvironment(), presences, absences ); 00522 00523 _normalizerPtr->computeNormalization( mySamplerPtr ); 00524 00525 setNormalization( _samp ); 00526 00527 absences->normalize( _normalizerPtr ); 00528 } 00529 else { 00530 00531 // should be normalized already 00532 absences = _samp->getAbsences(); 00533 } 00534 00535 p_iterator = absences->begin(); 00536 p_end = absences->end(); 00537 00538 while ( p_iterator != p_end ) { 00539 00540 Sample point = (*p_iterator)->environment(); 00541 00542 _svm_problem.y[i] = -1; // absence 00543 00544 _svm_problem.x[i] = new svm_node[_num_layers+1]; 00545 00546 _getNode( _svm_problem.x[i], point ); 00547 00548 ++p_iterator; 00549 ++i; 00550 } 00551 } 00552 00553 // Presences (should be normalized already, in one way or another) 00554 00555 p_iterator = presences->begin(); 00556 p_end = presences->end(); 00557 00558 while ( p_iterator != p_end ) { 00559 00560 Sample point = (*p_iterator)->environment(); 00561 00562 _svm_problem.y[i] = +1; // presence 00563 00564 _svm_problem.x[i] = new svm_node[_num_layers+1]; 00565 00566 _getNode( _svm_problem.x[i], point ); 00567 00568 ++p_iterator; 00569 ++i; 00570 } 00571 00572 // Check parameters using svm library logic 00573 const char *error_msg; 00574 error_msg = svm_check_parameter( &_svm_problem, &_svm_parameter ); 00575 00576 if ( error_msg ) { 00577 00578 Log::instance()->error( error_msg ); 00579 return 0; 00580 } 00581 00582 return 1; 00583 } 00584 00585 00586 /***************/ 00587 /*** iterate ***/ 00588 int 00589 SvmAlgorithm::iterate() 00590 { 00591 _svm_model = svm_train( &_svm_problem, &_svm_parameter ); 00592 00593 if ( _svm_parameter.probability == 1 && svm_check_probability_model( _svm_model ) == 0 ){ 00594 00595 Log::instance()->error( SVM_LOG_PREFIX "Generated model cannot return probability estimates.\n" ); 00596 return 0; 00597 } 00598 00599 int * labels = new int[2]; 00600 00601 svm_get_labels( _svm_model, labels ); 00602 00603 _presence_index = ( labels[0] == +1 ) ? 0 : 1; 00604 00605 delete labels; 00606 00607 _done = true; 00608 00609 // debug 00610 //svm_save_model( "model.svm", _svm_model ); 00611 00612 return 1; 00613 } 00614 00615 00616 /************/ 00617 /*** done ***/ 00618 int 00619 SvmAlgorithm::done() const 00620 { 00621 return _done; 00622 } 00623 00624 /*****************/ 00625 /*** get Value ***/ 00626 Scalar 00627 SvmAlgorithm::getValue( const Sample& x ) const 00628 { 00629 svm_node * node = new svm_node[_num_layers+1]; 00630 00631 _getNode( node, x ); 00632 00633 double prob; 00634 00635 if ( _svm_parameter.probability == 1 ) { 00636 00637 // Probability output 00638 00639 double * estimates = new double[2]; 00640 00641 svm_predict_probability( _svm_model, node, estimates ); 00642 00643 prob = estimates[_presence_index]; 00644 00645 delete[] estimates; 00646 } 00647 else { 00648 00649 // Binary output 00650 00651 double class_predicted = svm_predict( _svm_model, node ); 00652 00653 prob = ( class_predicted < 0.0 ) ? 0 : 1; 00654 } 00655 00656 delete[] node; 00657 00658 return prob; 00659 } 00660 00661 /***********************/ 00662 /*** get Convergence ***/ 00663 int 00664 SvmAlgorithm::getConvergence( Scalar * const val ) const 00665 { 00666 *val = 1.0; 00667 return 1; 00668 } 00669 00670 /****************/ 00671 /*** get Node ***/ 00672 void 00673 SvmAlgorithm::_getNode( svm_node * node, const Sample& sample ) const 00674 { 00675 for ( int j = 0; j < _num_layers; ++j ) { 00676 00677 node[j].index = j+1; // attr index (must start with 1!) 00678 node[j].value = sample[j]; // attr value 00679 } 00680 00681 node[_num_layers].index = -1; // end of array 00682 node[_num_layers].value = 0; // end of array 00683 } 00684 00685 /****************************************************************/ 00686 /****************** configuration *******************************/ 00687 void 00688 SvmAlgorithm::_getConfiguration( ConfigurationPtr& config ) const 00689 { 00690 if ( ! _done ) 00691 return; 00692 00693 ConfigurationPtr model_config( new ConfigurationImpl("Svm") ); 00694 config->addSubsection( model_config ); 00695 00696 model_config->addNameValue( "NumLayers", _num_layers ); 00697 model_config->addNameValue( "Type", _svm_parameter.svm_type ); 00698 model_config->addNameValue( "KernelType", _svm_parameter.kernel_type ); 00699 model_config->addNameValue( "Probabilistic", _svm_parameter.probability ); 00700 model_config->addNameValue( "Degree", _svm_parameter.degree ); 00701 model_config->addNameValue( "Gamma", _svm_parameter.gamma ); 00702 model_config->addNameValue( "Coef0", _svm_parameter.coef0 ); 00703 model_config->addNameValue( "C", _svm_parameter.C ); 00704 model_config->addNameValue( "Rho", _svm_model->rho[0] ); // assuming always one 00705 00706 if ( _svm_parameter.probability == 1 ) { 00707 00708 model_config->addNameValue( "ProbA", _svm_model->probA[0] ); 00709 model_config->addNameValue( "ProbB", _svm_model->probB[0] ); 00710 } 00711 00712 if ( _svm_parameter.svm_type != 2 ) { 00713 00714 model_config->addNameValue( "NrSv", _svm_model->nSV, 2 ); 00715 } 00716 00717 ConfigurationPtr vectors_config( new ConfigurationImpl("Vectors") ); 00718 model_config->addSubsection( vectors_config ); 00719 00720 vectors_config->addNameValue( "Total", _svm_model->l ); 00721 00722 // Labels 00723 int * labels = new int[2]; 00724 00725 svm_get_labels( _svm_model, labels ); 00726 00727 model_config->addNameValue( "Labels", labels, 2 ); 00728 00729 delete labels; 00730 00731 // Vectors 00732 const double * const *sv_coef = _svm_model->sv_coef; 00733 const svm_node * const *SV = _svm_model->SV; 00734 00735 for ( int i = 0; i < _svm_model->l; i++ ) { 00736 00737 ConfigurationPtr vector_config( new ConfigurationImpl("Vector") ); 00738 vectors_config->addSubsection( vector_config ); 00739 00740 vector_config->addNameValue( "Coef", sv_coef[0][i] ); 00741 00742 const svm_node *p = SV[i]; 00743 00744 while ( p->index != -1 ) { 00745 00746 ConfigurationPtr node_config( new ConfigurationImpl("Node") ); 00747 vector_config->addSubsection( node_config ); 00748 00749 node_config->addNameValue( "Index", p->index ); 00750 node_config->addNameValue( "Value", p->value ); 00751 00752 p++; 00753 } 00754 } 00755 } 00756 00757 void 00758 SvmAlgorithm::_setConfiguration( const ConstConfigurationPtr& config ) 00759 { 00760 ConstConfigurationPtr model_config = config->getSubsection( "Svm", false ); 00761 00762 if ( ! model_config ) 00763 return; 00764 00765 _svm_model = (svm_model *)malloc( (1)*sizeof( svm_model ) ); 00766 00767 _svm_model->probA = NULL; 00768 _svm_model->probB = NULL; 00769 _svm_model->label = NULL; 00770 _svm_model->nSV = NULL; 00771 00772 _num_layers = model_config->getAttributeAsInt( "NumLayers", 0 ); 00773 _svm_parameter.svm_type = model_config->getAttributeAsInt( "Type", 0 ); 00774 _svm_parameter.kernel_type = model_config->getAttributeAsInt( "KernelType", 2 ); 00775 _svm_parameter.probability = model_config->getAttributeAsInt( "Probabilistic", 1 ); 00776 _svm_parameter.degree = model_config->getAttributeAsInt( "Degree", 3 ); 00777 _svm_parameter.gamma = model_config->getAttributeAsDouble( "Gamma", 0 ); 00778 _svm_parameter.coef0 = model_config->getAttributeAsDouble( "Coef0", 0 ); 00779 00780 try { 00781 00782 // Serialization of "C" was added in version 0.4 of this algorithm 00783 _svm_parameter.C = model_config->getAttributeAsDouble( "C", 1 ); 00784 } 00785 catch ( AttributeNotFound& e ) { 00786 00787 UNUSED( e ); 00788 } 00789 00790 _svm_parameter.cache_size = 100; 00791 _svm_parameter.eps = 1e-3; 00792 _svm_parameter.p = 0.1; 00793 _svm_parameter.shrinking = 1; 00794 _svm_parameter.nr_weight = 0; 00795 _svm_parameter.weight_label = NULL; 00796 _svm_parameter.weight = NULL; 00797 00798 _svm_model->nr_class = 2; // presence or absence 00799 00800 _svm_model->rho = new double[1]; 00801 00802 _svm_model->rho[0] = model_config->getAttributeAsDouble( "Rho", 0.0 ); // assuming always one 00803 00804 if ( _svm_parameter.probability == 1 ) { 00805 00806 _svm_model->probA = new double[1]; 00807 _svm_model->probB = new double[1]; 00808 00809 _svm_model->probA[0] = model_config->getAttributeAsDouble( "ProbA", 0.0 ); 00810 _svm_model->probB[0] = model_config->getAttributeAsDouble( "ProbB", 0.0 ); 00811 } 00812 00813 if ( _svm_parameter.svm_type != 2 ) { 00814 00815 int size; 00816 model_config->getAttributeAsIntArray( "NrSv", &_svm_model->nSV, &size ); 00817 } 00818 00819 ConstConfigurationPtr vectors_config = model_config->getSubsection( "Vectors", false ); 00820 00821 _svm_model->l = vectors_config->getAttributeAsInt( "Total", 0 ); 00822 00823 Configuration::subsection_list vectors = vectors_config->getAllSubsections(); 00824 00825 Configuration::subsection_list::iterator vec = vectors.begin(); 00826 Configuration::subsection_list::iterator last_vec = vectors.end(); 00827 00828 _svm_model->sv_coef = new double*[_svm_model->nr_class - 1]; 00829 _svm_model->sv_coef[0] = new double[_svm_model->l]; 00830 00831 _svm_model->SV = new svm_node*[_svm_model->l]; 00832 00833 int i = 0; 00834 00835 for ( ; vec != last_vec; ++vec ) { 00836 00837 if ( (*vec)->getName() != "Vector" ) { 00838 00839 continue; 00840 } 00841 00842 _svm_model->sv_coef[0][i] = (*vec)->getAttributeAsDouble( "Coef", 0.0 ); 00843 00844 _svm_model->SV[i] = new svm_node[_num_layers + 1]; 00845 00846 Configuration::subsection_list nodes = (*vec)->getAllSubsections(); 00847 00848 Configuration::subsection_list::iterator node = nodes.begin(); 00849 Configuration::subsection_list::iterator last_node = nodes.end(); 00850 00851 int j = 0; 00852 00853 for ( ; node != last_node; ++node ) { 00854 00855 if ( (*node)->getName() != "Node" ) { 00856 00857 continue; 00858 } 00859 00860 _svm_model->SV[i][j].index = (*node)->getAttributeAsInt( "Index", 0 ); 00861 _svm_model->SV[i][j].value = (*node)->getAttributeAsDouble( "Value", 0.0 ); 00862 00863 ++j; 00864 } 00865 00866 _svm_model->SV[i][j].index = -1; 00867 _svm_model->SV[i][j].value = 0.0; 00868 00869 ++i; 00870 } 00871 00872 // Labels 00873 int size; 00874 model_config->getAttributeAsIntArray( "Labels", &_svm_model->label, &size ); 00875 00876 _presence_index = ( _svm_model->label[0] == +1 ) ? 0 : 1; 00877 00878 _svm_model->param = _svm_parameter; 00879 00880 _svm_model->free_sv = 1; 00881 00882 _done = true; 00883 }