openModeller  Version 1.5.0
svm_alg.cpp
Go to the documentation of this file.
1 
27 #include "svm_alg.hh"
28 #include "svm.h"
30 #include <openmodeller/Sampler.hh>
32 
33 #include <string.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 
37 //debug
38 #include <iostream>
39 
40 using namespace std;
41 
42 /****************************************************************/
43 /********************** Algorithm's Metadata ********************/
44 
// Number of entries in the algorithm's parameter metadata table.
45 #define NUM_PARAM 9
46 
// Parameter identifiers: the same strings are used in the metadata table and
// in the getParameter() look-ups performed by this algorithm.
47 #define SVMTYPE_ID "SvmType"
48 #define KERNELTYPE_ID "KernelType"
49 #define DEGREE_ID "Degree"
50 #define GAMMA_ID "Gamma"
51 #define COEF0_ID "Coef0"
52 #define C_ID "C"
53 #define NU_ID "Nu"
54 #define PROB_ID "ProbabilisticOutput"
55 #define PSEUDO_ID "NumberOfPseudoAbsences"
56 
// Prefix concatenated (at compile time) in front of this algorithm's log messages.
57 #define SVM_LOG_PREFIX "SvmAlgorithm: "
58 
59 /******************************/
60 /*** Algorithm's parameters ***/
61 
63 
64  // SVM type
65  {
66  SVMTYPE_ID, // Id.
67  "SVM type", // Name.
68  Integer, // Type.
69  "Type of SVM: 0 = C-SVC, 1 = Nu-SVC, 2 = one-class SVM", // Overview
70  "Type of SVM: 0 = C-SVC, 1 = Nu-SVC, 2 = one-class SVM", // Description.
71  1, // Not zero if the parameter has lower limit.
72  0, // Parameter's lower limit.
73  1, // Not zero if the parameter has upper limit.
74  2, // Parameter's upper limit.
75  "0" // Parameter's typical (default) value.
76  },
77  // Kernel type
78  {
79  KERNELTYPE_ID, // Id.
80  "Kernel type", // Name.
81  Integer, // Type.
82  "Type of kernel function: 0 = linear: u'*v , 1 = polynomial: (gamma*u'*v + coef0)^degree , 2 = radial basis function: exp(-gamma*|u-v|^2)", // Overview
83  "Type of kernel function: 0 = linear: u'*v , 1 = polynomial: (gamma*u'*v + coef0)^degree , 2 = radial basis function: exp(-gamma*|u-v|^2)", // Description.
84  1, // Not zero if the parameter has lower limit.
85  0, // Parameter's lower limit.
86  1, // Not zero if the parameter has upper limit.
87  4, // Parameter's upper limit.
88  "2" // Parameter's typical (default) value.
89  },
90  // Degree
91  {
92  DEGREE_ID, // Id.
93  "Degree", // Name.
94  Integer, // Type.
95  "Degree in kernel function (only for polynomial kernels).", // Overview
96  "Degree in kernel function (only for polynomial kernels).", // Description.
97  1, // Not zero if the parameter has lower limit.
98  0, // Parameter's lower limit.
99  0, // Not zero if the parameter has upper limit.
100  0, // Parameter's upper limit.
101  "3" // Parameter's typical (default) value.
102  },
103  // Gamma
104  {
105  GAMMA_ID, // Id.
106  "Gamma", // Name.
107  Real, // Type.
108  "Gamma in kernel function (only for polynomial and radial basis kernels). When set to zero, the default value will actually be 1/k, where k is the number of layers.", // Overview
109  "Gamma in kernel function (only for polynomial and radial basis kernels). When set to zero, the default value will actually be 1/k, where k is the number of layers.", // Description.
110  0, // Not zero if the parameter has lower limit.
111  0, // Parameter's lower limit.
112  0, // Not zero if the parameter has upper limit.
113  0, // Parameter's upper limit.
114  "0" // Parameter's typical (default) value.
115  },
116  // Coef0
117  {
118  COEF0_ID, // Id.
119  "Coef0", // Name.
120  Real, // Type.
121  "Coef0 in kernel function (only for polynomial kernels).", // Overview
122  "Coef0 in kernel function (only for polynomial kernels).", // Description.
123  0, // Not zero if the parameter has lower limit.
124  0, // Parameter's lower limit.
125  0, // Not zero if the parameter has upper limit.
126  0, // Parameter's upper limit.
127  "0" // Parameter's typical (default) value.
128  },
129  // C
130  {
131  C_ID, // Id.
132  "Cost", // Name.
133  Real, // Type.
134  "Cost (only for C-SVC types).", // Overview
135  "Cost (only for C-SVC types).", // Description.
136  1, // Not zero if the parameter has lower limit.
137  0.001, // Parameter's lower limit.
138  0, // Not zero if the parameter has upper limit.
139  0, // Parameter's upper limit.
140  "1" // Parameter's typical (default) value.
141  },
142  // Nu
143  {
144  NU_ID, // Id.
145  "Nu", // Name.
146  Real, // Type.
147  "Nu (only for Nu-SVC and one-class SVM).", // Overview
148  "Nu (only for Nu-SVC and one-class SVM).", // Description.
149  1, // Not zero if the parameter has lower limit.
150  0.001, // Parameter's lower limit.
151  1, // Not zero if the parameter has upper limit.
152  1, // Parameter's upper limit.
153  "0.5" // Parameter's typical (default) value.
154  },
155  // Probabilistic output
156  {
157  PROB_ID, // Id.
158  "Probabilistic output", // Name.
159  Integer, // Type.
160  "Indicates if the output should be a probability instead of a binary response (only available for C-SVC and Nu-SVC).", // Overview
161  "Indicates if the output should be a probability instead of a binary response (only available for C-SVC and Nu-SVC).", // Description.
162  1, // Not zero if the parameter has lower limit.
163  0, // Parameter's lower limit.
164  1, // Not zero if the parameter has upper limit.
165  1, // Parameter's upper limit.
166  "1" // Parameter's typical (default) value.
167  },
168  // Number of pseudo absences to be generated
169  {
170  PSEUDO_ID, // Id.
171  "Number of pseudo-absences", // Name.
172  Integer, // Type.
173  "Number of pseudo-absences to be generated (only for C-SVC and Nu-SVC when no absences have been provided). When absences are needed, a zero parameter will default to the same number of presences.", // Overview
174  "Number of pseudo-absences to be generated (only for C-SVC and Nu-SVC when no absences have been provided). When absences are needed, a zero parameter will default to the same number of presences.", // Description.
175  1, // Not zero if the parameter has lower limit.
176  0, // Parameter's lower limit.
177  0, // Not zero if the parameter has upper limit.
178  0, // Parameter's upper limit.
179  "0" // Parameter's typical (default) value.
180  },
181 };
182 
183 /************************************/
184 /*** Algorithm's general metadata ***/
185 
187 
188  "SVM", // Id.
189  "SVM (Support Vector Machines)", // Name.
190  "0.5", // Version.
191 
192  // Overview
193  "Support vector machines (SVMs) are a set of related supervised learning methods that belong to a family of generalized linear classifiers. They can also be considered a special case of Tikhonov regularization. A special property of SVMs is that they simultaneously minimize the empirical classification error and maximize the geometric margin; hence they are also known as maximum margin classifiers. Content retrieved from Wikipedia on the 13th of June, 2007: http://en.wikipedia.org/w/index.php?title=Support_vector_machine&oldid=136646498.",
194 
195  // Description.
196  "Support vector machines map input vectors to a higher dimensional space where a maximal separating hyperplane is constructed. Two parallel hyperplanes are constructed on each side of the hyperplane that separates the data. The separating hyperplane is the hyperplane that maximises the distance between the two parallel hyperplanes. An assumption is made that the larger the margin or distance between these parallel hyperplanes the better the generalisation error of the classifier will be. The model produced by support vector classification only depends on a subset of the training data, because the cost function for building the model does not care about training points that lie beyond the margin. Content retrieved from Wikipedia on the 13th of June, 2007: http://en.wikipedia.org/w/index.php?title=Support_vector_machine&oldid=136646498. The openModeller implementation of SVMs makes use of the libsvm library version 2.85: Chih-Chung Chang and Chih-Jen Lin, LIBSVM: a library for support vector machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm.\n\nRelease history:\n version 0.1: initial release\n version 0.2: New parameter to specify the number of pseudo-absences to be generated; upgraded to libsvm 2.85; fixed memory leaks\n version 0.3: when absences are needed and the number of pseudo absences to be generated is zero, it will default to the same number of presences\n version 0.4: included missing serialization of C\n version 0.5: the indication if the algorithm needed normalized environmental data was not working when the algorithm was loaded from an existing model.",
197 
198  "Vladimir N. Vapnik", // Algorithm author.
199  "1) Vapnik, V. (1995) The Nature of Statistical Learning Theory. SpringerVerlag. 2) Schölkopf, B., Smola, A., Williamson, R. and Bartlett, P.L.(2000). New support vector algorithms. Neural Computation, 12, 1207-1245. 3) Schölkopf, B., Platt, J.C., Shawe-Taylor, J., Smola A.J. and Williamson, R.C. (2001). Estimating the support of a high-dimensional distribution. Neural Computation, 13, 1443-1471. 4) Cristianini, N. & Shawe-Taylor, J. (2000). An Introduction to Support Vector Machines and other kernel-based learning methods. Cambridge University Press.", // Bibliography.
200 
201  "Renato De Giovanni in collaboration with Ana Carolina Lorena", // Code author.
202  "renato [at] cria . org . br", // Code author's contact.
203 
204  0, // Does not accept categorical data.
205  0, // Does not need (pseudo)absence points.
206 
207  NUM_PARAM, // Algorithm's parameters.
208  parameters
209 };
210 
211 // Note: I needed to copy this structure definition from svm.cpp, otherwise
212 // our custom serialization would not compile. If there's any problem with this
213 // approach, then I suggest removing this definition and then moving the svm_model
214 // definition from svm.cpp to svm.h
// NOTE(review): because this is a copy, the member order and types presumably
// have to match the definition inside the bundled svm.cpp exactly - confirm
// whenever the libsvm sources are upgraded.
215 struct svm_model
216 {
217  svm_parameter param;// parameter
218  int nr_class; // number of classes, = 2 in regression/one class svm
219  int l; // total #SV
220  svm_node **SV; // SVs (SV[l])
221  double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
222  double *rho; // constants in decision functions (rho[k*(k-1)/2])
223  double *probA; // pairwise probability information
224  double *probB; // pairwise probability information (second coefficient array)
225 
226  // for classification only
227 
228  int *label; // label of each class (label[k])
229  int *nSV; // number of SVs for each class (nSV[k])
230  // nSV[0] + nSV[1] + ... + nSV[k-1] = l
231  // XXX
232  int free_sv; // 1 if svm_model is created by svm_load_model
233  // 0 if svm_model is created by svm_train
234 };
235 
236 /****************************************************************/
237 /****************** Algorithm's factory function ****************/
238 
239 OM_ALG_DLL_EXPORT
242 {
243  return new SvmAlgorithm();
244 }
245 
246 OM_ALG_DLL_EXPORT
247 AlgMetadata const *
249 {
250  return &metadata;
251 }
252 
253 
254 /*********************************************/
255 /************** SVM algorithm ****************/
256 
257 /*******************/
258 /*** constructor ***/
259 
// Remaining member initializers and body of the SvmAlgorithm constructor.
// NOTE(review): the constructor's opening line(s) and the first statement of
// its body are not present in this listing, so only the visible part is
// described here.
// Initial state: not done, no layers, no model, presence index unknown (-1).
262  _done( false ),
263  _num_layers( 0 ),
264  _svm_model( 0 ),
265  _presence_index( -1 )
266 {
268 
269  // Needs to be initialized (see destructor)
 // (this repeats what the member initializer above already does)
270  _svm_model = 0;
271 
 // The destructor only frees the problem arrays when l > 0, so l must start at zero.
272  _svm_problem.l = 0;
273 }
274 
275 
276 /******************/
277 /*** destructor ***/
278 
280 {
281  if ( _svm_model ) {
282 
283  svm_destroy_model( _svm_model );
284  }
285 
286  if ( _svm_problem.l > 0 ) {
287 
288  delete[] _svm_problem.y;
289 
290  for ( int i = 0; i < _svm_problem.l; ++i ) {
291 
292  delete _svm_problem.x[i];
293  }
294 
295  delete[] _svm_problem.x;
296  }
297 }
298 
299 /**************************/
300 /*** need Normalization ***/
302 {
303  int svm_type;
304 
305  if ( done() ) {
306 
307  if ( ! _normalizerPtr ) {
308 
309  return 0;
310  }
311  }
312  else {
313 
314  if ( getParameter( SVMTYPE_ID, &svm_type ) && svm_type != 2 && _samp->numAbsence() == 0 ) {
315 
316  // It will be necessary to generate pseudo absences, so do not waste
317  // time normalizing things because normalization should ideally consider
318  // all trainning points (including pseudo-absences). In this specific case,
319  // normalization will take place in initialize().
320  return 0;
321  }
322  }
323 
324  return 1;
325 }
326 
327 /******************/
328 /*** initialize ***/
// Reads and validates the algorithm parameters, fills _svm_parameter, builds
// the training problem (_svm_problem) from the sampler's absence and presence
// points (generating pseudo-absences when required) and finally checks the
// setup with svm_check_parameter().
// Returns 1 on success and 0 when a parameter is missing/invalid, when there
// are no presence points, or when svm_check_parameter() reports an error.
// NOTE(review): the line carrying the function name and a few statements
// inside the body are not present in this listing (p_iterator and p_end are
// used below without a visible declaration, and there is a gap right after
// the computeNormalization() call).
329 int
331 {
332  // SVM type
333  if ( ! getParameter( SVMTYPE_ID, &_svm_parameter.svm_type ) ) {
334 
335  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" SVMTYPE_ID "' not passed.\n" );
336  return 0;
337  }
338 
339  // Need to check SVM type because some types from the svm library will not be supported
340  if ( _svm_parameter.svm_type != 0 &&
341  _svm_parameter.svm_type != 1 &&
342  _svm_parameter.svm_type != 2 ) {
343 
344  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" SVMTYPE_ID "' not set properly. It must be 0, 1 or 2.\n" );
345  return 0;
346  }
347 
348  // Kernel type
349  if ( ! getParameter( KERNELTYPE_ID, &_svm_parameter.kernel_type ) ) {
350 
351  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" KERNELTYPE_ID "' not passed.\n" );
352  return 0;
353  }
354 
355  // Need to check Kernel type because some kernels from the svm library will not be supported
356  if ( _svm_parameter.kernel_type != 0 &&
357  _svm_parameter.kernel_type != 1 &&
358  _svm_parameter.kernel_type != 2 ) {
359 
360  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" KERNELTYPE_ID "' not set properly. It must be 0, 1 or 2.\n" );
361  return 0;
362  }
363 
364  // Degree
365  if ( ! getParameter( DEGREE_ID, &_svm_parameter.degree ) ) {
366 
367  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" DEGREE_ID "' not passed.\n" );
368  return 0;
369  }
370 
371  // Gamma
372  if ( ! getParameter( GAMMA_ID, &_svm_parameter.gamma ) ) {
373 
374  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" GAMMA_ID "' not passed.\n" );
375  return 0;
376  }
377 
378  _num_layers = _samp->numIndependent();
379 
 // A gamma of zero means "use the default", which is 1/number of layers.
 // NOTE(review): if numIndependent() can return 0 the division below yields a
 // non-finite gamma - confirm whether that case needs an explicit check.
380  if ( _svm_parameter.gamma == 0 ) {
381 
382  _svm_parameter.gamma = 1.0/_num_layers;
383  }
384 
385  // Coef0
386  if ( ! getParameter( COEF0_ID, &_svm_parameter.coef0 ) ) {
387 
388  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" COEF0_ID "' not passed.\n" );
389  return 0;
390  }
391 
392  // C
393  if ( ! getParameter( C_ID, &_svm_parameter.C ) ) {
394 
395  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" C_ID "' not passed.\n" );
396  return 0;
397  }
398 
399  // Nu
400  if ( ! getParameter( NU_ID, &_svm_parameter.nu ) ) {
401 
402  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" NU_ID "' not passed.\n" );
403  return 0;
404  }
405 
406  // Probabilistic output
407  if ( ! getParameter( PROB_ID, &_svm_parameter.probability ) ) {
408 
409  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" PROB_ID "' not passed.\n" );
410  return 0;
411  }
412 
413  // Check if probabilistic output is 0 or 1
414  if ( _svm_parameter.probability != 0 &&
415  _svm_parameter.probability != 1 ) {
416 
417  Log::instance()->error( SVM_LOG_PREFIX "Parameter '" PROB_ID "' not set properly. It must be 0 or 1.\n" );
418  return 0;
419  }
420 
421  // Probability estimates are not available for one-class SVM
 // (the warning below is emitted for every one-class run, whatever value was requested)
422  if ( _svm_parameter.svm_type == 2 ) {
423 
424  Log::instance()->warn( SVM_LOG_PREFIX "Probability estimates are not available for one-class SVM. Ignoring parameter.\n" );
425  _svm_parameter.probability = 0;
426  }
427 
428  // Is this necessary?
 // Remaining svm_parameter fields are set to fixed values (no class weights).
429  _svm_parameter.cache_size = 100;
430  _svm_parameter.eps = 1e-3;
431  _svm_parameter.p = 0.1;
432  _svm_parameter.shrinking = 1;
433  _svm_parameter.nr_weight = 0;
434  _svm_parameter.weight_label = NULL;
435  _svm_parameter.weight = NULL;
436 
437  // Remove redundant entries
438  //_samp->environmentallyUnique();
439 
440  // Check the number of presences
441  int num_presences = _samp->numPresence();
442 
443  if ( num_presences == 0 ) {
444 
445  Log::instance()->warn( SVM_LOG_PREFIX "No presence points inside the mask!\n" );
446  return 0;
447  }
448 
449  int num_absences = _samp->numAbsence();
450 
451  bool generate_pseudo_absences = false;
452 
453  // All types of SVM will need absences, except one-class SVM
454  if ( num_absences <= 0 && _svm_parameter.svm_type != 2 ) {
455 
456  Log::instance()->warn( SVM_LOG_PREFIX "No absence points available.\n" );
457 
458  // Pseudo-absences will be generated later
 // From here on num_absences holds the number of pseudo-absences to generate.
459  if ( ! getParameter( PSEUDO_ID, &num_absences ) ) {
460 
461  Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences unspecified. Default will be %d (same number of presences).\n", num_presences );
462 
463  num_absences = num_presences;
464  }
465  else if ( num_absences == 0 ) {
466 
467  Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences will be %d (same number of presences).\n", num_presences );
468 
469  num_absences = num_presences;
470  }
471  else if ( num_absences < 0 ) {
472 
473  Log::instance()->warn( SVM_LOG_PREFIX "Number of pseudo absences must be a positive number.\n" );
474  return 0;
475  }
476 
477  generate_pseudo_absences = true;
478  }
479 
480  int num_points = num_presences;
481 
482  // Sum absence points if not dealing with one-class SVM
483  if ( _svm_parameter.svm_type != 2 ) {
484 
485  num_points += num_absences;
486  }
487 
 // The destructor uses _svm_problem.l > 0 to decide whether y, x and the
 // rows of x must be freed.
 // NOTE(review): calling initialize() a second time would overwrite these
 // pointers without releasing the previous arrays - confirm it is called once.
488  _svm_problem.l = num_points;
489 
490  _svm_problem.y = new double[num_points];
491  _svm_problem.x = new svm_node*[num_points];
492 
493  // Load SVM problem with samples
494 
495  OccurrencesPtr presences = _samp->getPresences();
496 
499 
500  int i = 0; // shared counter
501 
502  // Absences
503 
504  if ( _svm_parameter.svm_type != 2 ) {
505 
506  OccurrencesPtr absences;
507 
508  if ( generate_pseudo_absences ) {
509 
510  Log::instance()->info( SVM_LOG_PREFIX "Generating pseudo-absences.\n" );
511 
512  absences = new OccurrencesImpl( presences->label(), presences->coordSystem() );
513 
 // This loop variable shadows the shared counter declared above; the
 // shared counter is still zero when the loop finishes.
514  for ( int i = 0; i < num_absences; ++i ) {
515 
516  OccurrencePtr oc = _samp->getPseudoAbsence();
517  absences->insert( oc );
518  }
519 
520  // Compute normalization with all points
521  SamplerPtr mySamplerPtr = createSampler( _samp->getEnvironment(), presences, absences );
522 
 // NOTE(review): _normalizerPtr is dereferenced without a null check here,
 // while needNormalization() does test it for null - confirm it is always set.
523  _normalizerPtr->computeNormalization( mySamplerPtr );
524 
526 
527  absences->normalize( _normalizerPtr );
528  }
529  else {
530 
531  // should be normalized already
532  absences = _samp->getAbsences();
533  }
534 
535  p_iterator = absences->begin();
536  p_end = absences->end();
537 
 // Absences are stored first, labelled -1; each row has one node per layer
 // plus the terminating node written by _getNode().
538  while ( p_iterator != p_end ) {
539 
540  Sample point = (*p_iterator)->environment();
541 
542  _svm_problem.y[i] = -1; // absence
543 
544  _svm_problem.x[i] = new svm_node[_num_layers+1];
545 
546  _getNode( _svm_problem.x[i], point );
547 
548  ++p_iterator;
549  ++i;
550  }
551  }
552 
553  // Presences (should be normalized already, in one way or another)
554 
555  p_iterator = presences->begin();
556  p_end = presences->end();
557 
 // Presences follow the absences in the same arrays, labelled +1.
558  while ( p_iterator != p_end ) {
559 
560  Sample point = (*p_iterator)->environment();
561 
562  _svm_problem.y[i] = +1; // presence
563 
564  _svm_problem.x[i] = new svm_node[_num_layers+1];
565 
566  _getNode( _svm_problem.x[i], point );
567 
568  ++p_iterator;
569  ++i;
570  }
571 
572  // Check parameters using svm library logic
573  const char *error_msg;
574  error_msg = svm_check_parameter( &_svm_problem, &_svm_parameter );
575 
576  if ( error_msg ) {
577 
 // NOTE(review): the library message is passed as the format argument of
 // Log::error(format, ...); a '%' inside it would be interpreted as a
 // conversion - consider logging it through "%s".
578  Log::instance()->error( error_msg );
579  return 0;
580  }
581 
582  return 1;
583 }
584 
585 
586 /***************/
587 /*** iterate ***/
588 int
590 {
591  _svm_model = svm_train( &_svm_problem, &_svm_parameter );
592 
593  if ( _svm_parameter.probability == 1 && svm_check_probability_model( _svm_model ) == 0 ){
594 
595  Log::instance()->error( SVM_LOG_PREFIX "Generated model cannot return probability estimates.\n" );
596  return 0;
597  }
598 
599  int * labels = new int[2];
600 
601  svm_get_labels( _svm_model, labels );
602 
603  _presence_index = ( labels[0] == +1 ) ? 0 : 1;
604 
605  delete labels;
606 
607  _done = true;
608 
609  // debug
610  //svm_save_model( "model.svm", _svm_model );
611 
612  return 1;
613 }
614 
615 
616 /************/
617 /*** done ***/
618 int
620 {
621  return _done;
622 }
623 
624 /*****************/
625 /*** get Value ***/
626 Scalar
628 {
629  svm_node * node = new svm_node[_num_layers+1];
630 
631  _getNode( node, x );
632 
633  double prob;
634 
635  if ( _svm_parameter.probability == 1 ) {
636 
637  // Probability output
638 
639  double * estimates = new double[2];
640 
641  svm_predict_probability( _svm_model, node, estimates );
642 
643  prob = estimates[_presence_index];
644 
645  delete[] estimates;
646  }
647  else {
648 
649  // Binary output
650 
651  double class_predicted = svm_predict( _svm_model, node );
652 
653  prob = ( class_predicted < 0.0 ) ? 0 : 1;
654  }
655 
656  delete[] node;
657 
658  return prob;
659 }
660 
661 /***********************/
662 /*** get Convergence ***/
663 int
665 {
666  *val = 1.0;
667  return 1;
668 }
669 
670 /****************/
671 /*** get Node ***/
672 void
673 SvmAlgorithm::_getNode( svm_node * node, const Sample& sample ) const
674 {
675  for ( int j = 0; j < _num_layers; ++j ) {
676 
677  node[j].index = j+1; // attr index (must start with 1!)
678  node[j].value = sample[j]; // attr value
679  }
680 
681  node[_num_layers].index = -1; // end of array
682  node[_num_layers].value = 0; // end of array
683 }
684 
685 /****************************************************************/
686 /****************** configuration *******************************/
687 void
689 {
690  if ( ! _done )
691  return;
692 
693  ConfigurationPtr model_config( new ConfigurationImpl("Svm") );
694  config->addSubsection( model_config );
695 
696  model_config->addNameValue( "NumLayers", _num_layers );
697  model_config->addNameValue( "Type", _svm_parameter.svm_type );
698  model_config->addNameValue( "KernelType", _svm_parameter.kernel_type );
699  model_config->addNameValue( "Probabilistic", _svm_parameter.probability );
700  model_config->addNameValue( "Degree", _svm_parameter.degree );
701  model_config->addNameValue( "Gamma", _svm_parameter.gamma );
702  model_config->addNameValue( "Coef0", _svm_parameter.coef0 );
703  model_config->addNameValue( "C", _svm_parameter.C );
704  model_config->addNameValue( "Rho", _svm_model->rho[0] ); // assuming always one
705 
706  if ( _svm_parameter.probability == 1 ) {
707 
708  model_config->addNameValue( "ProbA", _svm_model->probA[0] );
709  model_config->addNameValue( "ProbB", _svm_model->probB[0] );
710  }
711 
712  if ( _svm_parameter.svm_type != 2 ) {
713 
714  model_config->addNameValue( "NrSv", _svm_model->nSV, 2 );
715  }
716 
717  ConfigurationPtr vectors_config( new ConfigurationImpl("Vectors") );
718  model_config->addSubsection( vectors_config );
719 
720  vectors_config->addNameValue( "Total", _svm_model->l );
721 
722  // Labels
723  int * labels = new int[2];
724 
725  svm_get_labels( _svm_model, labels );
726 
727  model_config->addNameValue( "Labels", labels, 2 );
728 
729  delete labels;
730 
731  // Vectors
732  const double * const *sv_coef = _svm_model->sv_coef;
733  const svm_node * const *SV = _svm_model->SV;
734 
735  for ( int i = 0; i < _svm_model->l; i++ ) {
736 
737  ConfigurationPtr vector_config( new ConfigurationImpl("Vector") );
738  vectors_config->addSubsection( vector_config );
739 
740  vector_config->addNameValue( "Coef", sv_coef[0][i] );
741 
742  const svm_node *p = SV[i];
743 
744  while ( p->index != -1 ) {
745 
746  ConfigurationPtr node_config( new ConfigurationImpl("Node") );
747  vector_config->addSubsection( node_config );
748 
749  node_config->addNameValue( "Index", p->index );
750  node_config->addNameValue( "Value", p->value );
751 
752  p++;
753  }
754  }
755 }
756 
// Rebuilds the SVM model from the "Svm" subsection of a serialized
// configuration: restores the kernel parameters, rho, the probability
// coefficients, the number of support vectors per class, the support vectors
// with their coefficients and the class labels, then marks the algorithm as
// done. Returns without changing anything when the subsection is absent.
// NOTE(review): the line carrying the function name and one statement near
// the end of the body are not present in this listing.
757 void
759 {
760  ConstConfigurationPtr model_config = config->getSubsection( "Svm", false );
761 
762  if ( ! model_config )
763  return;
764 
 // NOTE(review): the model struct comes from malloc() while its members are
 // allocated below with new[]; confirm this matches how svm_destroy_model()
 // (called by the destructor) releases them.
765  _svm_model = (svm_model *)malloc( (1)*sizeof( svm_model ) );
766 
767  _svm_model->probA = NULL;
768  _svm_model->probB = NULL;
769  _svm_model->label = NULL;
770  _svm_model->nSV = NULL;
771 
772  _num_layers = model_config->getAttributeAsInt( "NumLayers", 0 );
773  _svm_parameter.svm_type = model_config->getAttributeAsInt( "Type", 0 );
774  _svm_parameter.kernel_type = model_config->getAttributeAsInt( "KernelType", 2 );
775  _svm_parameter.probability = model_config->getAttributeAsInt( "Probabilistic", 1 );
776  _svm_parameter.degree = model_config->getAttributeAsInt( "Degree", 3 );
777  _svm_parameter.gamma = model_config->getAttributeAsDouble( "Gamma", 0 );
778  _svm_parameter.coef0 = model_config->getAttributeAsDouble( "Coef0", 0 );
779 
780  try {
781 
782  // Serialization of "C" was added in version 0.4 of this algorithm
783  _svm_parameter.C = model_config->getAttributeAsDouble( "C", 1 );
784  }
785  catch ( AttributeNotFound& e ) {
786 
 // Older serialized models have no "C" attribute: the exception is ignored
 // and _svm_parameter.C is left as it was.
787  UNUSED( e );
788  }
789 
 // Same fixed values that initialize() assigns to these fields.
790  _svm_parameter.cache_size = 100;
791  _svm_parameter.eps = 1e-3;
792  _svm_parameter.p = 0.1;
793  _svm_parameter.shrinking = 1;
794  _svm_parameter.nr_weight = 0;
795  _svm_parameter.weight_label = NULL;
796  _svm_parameter.weight = NULL;
797 
798  _svm_model->nr_class = 2; // presence or absence
799 
800  _svm_model->rho = new double[1];
801 
802  _svm_model->rho[0] = model_config->getAttributeAsDouble( "Rho", 0.0 ); // assuming always one
803 
804  if ( _svm_parameter.probability == 1 ) {
805 
806  _svm_model->probA = new double[1];
807  _svm_model->probB = new double[1];
808 
809  _svm_model->probA[0] = model_config->getAttributeAsDouble( "ProbA", 0.0 );
810  _svm_model->probB[0] = model_config->getAttributeAsDouble( "ProbB", 0.0 );
811  }
812 
813  if ( _svm_parameter.svm_type != 2 ) {
814 
 // The number of values read (size) is not checked afterwards.
815  int size;
816  model_config->getAttributeAsIntArray( "NrSv", &_svm_model->nSV, &size );
817  }
818 
 // NOTE(review): unlike model_config above, vectors_config is not tested for
 // null before it is dereferenced - presumably getSubsection( name, false )
 // can return a null pointer; confirm and guard if so.
819  ConstConfigurationPtr vectors_config = model_config->getSubsection( "Vectors", false );
820 
821  _svm_model->l = vectors_config->getAttributeAsInt( "Total", 0 );
822 
823  Configuration::subsection_list vectors = vectors_config->getAllSubsections();
824 
825  Configuration::subsection_list::iterator vec = vectors.begin();
826  Configuration::subsection_list::iterator last_vec = vectors.end();
827 
 // With nr_class == 2 there is a single row of coefficients.
828  _svm_model->sv_coef = new double*[_svm_model->nr_class - 1];
829  _svm_model->sv_coef[0] = new double[_svm_model->l];
830 
831  _svm_model->SV = new svm_node*[_svm_model->l];
832 
833  int i = 0;
834 
 // NOTE(review): i is never compared with _svm_model->l, so a configuration
 // holding more "Vector" subsections than "Total" would write past the end of
 // sv_coef[0] and SV - consider bounding the loop.
835  for ( ; vec != last_vec; ++vec ) {
836 
837  if ( (*vec)->getName() != "Vector" ) {
838 
839  continue;
840  }
841 
842  _svm_model->sv_coef[0][i] = (*vec)->getAttributeAsDouble( "Coef", 0.0 );
843 
844  _svm_model->SV[i] = new svm_node[_num_layers + 1];
845 
846  Configuration::subsection_list nodes = (*vec)->getAllSubsections();
847 
848  Configuration::subsection_list::iterator node = nodes.begin();
849  Configuration::subsection_list::iterator last_node = nodes.end();
850 
851  int j = 0;
852 
 // NOTE(review): j is never compared with _num_layers, so a vector holding
 // more than _num_layers "Node" subsections would overflow SV[i] (including
 // the terminator written after the loop) - consider bounding the loop.
853  for ( ; node != last_node; ++node ) {
854 
855  if ( (*node)->getName() != "Node" ) {
856 
857  continue;
858  }
859 
860  _svm_model->SV[i][j].index = (*node)->getAttributeAsInt( "Index", 0 );
861  _svm_model->SV[i][j].value = (*node)->getAttributeAsDouble( "Value", 0.0 );
862 
863  ++j;
864  }
865 
 // Terminating node (index -1), same convention used by _getNode().
866  _svm_model->SV[i][j].index = -1;
867  _svm_model->SV[i][j].value = 0.0;
868 
869  ++i;
870  }
871 
872  // Labels
873  int size;
874  model_config->getAttributeAsIntArray( "Labels", &_svm_model->label, &size );
875 
 // Position of the presence class (+1) in the label array; it selects the
 // probability estimate returned by getValue().
 // NOTE(review): label[0] is read without checking that "Labels" was found.
876  _presence_index = ( _svm_model->label[0] == +1 ) ? 0 : 1;
877 
 // NOTE(review): no assignment to _svm_model->param is visible in this
 // listing (a line is missing just below) - confirm the model's parameter
 // block is filled in before the model is used for prediction.
879 
880  _svm_model->free_sv = 1;
881 
882  _done = true;
883 }
int * nSV
Definition: svm_alg.cpp:229
void _getConfiguration(ConfigurationPtr &) const
Definition: svm_alg.cpp:688
void warn(const char *format,...)
'Warn' level.
Definition: Log.cpp:273
svm_parameter param
Definition: svm_alg.cpp:217
std::vector< ConfigurationPtr > subsection_list
void _getNode(svm_node *node, const Sample &sample) const
Definition: svm_alg.cpp:673
double Scalar
Type of map values.
Definition: om_defs.hh:39
int _num_layers
Definition: svm_alg.hh:66
int done() const
Definition: svm_alg.cpp:619
static AlgMetadata metadata
Definition: svm_alg.cpp:186
static Log * instance()
Returns the instance pointer, creating the object on the first call.
Definition: Log.cpp:45
#define COEF0_ID
Definition: svm_alg.cpp:51
svm_node ** SV
Definition: svm_alg.cpp:220
#define GAMMA_ID
Definition: svm_alg.cpp:50
double * probB
Definition: svm_alg.cpp:224
#define C_ID
Definition: svm_alg.cpp:52
static AlgParamMetadata parameters[NUM_PARAM]
Definition: svm_alg.cpp:62
#define SVMTYPE_ID
Definition: svm_alg.cpp:47
svm_problem _svm_problem
Definition: svm_alg.hh:70
SamplerPtr createSampler(const EnvironmentPtr &env, const OccurrencesPtr &presence, const OccurrencesPtr &absence)
Definition: Sampler.cpp:52
#define NU_ID
Definition: svm_alg.cpp:53
double * probA
Definition: svm_alg.cpp:223
int nr_class
Definition: svm_alg.cpp:218
void error(const char *format,...)
'Error' level.
Definition: Log.cpp:290
bool _done
Definition: svm_alg.hh:64
OM_ALG_DLL_EXPORT AlgMetadata const * algorithmMetadata()
Definition: svm_alg.cpp:248
int getParameter(std::string const &name, std::string *value)
void setNormalization(const SamplerPtr &samp) const
Definition: Algorithm.cpp:350
#define UNUSED(symbol)
Definition: os_specific.hh:55
int needNormalization()
Definition: svm_alg.cpp:301
int initialize()
Definition: svm_alg.cpp:330
int _presence_index
Definition: svm_alg.hh:76
int iterate()
Definition: svm_alg.cpp:589
OM_ALG_DLL_EXPORT AlgorithmImpl * algorithmFactory()
Definition: svm_alg.cpp:241
#define SVM_LOG_PREFIX
Definition: svm_alg.cpp:57
int getConvergence(Scalar *const val) const
Definition: svm_alg.cpp:664
int * label
Definition: svm_alg.cpp:228
#define NUM_PARAM
Definition: svm_alg.cpp:45
svm_model * _svm_model
Definition: svm_alg.hh:72
double * rho
Definition: svm_alg.cpp:222
double ** sv_coef
Definition: svm_alg.cpp:221
SamplerPtr _samp
Definition: Algorithm.hh:245
#define PROB_ID
Definition: svm_alg.cpp:54
void info(const char *format,...)
'Info' level.
Definition: Log.cpp:256
int free_sv
Definition: svm_alg.cpp:232
void _setConfiguration(const ConstConfigurationPtr &)
Definition: svm_alg.cpp:758
#define PSEUDO_ID
Definition: svm_alg.cpp:55
std::vector< OccurrencePtr >::const_iterator const_iterator
Definition: Occurrences.hh:85
svm_parameter _svm_parameter
Definition: svm_alg.hh:68
virtual void computeNormalization(const ReferenceCountedPointer< const SamplerImpl > &samplerPtr)=0
#define DEGREE_ID
Definition: svm_alg.cpp:49
Scalar getValue(const Sample &x) const
Definition: svm_alg.cpp:627
Normalizer * _normalizerPtr
Definition: Algorithm.hh:247
Definition: Sample.hh:25
#define KERNELTYPE_ID
Definition: svm_alg.cpp:48