15 #define _SCL_SECURE_NO_DEPRECATE
22 #include <gsl/gsl_statistics_double.h>
23 #include <gsl/gsl_multifit_nlin.h>
24 #include <gsl/gsl_math.h>
25 #include <gsl/gsl_eigen.h>
26 #include <gsl/gsl_sf_gamma.h>
27 #include <gsl/gsl_randist.h>
28 #include <gsl/gsl_permutation.h>
29 #include <gsl/gsl_heapsort.h>
30 #include <gsl/gsl_sort.h>
31 #include <gsl/gsl_sort_vector.h>
51 "Number of random eigenvalues",
53 "The number of eigenvalues to generate from randomly 'shuffled' environment data.",
54 "The Broken Stick method of selecting the number of components to keep \
55 is carried out by randomising the row order of each column in the environmental \
56 matrix and then obtaining the eigen value for the randomised matrix. \
57 This is repeatedly carried out for the amount of times specified by the user here.",
68 "Number of standard deviations",
70 "The number of standard deviations added to the randomised eigen value.",
71 "When all the eigen values for the 'shuffled' environmental matrix have been summed \
72 this number of standard deviations is added to the mean of the eigen values. \
73 Any components whose eigen values are above this threshold are retained.",
84 "Minimum number of components in model",
86 "The minimum number of components that the model must have.",
87 "If not enough components are selected, the model produced will be erroneous or fail. \
88 Usually three or more components are acceptable",
98 "Show very detailed debugging info",
100 "Warning this will cause a large amount of information to be printed ",
101 "Set this to 1 to show extremely verbose diagnostics. \
102 Set this to 0 to disable verbose diagnostics (this is default behaviour).",
118 "Climate Space Model",
121 "Climate Space Model [CSM] is a principle components based \
122 algorithm developed by Dr. Neil Caithness",
124 "Climate Space Model [CSM] is a principle components based \
125 algorithm developed by Dr. Neil Caithness. The component \
126 selection process int this algorithm implementation is \
127 based on the Broken-Stick cutoff where any component with \
128 an eigenvalue less than (n stddevs above a randomised sample) is discarded. \n\
129 The original CSM was written as series of Matlab functions. ",
132 "Robertson M.P., Caithness N., Villet M.H. (2001) A PCA-based modelling technique for predicting environmental suitability for organisms from presence records. Diversity and Distributions, 7:15-27",
134 "Tim Sutton, Renato De Giovanni",
135 "t.sutton [at] reading.ac.uk",
174 const gsl_rng_type * myRngType;
176 myRngType = gsl_rng_default;
220 if ( ! ( myTempInt == 0 || myTempInt == 1 ) )
222 Log::instance()->
warn(
"CSM - Broken Stick - Verbose debugging parameter out of range: %d\n",
238 if ( numberOfRandomisationsInt <= 0 || numberOfRandomisationsInt > 1000 )
240 Log::instance()->
warn(
"CSM - Broken Stick - Randomisations parameter out of range: %f\n",
244 if ( numberOfStdDevsFloat<= -10 || numberOfStdDevsFloat> 10 )
246 Log::instance()->
warn(
"CSM - Broken Stick - StandardDeviations parameter out of range: %f\n",
250 if (minComponentsInt < 1 ||minComponentsInt > 20 )
252 Log::instance()->
warn(
"CSM - Broken Stick - MinComponents parameter out of range: %f\n",
265 gsl_matrix * m = gsl_matrix_alloc (size1, size2);
266 for (
unsigned int j=0; j < m->size2; j++)
268 for (
unsigned int k=0; k < m->size1; k++)
271 gsl_matrix_set(m,k,j,myNo);
282 gsl_matrix * myOuputMatrix = gsl_matrix_alloc (original_matrix->size1, original_matrix->size2);
285 for (
unsigned int j=0; j < m->size2; j++)
288 gsl_vector * myRandomColumnVector = gsl_vector_alloc (m->size1);
289 gsl_vector * myOriginalColumnVector = gsl_vector_alloc (original_matrix->size1);
290 gsl_matrix_get_col (myRandomColumnVector, m, j);
291 gsl_matrix_get_col (myOriginalColumnVector, original_matrix, j);
292 gsl_permutation * myPermutation = gsl_permutation_alloc(m->size1);
294 gsl_sort_vector_index(myPermutation,myRandomColumnVector);
297 for (
unsigned int k=0; k < m->size1; k++)
299 double myDouble = gsl_vector_get(myOriginalColumnVector,myPermutation->data[k]);
303 gsl_matrix_set(myOuputMatrix,k,j,myDouble);
307 gsl_permutation_free (myPermutation);
308 gsl_vector_free (myRandomColumnVector);
309 gsl_vector_free (myOriginalColumnVector);
315 return myOuputMatrix;
324 gsl_matrix * myMatrixOfEigenValueVectors =
344 gsl_vector * myGslEigenValueVector = gsl_vector_alloc (m->size2);
345 gsl_matrix * myGslEigenVectorMatrix = gsl_matrix_alloc (m->size2, m->size2);
347 gsl_eigen_symmv_workspace * myWorkpace = gsl_eigen_symmv_alloc (m->size2);
348 gsl_eigen_symmv (myGslCovarianceMatrix,
349 myGslEigenValueVector,
350 myGslEigenVectorMatrix,
353 gsl_eigen_symmv_free (myWorkpace);
354 displayVector(myGslEigenValueVector,
"Randomised eigenvaluevector");
358 gsl_eigen_symmv_sort (myGslEigenValueVector, myGslEigenVectorMatrix,
359 GSL_EIGEN_SORT_VAL_DESC);
363 gsl_matrix_set_row (myMatrixOfEigenValueVectors,i,myGslEigenValueVector);
366 gsl_vector_free (myGslEigenValueVector);
367 gsl_matrix_free(myGslEigenVectorMatrix);
369 gsl_matrix_free(myGslCovarianceMatrix);
376 gsl_vector * myMeanVector = gsl_vector_alloc(myMatrixOfEigenValueVectors->size2);
377 gsl_vector * myStdDevVector = gsl_vector_alloc(myMatrixOfEigenValueVectors->size2);;
379 displayVector (myMeanVector,
"Mean of randomised Eigen Vectors");
381 gsl_vector * myMeanPlusStdDevsVector = gsl_vector_alloc(myMeanVector->size);
382 for (
unsigned int i=0; i<myMeanVector->size; ++i)
384 double myMean = gsl_vector_get (myMeanVector,i);
385 double myStdDev = gsl_vector_get (myStdDevVector,i);
388 displayVector (myMeanPlusStdDevsVector,
"Mean of randomised Eigen Vectors plus std deviation");
400 double sumOfEigenValues = 0;
402 for (
unsigned int i=0; i<myMeanPlusStdDevsVector->size; ++i)
404 double cmpValue = gsl_vector_get(myMeanPlusStdDevsVector,i);
422 gsl_vector_free (myMeanVector);
423 gsl_vector_free (myStdDevVector);
424 gsl_vector_free (myMeanPlusStdDevsVector);
425 gsl_matrix_free (myMatrixOfEigenValueVectors);
434 gsl_vector * tmp_gsl_eigenvalue_vector = gsl_vector_alloc (
_layer_count);
451 int myCurrentColInt=0;
454 double cmpValue = gsl_vector_get (tmp_gsl_eigenvalue_vector,j);
455 if (cmpValue > gsl_vector_get (myMeanPlusStdDevsVector,j))
461 gsl_vector * tmp_gsl_vector = gsl_vector_alloc (
_layer_count);
462 gsl_matrix_get_col (tmp_gsl_vector, tmp_gsl_eigenvector_matrix, j);
464 gsl_vector_free (tmp_gsl_vector);
470 gsl_vector_free (tmp_gsl_eigenvalue_vector);
471 gsl_matrix_free (tmp_gsl_eigenvector_matrix);
474 gsl_vector_free (myMeanVector);
475 gsl_vector_free (myStdDevVector);
476 gsl_vector_free (myMeanPlusStdDevsVector);
477 gsl_matrix_free (myMatrixOfEigenValueVectors);
float numberOfStdDevsFloat
void warn(const char *format,...)
'Warn' level.
OM_ALG_DLL_EXPORT AlgMetadata const * algorithmMetadata()
gsl_matrix * _gsl_eigenvector_matrix
static Log * instance()
Returns the instance pointer, creating the object on the first call.
gsl_rng * _randomNumberGenerator
int calculateMeanAndSd(gsl_matrix *theMatrix, gsl_vector *theMeanVector, gsl_vector *theStdDevVector)
bool verboseDebuggingBool
int _retained_components_count
gsl_vector * _gsl_eigenvalue_vector
int getParameter(std::string const &name, std::string *value)
void displayMatrix(const gsl_matrix *m, const char *name, const bool roundFlag=true) const
static AlgParamMetadata parameters[NUM_PARAM]
gsl_matrix * autoCovariance(gsl_matrix *m)
gsl_matrix * randomiseColumns(gsl_matrix *original_matrix)
void displayVector(const gsl_vector *v, const char *name, const bool roundFlag=true) const
OM_ALG_DLL_EXPORT AlgorithmImpl * algorithmFactory()
static AlgMetadata metadata
gsl_matrix * _gsl_environment_matrix
gsl_matrix * createRandomMatrix(int size1, int size2)
int numberOfRandomisationsInt
void debug(const char *format,...)
'Debug' level.