17 #include <gsl/gsl_statistics.h>
18 #include <gsl/gsl_math.h>
19 #include <gsl/gsl_permutation.h>
20 #include <gsl/gsl_matrix.h>
21 #include <gsl/gsl_eigen.h>
22 #include <gsl/gsl_blas.h>
23 #include <gsl/gsl_linalg.h>
24 #include <gsl/gsl_errno.h>
25 #include <gsl/gsl_sort_vector.h>
34 #define BACKGROUND_ID "NumberOfBackgroundPoints"
35 #define NUM_RETRIES "NumberOfRetries"
36 #define DISCARD_METHOD "DiscardMethod"
37 #define RETAIN_COMPONENTS "RetainComponents"
38 #define RETAIN_VARIATION "RetainVariation"
39 #define DISPLAY_LOADINGS "DisplayLoadings"
40 #define VERBOSE_DEBUG "VerboseDebug"
46 "Number of background sample points",
48 "Number of background points to be sampled when estimating the mean and standard deviation.",
49 "The ENFA algorithm compares the species presence data with the background environment. This requires the calculation of the mean, standard deviation and covariances of each environmental layer. This could be prohibitively slow and expensive for large datasets, so estimate these values by randomly sampling X points from the background data.",
58 "Number of retries of model",
60 "Number of attempted retries in the case that the model generation fails.",
61 "The algorithm requires the inversion of several matrices, but sometimes the matrix to invert is singular and so the algorithm fails. This seems to occur when the background data is undersampled or not representative. Often retrying the model generation (i.e. resampling the background data) makes this problem go away.",
70 "Method for discarding components",
72 "Method for discarding components: 0 - Fixed number (set by RETAIN_COMPONENTS), 1 - Minimum components explaining a fixed variation (set by RETAIN_VARIATION), 2 - Broken stick method",
73 " 0 - Retain a fixed number of components defined by the variable RETAIN_COMPONENTS\n 1 - Retain the top N components that cumulatively explain the level of variation defined by the RETAIN_VARIATION variable\n 2 - Compare the observed explanation of variation to the broken stick distribution retaining those components explaining higher levels of variation",
82 "Number of components to retain",
84 "Specify the number of components to retain (only for DISCARD_METHOD=0)",
85 "If the Discard_method=0, then this variable is used to determine the number of components to retain.",
94 "Percent varition for component retention",
96 "Specify the amount of variation that the retained components should explain (only for DISCARD_METHOD=1)",
97 "If the Discard_method=1, then this variable is used to determine the number of components to retain, by taking those components that cumulatively account for at least this much variation.",
106 "Display variable loadings for each factor",
108 "Output a comma separated matrix of variable loadings for each retained factor",
109 "Set to 1 to display the matrix. Var=variable, Marg=Marginality (factor 0), Sp-1=Specialisation factor 1, etc. Variables are numbered in the order they are input in the request file.",
118 "Verbose printing for debugging",
120 "Print lots of details",
134 "ENFA (Ecological-Niche Factor Analysis)",
136 "Algorithm based on presence only data using a modified principal components analysis.",
137 "Ecological-Niche Factor Analysis (Hirzel et al, 2002) uses a modified principal components analysis to develop a model based on presence only data. The observed environment is compared to the background data of the study area (note that absence points in the occurrence file are treated as background data). The analysis produces factors similar to a PCA. The first factor is termed the 'marginality' of the species, marginality is defined as the ecological distance between the species optimum and the mean habitat within the background data. Other factors are termed the 'specialization', and are defined as the ratio of the ecological variance in mean habitat to that observed for the target species. Model projection uses the geomeans method of Hirzel & Arlettaz (2003)",
138 "Hirzel, A.H.; Hausser, J.; Chessel, D. & Perrin, N.",
139 "Hirzel, A.H.; Hausser, J.; Chessel, D. & Perrin, N. Ecological-niche factor analysis: How to compute habitat-suitability maps without absence data? Ecology, 2002, 83, 2027-2036\nHirzel, A.H & Arlettaz, R. Modeling habitat suitability for complex species distributions by environmental distance geometric mean Environmental Management, 2003, 32, 614-623\n",
141 "chris.yesson [at] ioz.ac.uk",
175 _gsl_environment_matrix = 0;
176 _gsl_environment_factor_matrix = 0;
177 _gsl_background_matrix = 0;
178 _gsl_covariance_matrix = 0;
179 _gsl_covariance_background_matrix = 0;
181 _gsl_stddev_vector = 0;
182 _gsl_avg_background_vector = 0;
183 _gsl_stddev_background_vector = 0;
184 _gsl_eigenvalue_vector = 0;
185 _gsl_eigenvector_matrix = 0;
186 _gsl_score_matrix = 0;
187 _gsl_covariance_matrix_root_inverse = 0;
188 _gsl_workspace_H = 0;
189 _gsl_workspace_W = 0;
190 _gsl_workspace_y = 0;
191 _gsl_workspace_z = 0;
192 _gsl_factor_weights_all_components = 0;
193 _gsl_factor_weights = 0;
194 _gsl_geomean_vector = 0;
203 if (_gsl_background_matrix)
205 gsl_matrix_free (_gsl_background_matrix);
207 if (_gsl_covariance_background_matrix)
209 gsl_matrix_free (_gsl_covariance_background_matrix);
211 if (_gsl_covariance_matrix)
213 gsl_matrix_free (_gsl_covariance_matrix);
215 if (_gsl_covariance_matrix_root_inverse)
217 gsl_matrix_free (_gsl_covariance_matrix_root_inverse);
219 if (_gsl_eigenvector_matrix)
221 gsl_matrix_free (_gsl_eigenvector_matrix);
223 if (_gsl_environment_factor_matrix)
225 gsl_matrix_free (_gsl_environment_factor_matrix);
227 if (_gsl_environment_matrix)
229 gsl_matrix_free (_gsl_environment_matrix);
231 if (_gsl_score_matrix)
233 gsl_matrix_free (_gsl_score_matrix);
235 if (_gsl_workspace_H)
237 gsl_matrix_free (_gsl_workspace_H);
239 if (_gsl_workspace_W)
241 gsl_matrix_free (_gsl_workspace_W);
243 if (_gsl_workspace_y)
245 gsl_matrix_free (_gsl_workspace_y);
249 gsl_vector_free (_gsl_avg_vector);
251 if (_gsl_avg_background_vector)
253 gsl_vector_free (_gsl_avg_background_vector);
255 if (_gsl_eigenvalue_vector)
257 gsl_vector_free (_gsl_eigenvalue_vector);
259 if (_gsl_stddev_vector)
261 gsl_vector_free (_gsl_stddev_vector);
263 if (_gsl_stddev_background_vector)
265 gsl_vector_free (_gsl_stddev_background_vector);
267 if (_gsl_workspace_z)
269 gsl_vector_free (_gsl_workspace_z);
271 if (_gsl_factor_weights)
273 gsl_vector_free (_gsl_factor_weights);
275 if (_gsl_factor_weights_all_components)
277 gsl_vector_free (_gsl_factor_weights_all_components);
279 if (_gsl_geomean_vector)
281 gsl_vector_free (_gsl_geomean_vector);
301 int Enfa::initialize()
309 _layer_count = _samp->numIndependent();
311 _localityCount = _samp->numPresence();
313 if (_localityCount<2)
315 Log::instance()->
warn(
"ENFA needs at least two occurrence points..aborting...\n" );
322 if ( _samp->numAbsence() )
324 _backgroundProvided=1;
325 _backgroundCount = _samp->numAbsence();
326 Log::instance()->
info(
"Using background data provided (%i records marked as absences in the occurrence file)\n",_backgroundCount);
332 _backgroundProvided=0;
336 getParameter( NUM_RETRIES, &_numRetries );
337 getParameter( DISCARD_METHOD, &_discardMethod);
338 getParameter( RETAIN_COMPONENTS, &_retainComponents);
339 getParameter( RETAIN_VARIATION, &_retainVariation);
340 getParameter( DISPLAY_LOADINGS, &_displayLoadings);
341 getParameter( VERBOSE_DEBUG, &_verboseDebug);
349 while (_retryCount<_numRetries && !myFlag)
358 if (!SamplerToMatrix())
365 if (!BackgroundToMatrix())
375 catch (InverseFailedException& exception)
396 int Enfa::SamplerToMatrix()
398 if (_localityCount < 1)
407 _gsl_environment_matrix = gsl_matrix_alloc (_localityCount, _layer_count);
410 for (
int i=0; pit != fin; ++pit, ++i)
412 for (
int j=0;j<_layer_count;j++)
415 double myCellValue = (double)(*pit)->environment()[j];
416 gsl_matrix_set (_gsl_environment_matrix,i,j,myCellValue);
425 int Enfa::BackgroundToMatrix()
438 if (_backgroundProvided==1)
440 _ocbg = _samp->getAbsences();
444 _ocbg = _samp->getPseudoAbsences(_backgroundCount, 0, 1,
false,
false);
447 pit = _ocbg->begin();
451 _gsl_background_matrix = gsl_matrix_alloc (_backgroundCount, _layer_count);
453 for (
int i=0; pit != fin; ++pit, ++i)
455 for (
int j=0;j<_layer_count;j++)
458 double myCellValue = (double)(*pit)->environment()[j];
459 gsl_matrix_set (_gsl_background_matrix,i,j,myCellValue);
469 int Enfa::calculateMeanAndSd(gsl_matrix * theMatrix,
470 gsl_vector * theMeanVector,
471 gsl_vector * theStdDevVector)
474 assert (theMatrix != 0);
475 assert (theMeanVector !=0);
476 assert (theStdDevVector !=0);
479 gsl_vector_set_zero(theMeanVector);
482 gsl_vector_set_zero(theStdDevVector);
485 for (
int j = 0; j < _layer_count; j++)
488 gsl_vector_view myColumn = gsl_matrix_column (theMatrix, j);
490 double myAverage = gsl_stats_mean (myColumn.vector.data, myColumn.vector.stride, myColumn.vector.size);
492 gsl_vector_set (theMeanVector,j,myAverage);
494 double myStdDev = gsl_stats_sd (myColumn.vector.data, myColumn.vector.stride, myColumn.vector.size);
496 gsl_vector_set (theStdDevVector,j,myStdDev);
502 int Enfa::center(gsl_matrix * theMatrix,
506 assert (theMatrix != 0);
515 int msize=theMatrix->size1;
516 for (
int j=0;j<_layer_count;j++)
519 double myAverage = gsl_vector_get (_gsl_avg_background_vector,j);
520 double myStdDev = gsl_vector_get (_gsl_stddev_background_vector,j);
522 for (
int i=0;i<msize;++i)
524 double myDouble = gsl_matrix_get (theMatrix,i,j);
527 myDouble = (myDouble-myAverage)/myStdDev;
534 gsl_matrix_set(theMatrix,i,j,myDouble);
559 int Enfa::done()
const
583 gsl_vector * tmp_raw_gsl_vector = gsl_vector_alloc (_layer_count);
585 gsl_vector * tmp_centered_gsl_vector = gsl_vector_alloc(_layer_count);
586 double m1, m2, m3, m4;
588 for (ii=0;ii<_layer_count;++ii)
590 myFloat =
static_cast<float>(x[ii]);
591 gsl_vector_set (tmp_raw_gsl_vector,ii,myFloat);
594 m2=gsl_vector_get(_gsl_avg_background_vector,ii);
595 m3=gsl_vector_get(_gsl_stddev_background_vector,ii);
598 gsl_vector_set(tmp_centered_gsl_vector, ii, m4);
607 gsl_vector_free(tmp_raw_gsl_vector);
610 gsl_matrix * tmp_centered_gsl_matrix = gsl_matrix_alloc(1,_layer_count);
611 for (ii=0;ii<_layer_count;++ii)
612 gsl_matrix_set(tmp_centered_gsl_matrix,0,ii,gsl_vector_get(tmp_centered_gsl_vector,ii));
614 gsl_vector_free(tmp_centered_gsl_vector);
616 gsl_matrix * tmp_factored_gsl_matrix = gsl_matrix_alloc(1,_layer_count);
622 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans,
623 1.0, tmp_centered_gsl_matrix, _gsl_score_matrix,
624 0.0, tmp_factored_gsl_matrix);
626 gsl_vector* tmp_factored_gsl_vector = gsl_vector_alloc(_layer_count);
627 for (ii=0;ii<_layer_count;++ii)
628 gsl_vector_set(tmp_factored_gsl_vector,ii,
629 gsl_matrix_get(tmp_factored_gsl_matrix,0,ii));
635 double tmp_geomean = getGeomean(tmp_factored_gsl_vector);
637 gsl_matrix_free(tmp_centered_gsl_matrix);
638 gsl_matrix_free(tmp_factored_gsl_matrix);
639 gsl_vector_free(tmp_factored_gsl_vector);
656 cellcount=_localityCount;
657 for (ii=_localityCount-1; ii>=0; --ii)
658 if (gsl_vector_get(_gsl_geomean_vector,ii)<=tmp_geomean)
660 cellcount=_localityCount-ii-1;
678 int Enfa::getConvergence(
Scalar *
const val )
const
688 void Enfa::displayVector(
const gsl_vector * v,
const char * name,
const bool roundFlag)
const
693 fprintf( stderr,
"\nDisplaying Vector rounded to 4 decimal places '%s' (%u): \n----------------------------------------------\n[ ", name, static_cast<unsigned int>(v->size) );
697 fprintf( stderr,
"\nDisplaying Vector '%s' (%u): \n----------------------------------------------\n[ ", name, static_cast<unsigned int>(v->size) );
702 for (
unsigned int i=0;i<v->size;++i)
707 double myDouble = gsl_vector_get (v,i);
710 fprintf( stderr,
"%.4g %s", myDouble, sep1 );
714 fprintf( stderr,
"%g %s", myDouble, sep1 );
717 fprintf( stderr,
"\n----------------------------------------------\n" );
723 void Enfa::displayMatrix(
const gsl_matrix * m,
const char * name,
const bool roundFlag)
const
727 fprintf( stderr,
"\nDisplaying Matrix '%s' (%u / %u): \n----------------------------------------------\n[\n", name, static_cast<unsigned int>(m->size1), static_cast<unsigned int>(m->size2) );
731 fprintf( stderr,
"\nDisplaying Matrix rounded to 6 decimal places '%s' (%u / %u): \n----------------------------------------------\n[\n", name, static_cast<unsigned int>(m->size1), static_cast<unsigned int>(m->size2) );
733 for (
unsigned int i=0;i<m->size1;++i)
738 for (
unsigned int j=0;j<m->size2;j++)
740 double myDouble = gsl_matrix_get (m,i,j);
742 if (j == m->size2 -1)
746 fprintf( stderr,
"%g %s ", myDouble, sep1 );
750 fprintf( stderr,
"%.6g %s ", myDouble, sep1 );
754 fprintf( stderr,
"%s\n", sep2 );
756 fprintf( stderr,
"]\n----------------------------------------------\n" );
761 void Enfa::displayLoadings(
const gsl_matrix * m,
const int f)
const
769 for (
signed int j=1;j<f;j++)
771 fprintf( stderr,
",Sp-%i", j);
774 fprintf( stderr,
"\n");
776 for (
unsigned int i=0;i<m->size1;++i)
780 for (
signed int j=0;j<f;j++)
782 double myDouble = gsl_matrix_get (m,i,j);
784 fprintf( stderr,
",%3.3f", myDouble);
786 fprintf( stderr,
"\n");
794 gsl_matrix* Enfa::sqrtm(gsl_matrix* original_matrix)
const
801 m_size = original_matrix->size1;
803 gsl_matrix* m = gsl_matrix_alloc (m_size, m_size);
804 gsl_matrix_memcpy (m, original_matrix);
807 gsl_vector* eigval_v = gsl_vector_alloc (m_size);
808 gsl_matrix* eigvect_m = gsl_matrix_alloc (m_size, m_size);
809 gsl_eigen_symmv_workspace * temp_v = gsl_eigen_symmv_alloc (m_size);
816 gsl_eigen_symmv_free(temp_v);
820 gsl_matrix* temp_m = gsl_matrix_alloc (m_size, m_size);
821 for (j=0; j<m_size; ++j) {
823 v=gsl_vector_get(eigval_v,j);
824 if (v<0 && v>-0.000001)
830 gsl_matrix_free(temp_m);
832 gsl_matrix_free(eigvect_m);
833 gsl_vector_free(eigval_v);
834 std::string msg =
"Enfa::sqrtm:Cannot calculate square root for matrix - model will fail\n", v;
836 throw InverseFailedException( msg.c_str() );
839 for (i=0; i<m_size; ++i) {
840 gsl_matrix_set(temp_m, i, j, gsl_matrix_get(eigvect_m,i,j)*lambda);
845 gsl_matrix* _root = gsl_matrix_alloc (m_size, m_size);
847 gsl_blas_dgemm(CblasNoTrans, CblasTrans,
848 1.0, temp_m, eigvect_m,
877 gsl_matrix_free(temp_m);
879 gsl_matrix_free(eigvect_m);
880 gsl_vector_free(eigval_v);
891 double Enfa::getGeomean(gsl_vector* v)
const
899 gsl_vector * tmp_v = gsl_vector_alloc(_retained_components_count);
900 gsl_vector * tmp_gsl_factor_weights = gsl_vector_alloc(_retained_components_count);
902 for (
int i=0; i<_retained_components_count; ++i)
904 gsl_vector_set(tmp_v, i,gsl_vector_get(v,i));
905 gsl_vector_set(tmp_gsl_factor_weights, i, gsl_vector_get(_gsl_factor_weights,i));
908 gsl_vector * tmp_distance_vector = gsl_vector_alloc(_retained_components_count);
911 gsl_vector_view tmp_matrix_row_view;
912 double tmp_geomean=1.0;
913 double tmp_dist_workspace;
916 for (
int i=0; i<_localityCount; ++i)
918 tmp_matrix_row_view = gsl_matrix_row(_gsl_environment_factor_matrix,i);
922 for (
int j=0; j<_retained_components_count; ++j)
923 gsl_vector_set(tmp_distance_vector, j,
924 gsl_vector_get(&tmp_matrix_row_view.vector,j));
928 gsl_vector_sub(tmp_distance_vector,tmp_v);
933 gsl_vector_mul(tmp_distance_vector, tmp_distance_vector);
936 gsl_vector_mul(tmp_distance_vector,tmp_gsl_factor_weights);
941 tmp_dist_workspace=pow(gsl_blas_dasum(tmp_distance_vector),0.5);
947 if (tmp_dist_workspace!=0) tmp_geomean+=log(tmp_dist_workspace);
956 tmp_geomean=exp(tmp_geomean/_localityCount);
961 gsl_vector_free(tmp_distance_vector);
962 gsl_vector_free(tmp_v);
963 gsl_vector_free(tmp_gsl_factor_weights);
969 gsl_matrix* Enfa::inverse(gsl_matrix* _m)
const
972 gsl_error_handler_t* old_handler = gsl_set_error_handler_off();
974 int m_size=_m->size1;
975 gsl_matrix*_mcopy = gsl_matrix_alloc(m_size, m_size);
976 gsl_matrix*_inverse = gsl_matrix_alloc(m_size, m_size);
977 gsl_matrix_set_identity(_inverse);
978 gsl_matrix_memcpy(_mcopy,_m);
979 _gsl_vector_view _MI;
982 choldecomp = gsl_linalg_cholesky_decomp(_mcopy);
986 for (
int i=0; i<m_size; i++)
988 _MI=gsl_matrix_row(_inverse,i);
989 cholsvx = gsl_linalg_cholesky_svx(_mcopy, &_MI.vector);
996 gsl_matrix *_testInverse = gsl_matrix_alloc(m_size, m_size);
997 gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,
998 _m,_inverse,0.0,_testInverse);
1001 for (
int i=0; i<m_size; i++)
1002 for (
int j=0; j<m_size; j++)
1005 myval=(int)(ceil(gsl_matrix_get(_testInverse,i,j)*pow((
double)10,6)-0.49999999)/pow((
double)10,6));
1006 mycomp = (i == j) ? 1 : 0;
1013 gsl_matrix_free(_mcopy);
1014 gsl_matrix_free(_testInverse);
1015 std::string msg =
"Enfa::inverse failed to invert matrix\n";
1017 throw InverseFailedException( msg.c_str() );
1021 gsl_matrix_free(_mcopy);
1022 gsl_matrix_free(_testInverse);
1025 gsl_set_error_handler (old_handler);
1033 gsl_matrix * Enfa::autoCovariance(gsl_matrix * original_matrix)
1037 int numrows = original_matrix->size1;
1038 int numcols = original_matrix->size2;
1041 gsl_matrix * m = gsl_matrix_alloc (numrows, numcols);
1042 gsl_matrix_memcpy (m, original_matrix);
1047 gsl_matrix_transpose(m);
1051 gsl_matrix * s = gsl_matrix_alloc (numrows, numcols);
1055 gsl_matrix_memcpy (m, s);
1059 gsl_vector * v = gsl_vector_alloc(numcols);
1061 for (
int i = 0; i < numcols; i++)
1065 for (j = 0; j < numrows; j++)
1067 val += gsl_matrix_get (m, j, i);
1070 gsl_vector_set (v, i, val);
1072 for (j = 0; j < numrows; j++)
1074 gsl_matrix_set_row (s, j, v);
1081 gsl_matrix_scale (s, (
double)1/numrows);
1084 gsl_matrix_sub (m, s);
1090 gsl_matrix * p = gsl_matrix_alloc (numcols, numcols);
1091 gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,
1095 gsl_matrix_scale (p, (
double)1/(numrows-1));
1097 gsl_matrix_free (m);
1098 gsl_matrix_free (s);
1107 int Enfa::discardComponents()
const
1109 double variationTotal=0;
1110 int returnValue=_layer_count;
1113 if (_discardMethod==0)
1116 returnValue=_retainComponents;
1117 for (
int i=0; i<_retainComponents; ++i)
1118 variationTotal+=gsl_vector_get(_gsl_factor_weights_all_components,i);
1121 else if (_discardMethod==1)
1123 Log::instance()->
info(
"Discarding components with variation method (1) (variation=%6.2f)\n", _retainVariation);
1124 for (
int i=0; i<_layer_count; ++i)
1126 variationTotal+=gsl_vector_get(_gsl_factor_weights_all_components,i);
1127 if (variationTotal>=_retainVariation)
1136 else if (_discardMethod==2)
1145 gsl_vector* _brokenStickDist = gsl_vector_alloc(_layer_count-1);
1146 for (
int i=0; i<_layer_count-1; ++i)
1149 for (
int j=i+1; j<=_layer_count-1; ++j)
1150 tmp_total+=(
double)1/j;
1151 gsl_vector_set(_brokenStickDist,i, (
double)tmp_total/(_layer_count-1));
1158 for (
int i=0; i<_layer_count-1; ++i)
1162 myVariation=gsl_vector_get(_gsl_factor_weights_all_components,i+1)/(1-gsl_vector_get(_gsl_factor_weights_all_components,0));
1165 if (myVariation <= gsl_vector_get(_brokenStickDist,i))
1169 Log::instance()->
warn(
"First specialisation component explains less variation than the broken stick distribution - retaining all components by default\n" );
1171 returnValue=_layer_count;
1180 variationTotal+=myVariation;
1182 gsl_vector_free(_brokenStickDist);
1186 Log::instance()->
warn(
"Unknown problem whilst discarding components, retaining all components by default\n");
1189 Log::instance()->
info(
"Retained components: %i/%i, variation explained: %.2f\n", returnValue, _layer_count, variationTotal);
1201 _gsl_avg_background_vector = gsl_vector_alloc (_gsl_background_matrix->size2);
1202 _gsl_stddev_background_vector = gsl_vector_alloc (_gsl_background_matrix->size2) ;
1203 calculateMeanAndSd(_gsl_background_matrix,_gsl_avg_background_vector,_gsl_stddev_background_vector);
1207 displayVector(_gsl_avg_background_vector,
"Background Means",
true);
1208 displayVector(_gsl_stddev_background_vector,
"Background Stdev",
true);
1212 _gsl_avg_vector = gsl_vector_alloc (_gsl_environment_matrix->size2);
1213 _gsl_stddev_vector = gsl_vector_alloc (_gsl_environment_matrix->size2) ;
1214 calculateMeanAndSd(_gsl_environment_matrix,_gsl_avg_vector,_gsl_stddev_vector);
1218 displayVector(_gsl_avg_vector,
"Species Means",
true);
1219 displayVector(_gsl_stddev_vector,
"Species Stdev",
true);
1223 center(_gsl_environment_matrix,_localityCount);
1224 center(_gsl_background_matrix,_backgroundCount);
1229 calculateMeanAndSd(_gsl_environment_matrix,_gsl_avg_vector,_gsl_stddev_vector);
1233 displayVector(_gsl_avg_vector,
"Species Means - centered",
true);
1234 displayVector(_gsl_stddev_vector,
"Species Stdev - centered",
true);
1239 _gsl_covariance_matrix = autoCovariance(_gsl_environment_matrix);
1241 displayMatrix(_gsl_covariance_matrix,
"Species Covariance Matrix",
true);
1244 _gsl_covariance_background_matrix = autoCovariance(_gsl_background_matrix);
1246 displayMatrix(_gsl_covariance_background_matrix,
"Background Covariance Matrix",
true);
1249 _gsl_covariance_matrix_root_inverse = gsl_matrix_alloc(_gsl_covariance_matrix->size1,_gsl_covariance_matrix->size2);
1250 gsl_matrix* gsl_sqrtm_matrix = sqrtm(_gsl_covariance_matrix);
1251 _gsl_covariance_matrix_root_inverse = inverse(gsl_sqrtm_matrix);
1252 gsl_matrix_free(gsl_sqrtm_matrix);
1255 _gsl_workspace_z = gsl_vector_alloc (_layer_count);
1256 gsl_blas_dgemv (CblasNoTrans, 1.0, _gsl_covariance_matrix_root_inverse, _gsl_avg_vector, 0.0, _gsl_workspace_z);
1259 displayVector(_gsl_workspace_z,
"z",
true);
1262 gsl_matrix* _gsl_workspace_W_temp = gsl_matrix_alloc(_layer_count, _layer_count);
1263 _gsl_workspace_W = gsl_matrix_alloc(_layer_count, _layer_count);
1264 gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,
1265 _gsl_covariance_matrix_root_inverse,
1266 _gsl_covariance_background_matrix,0.0,
1267 _gsl_workspace_W_temp);
1269 gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,
1270 _gsl_workspace_W_temp,
1271 _gsl_covariance_matrix_root_inverse,0.0,
1274 gsl_matrix_free(_gsl_workspace_W_temp);
1277 displayMatrix(_gsl_workspace_W,
"W",
true);
1281 gsl_matrix_view _gsl_workspace_z_matrix_view;
1282 _gsl_workspace_z_matrix_view = gsl_matrix_view_vector(_gsl_workspace_z,
1285 gsl_blas_ddot(_gsl_workspace_z, _gsl_workspace_z, &zz);
1287 _gsl_workspace_y = gsl_matrix_alloc(1,_layer_count);
1288 gsl_matrix_memcpy(_gsl_workspace_y, &_gsl_workspace_z_matrix_view.matrix);
1289 gsl_matrix_scale(_gsl_workspace_y,pow(zz, -0.5));
1292 displayMatrix(_gsl_workspace_y,
"y",
true);
1296 _gsl_workspace_H = gsl_matrix_alloc(_layer_count,_layer_count);
1297 gsl_matrix* _gsl_workspace_H_temp1 = gsl_matrix_alloc(_layer_count,_layer_count);
1298 gsl_matrix* _gsl_workspace_H_temp2 = gsl_matrix_alloc(_layer_count,_layer_count);
1299 gsl_matrix* _gsl_workspace_H_temp3 = gsl_matrix_alloc(_layer_count,_layer_count);
1302 gsl_matrix_set_identity(_gsl_workspace_H_temp1);
1305 gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0,
1308 0.0, _gsl_workspace_H_temp2);
1311 gsl_matrix_sub(_gsl_workspace_H_temp1,_gsl_workspace_H_temp2);
1314 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
1315 _gsl_workspace_H_temp1,
1317 0.0, _gsl_workspace_H_temp3);
1320 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
1321 _gsl_workspace_H_temp3,
1322 _gsl_workspace_H_temp1,
1323 0.0, _gsl_workspace_H);
1325 gsl_matrix_free(_gsl_workspace_H_temp1);
1326 gsl_matrix_free(_gsl_workspace_H_temp2);
1327 gsl_matrix_free(_gsl_workspace_H_temp3);
1330 displayMatrix(_gsl_workspace_H,
"H",
true);
1333 _gsl_eigenvalue_vector = gsl_vector_alloc (_layer_count);
1334 gsl_vector * _gsl_eigenvalue_vector_copy = gsl_vector_alloc (_layer_count);
1335 _gsl_eigenvector_matrix = gsl_matrix_alloc (_layer_count, _layer_count);
1336 gsl_eigen_symmv_workspace * _gsl_eigen_workpace = gsl_eigen_symmv_alloc (_layer_count);
1337 gsl_eigen_symmv (_gsl_workspace_H,
1338 _gsl_eigenvalue_vector,
1339 _gsl_eigenvector_matrix,
1340 _gsl_eigen_workpace);
1343 gsl_eigen_symmv_free (_gsl_eigen_workpace);
1346 _gsl_score_matrix=gsl_matrix_alloc(_layer_count,_layer_count);
1347 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0,
1348 _gsl_covariance_matrix_root_inverse,
1349 _gsl_eigenvector_matrix,
1350 0.0, _gsl_score_matrix);
1353 gsl_vector * _gsl_eigenvalue_vector_presort = gsl_vector_alloc(_layer_count);
1354 gsl_vector_memcpy(_gsl_eigenvalue_vector_presort, _gsl_eigenvalue_vector);
1355 gsl_eigen_symmv_sort (_gsl_eigenvalue_vector, _gsl_eigenvector_matrix,
1356 GSL_EIGEN_SORT_VAL_DESC);
1357 gsl_eigen_symmv_sort (_gsl_eigenvalue_vector_presort, _gsl_score_matrix,
1358 GSL_EIGEN_SORT_VAL_DESC);
1368 _gsl_vector_min = gsl_vector_min_index(_gsl_eigenvalue_vector);
1372 gsl_vector_memcpy (_gsl_eigenvalue_vector_copy,
1373 _gsl_eigenvalue_vector);
1376 for (
int i=1; i<_layer_count; i++)
1378 if (_gsl_vector_min>=i)
1380 gsl_vector_set(_gsl_eigenvalue_vector,i,
1381 gsl_vector_get(_gsl_eigenvalue_vector_copy,i-1));
1388 gsl_vector_free(_gsl_eigenvalue_vector_copy);
1392 for (
int i=0; i<_layer_count; ++i) traceW+=gsl_matrix_get(_gsl_workspace_W, i, i);
1394 for (
int i=0; i<_layer_count; ++i) traceH+=gsl_matrix_get(_gsl_workspace_H, i, i);
1395 gsl_vector_set(_gsl_eigenvalue_vector,0,traceW-traceH);
1402 gsl_matrix * _gsl_score_matrix_copy=gsl_matrix_alloc(_layer_count,_layer_count);
1403 gsl_matrix_memcpy(_gsl_score_matrix_copy, _gsl_score_matrix);
1405 for (
int i=0; i<_layer_count; ++i)
1406 for (
int j=0; j<_layer_count; ++j)
1410 gsl_matrix_set(_gsl_score_matrix,i,j,gsl_vector_get(_gsl_avg_vector,i));
1412 else if (_gsl_vector_min>=j)
1414 gsl_matrix_set(_gsl_score_matrix,i,j,
1415 gsl_matrix_get(_gsl_score_matrix_copy,i,j-1));
1419 gsl_matrix_free(_gsl_score_matrix_copy);
1424 for (
int i=0; i<_layer_count; ++i)
1428 for (
int j=0; j<_layer_count; ++j)
1429 unorm+=pow(gsl_matrix_get(_gsl_score_matrix,j,i),2);
1430 unorm=pow(unorm,0.5);
1435 for (
int j=0; j<_layer_count; ++j)
1437 score2norm = gsl_matrix_get(_gsl_score_matrix,j,i);
1439 score2norm = score2norm/unorm;
1441 gsl_matrix_set(_gsl_score_matrix,j,i,score2norm);
1447 displayVector(_gsl_eigenvalue_vector,
"Eigenvalues",
true);
1448 displayMatrix(_gsl_eigenvector_matrix,
"Eigenvectors",
true);
1449 displayMatrix(_gsl_score_matrix,
"Score Matrix",
true);
1457 for (
int i=0; i<_layer_count; ++i)
1458 _marginality+=pow(gsl_vector_get(_gsl_avg_vector,i),2);
1459 _marginality=pow(_marginality,0.5)/1.96;
1464 _specialisation=pow((gsl_blas_dasum(_gsl_eigenvalue_vector)/_layer_count),0.5);
1470 _gsl_factor_weights_all_components = gsl_vector_alloc(_layer_count);
1471 for (
int i=0; i<_layer_count; ++i)
1473 gsl_vector_set(_gsl_factor_weights_all_components,i,
1474 fabs(gsl_vector_get(_gsl_eigenvalue_vector,i)));
1476 gsl_vector_scale(_gsl_factor_weights_all_components, 1.0/gsl_blas_dasum(_gsl_factor_weights_all_components));
1479 _retained_components_count=discardComponents();
1483 _gsl_factor_weights = gsl_vector_alloc(_layer_count);
1484 gsl_vector_set_zero(_gsl_factor_weights);
1485 for (
int i=0; i<_retained_components_count; ++i)
1487 gsl_vector_set(_gsl_factor_weights,i,
1488 fabs(gsl_vector_get(_gsl_eigenvalue_vector,i)));
1490 gsl_vector_scale(_gsl_factor_weights, 1.0/gsl_blas_dasum(_gsl_factor_weights));
1493 for (
int i=0; i<_retained_components_count; ++i)
1497 Log::instance()->
info(
"Marginality (factor 0) weight: %.2f, variation explained: %.2f\n", gsl_vector_get(_gsl_factor_weights,i), gsl_vector_get(_gsl_factor_weights_all_components,i));
1501 Log::instance()->
info(
"Specialisation factor %i weight: %.2f, variation explained: %.2f\n", i, gsl_vector_get(_gsl_factor_weights,i), gsl_vector_get(_gsl_factor_weights_all_components,i));
1506 if (_displayLoadings)
1508 displayLoadings(_gsl_score_matrix, _retained_components_count);
1513 _gsl_geomean_vector = gsl_vector_alloc(_localityCount);
1514 _gsl_environment_factor_matrix = gsl_matrix_alloc(_localityCount,_layer_count);
1515 gsl_blas_dgemm(CblasNoTrans, CblasNoTrans,
1516 1.0, _gsl_environment_matrix, _gsl_score_matrix,
1517 0.0, _gsl_environment_factor_matrix);
1519 gsl_vector_view _gsl_geomean_workspace;
1521 double _tmp_geomean;
1527 for (
int i=0; i<_localityCount; ++i)
1529 _gsl_geomean_workspace=gsl_matrix_row(_gsl_environment_factor_matrix,i);
1530 _tmp_geomean=getGeomean(&_gsl_geomean_workspace.vector);
1531 gsl_vector_set(_gsl_geomean_vector, i, _tmp_geomean);
1535 gsl_sort_vector(_gsl_geomean_vector);
1554 config->addSubsection( model_config );
1557 model_config->addNameValue(
"Marginality", _marginality );
1560 model_config->addNameValue(
"Specialisation", _specialisation );
1563 model_config->addNameValue(
"RetainedComponents", _retained_components_count );
1565 double *values =
new double[_layer_count];
1568 for (
int i=0; i < _layer_count; ++i)
1569 values[i] = gsl_vector_get(_gsl_avg_background_vector, i);
1571 model_config->addNameValue(
"AvgBackgroundVector", values, _layer_count );
1574 for (
int i=0; i < _layer_count; ++i)
1575 values[i] = gsl_vector_get(_gsl_stddev_background_vector, i);
1577 model_config->addNameValue(
"StddevBackgroundVector", values, _layer_count );
1580 for (
int i=0; i < _layer_count; ++i)
1581 values[i] = gsl_vector_get(_gsl_avg_vector, i);
1583 model_config->addNameValue(
"AvgVector", values, _layer_count );
1586 for (
int i=0; i < _layer_count; ++i)
1587 values[i] = gsl_vector_get(_gsl_stddev_vector, i);
1589 model_config->addNameValue(
"StddevVector", values, _layer_count );
1592 for (
int i=0; i < _layer_count; ++i)
1593 values[i] = gsl_vector_get(_gsl_eigenvalue_vector, i);
1595 model_config->addNameValue(
"EigenvalueVector", values, _layer_count );
1598 for (
int i=0; i < _layer_count; ++i)
1599 values[i] = gsl_vector_get(_gsl_factor_weights, i);
1601 model_config->addNameValue(
"FactorWeights", values, _layer_count );
1606 int num_cells = _layer_count * _layer_count;
1608 double *flat_eigenvector_matrix =
new double[num_cells];
1612 for (
int i=0; i < _layer_count; ++i)
1613 for (
int j=0; j < _layer_count; ++j, ++cnt)
1614 flat_eigenvector_matrix[cnt] = gsl_matrix_get( _gsl_eigenvector_matrix, i, j );
1616 model_config->addNameValue(
"EigenvectorMatrix", flat_eigenvector_matrix, num_cells );
1618 delete[] flat_eigenvector_matrix;
1621 num_cells = _layer_count * _layer_count;
1623 double *flat_score_matrix =
new double[num_cells];
1627 for (
int i=0; i < _layer_count; ++i)
1628 for (
int j=0; j < _layer_count; ++j, ++cnt)
1629 flat_score_matrix[cnt] = gsl_matrix_get( _gsl_score_matrix, i, j );
1631 model_config->addNameValue(
"ScoreMatrix", flat_score_matrix, num_cells );
1633 delete[] flat_score_matrix;
1636 num_cells = _localityCount * _layer_count;
1638 double *flat_environment_factor_matrix =
new double[num_cells];
1642 for (
int i=0; i < _localityCount; ++i)
1643 for (
int j=0; j < _layer_count; ++j, ++cnt)
1644 flat_environment_factor_matrix[cnt] = gsl_matrix_get( _gsl_environment_factor_matrix, i, j );
1646 model_config->addNameValue(
"EnvironmentFactorMatrix", flat_environment_factor_matrix, num_cells );
1648 delete[] flat_environment_factor_matrix;
1651 values =
new double[_localityCount];
1653 for (
int i=0; i < _localityCount; ++i)
1654 values[i] = gsl_vector_get(_gsl_geomean_vector, i);
1656 model_config->addNameValue(
"LocalityGeomeans", values, _localityCount );
1671 _retained_components_count = model_config->getAttributeAsInt(
"RetainedComponents", 0 );
1674 _marginality = model_config->getAttributeAsDouble(
"Marginality", 0.0 );
1677 _specialisation = model_config->getAttributeAsDouble(
"Specialisation", 0.0 );
1680 std::vector<double> stl_vector = model_config->getAttributeAsVecDouble(
"AvgVector" );
1682 _layer_count = stl_vector.size();
1684 _gsl_avg_vector = gsl_vector_alloc( _layer_count );
1686 for (
int i=0; i < _layer_count; ++i)
1687 gsl_vector_set( _gsl_avg_vector, i, stl_vector[i] );
1690 stl_vector = model_config->getAttributeAsVecDouble(
"AvgBackgroundVector" );
1692 _layer_count = stl_vector.size();
1694 _gsl_avg_background_vector = gsl_vector_alloc( _layer_count );
1696 for (
int i=0; i < _layer_count; ++i)
1697 gsl_vector_set( _gsl_avg_background_vector, i, stl_vector[i] );
1700 stl_vector = model_config->getAttributeAsVecDouble(
"StddevVector" );
1702 _gsl_stddev_vector = gsl_vector_alloc( _layer_count );
1704 for (
int i=0; i < _layer_count; ++i)
1705 gsl_vector_set( _gsl_stddev_vector, i, stl_vector[i] );
1708 stl_vector = model_config->getAttributeAsVecDouble(
"StddevBackgroundVector" );
1710 _gsl_stddev_background_vector = gsl_vector_alloc( _layer_count );
1712 for (
int i=0; i < _layer_count; ++i)
1713 gsl_vector_set( _gsl_stddev_background_vector, i, stl_vector[i] );
1716 stl_vector = model_config->getAttributeAsVecDouble(
"EigenvalueVector" );
1718 _layer_count = stl_vector.size();
1720 _gsl_eigenvalue_vector = gsl_vector_alloc( _layer_count );
1722 for (
int i=0; i < _layer_count; ++i)
1723 gsl_vector_set( _gsl_eigenvalue_vector, i, stl_vector[i] );
1726 stl_vector = model_config->getAttributeAsVecDouble(
"FactorWeights" );
1728 _layer_count = stl_vector.size();
1730 _gsl_factor_weights = gsl_vector_alloc( _layer_count );
1732 for (
int i=0; i < _layer_count; ++i)
1733 gsl_vector_set( _gsl_factor_weights, i, stl_vector[i] );
1736 stl_vector = model_config->getAttributeAsVecDouble(
"EigenvectorMatrix" );
1738 _gsl_eigenvector_matrix = gsl_matrix_alloc( _layer_count, _layer_count );
1742 for (
int i=0; i < _layer_count; ++i)
1743 for (
int j=0; j < _layer_count; ++j, ++cnt)
1744 gsl_matrix_set( _gsl_eigenvector_matrix, i, j, stl_vector[cnt] );
1748 stl_vector = model_config->getAttributeAsVecDouble(
"ScoreMatrix" );
1750 _gsl_score_matrix = gsl_matrix_alloc( _layer_count, _layer_count );
1754 for (
int i=0; i < _layer_count; ++i)
1755 for (
int j=0; j < _layer_count; ++j, ++cnt)
1756 gsl_matrix_set( _gsl_score_matrix, i, j, stl_vector[cnt] );
1760 stl_vector = model_config->getAttributeAsVecDouble(
"EnvironmentFactorMatrix" );
1762 _localityCount = stl_vector.size()/_layer_count;
1764 _gsl_environment_factor_matrix = gsl_matrix_alloc( _localityCount, _layer_count );
1768 for (
int i=0; i < _localityCount; ++i)
1769 for (
int j=0; j < _layer_count; ++j, ++cnt)
1770 gsl_matrix_set( _gsl_environment_factor_matrix, i, j, stl_vector[cnt] );
1774 stl_vector = model_config->getAttributeAsVecDouble(
"LocalityGeomeans" );
1776 _gsl_geomean_vector = gsl_vector_alloc( _localityCount );
1778 for (
int i=0; i < _localityCount; ++i)
1779 gsl_vector_set( _gsl_geomean_vector, i, stl_vector[i] );
void warn(const char *format,...)
'Warn' level.
double Scalar
Type of map values.
static AlgParamMetadata parameters[NUM_PARAM]
static Log * instance()
Returns the instance pointer, creating the object on the first call.
void error(const char *format,...)
'Error' level.
OM_ALG_DLL_EXPORT AlgMetadata const * algorithmMetadata()
static AlgMetadata metadata
void info(const char *format,...)
'Info' level.
std::vector< OccurrencePtr >::const_iterator const_iterator
OM_ALG_DLL_EXPORT AlgorithmImpl * algorithmFactory()
void debug(const char *format,...)
'Debug' level.