openModeller  Version 1.4.0
enfa.cpp
Go to the documentation of this file.
00001 //
00002 // Enfa
00003 //
00004 // Description:
00005 //
00006 //
00007 // Author: CRIA <chris.yesson@ioz.ac.uk>, (C) 2009
00008 //
00009 // Copyright: See COPYING file that comes with this distribution
00010 //
00011 //
00013 
00014 #include <string.h>
00015 #include <cassert>
00016 #include <math.h>
00017 #include <gsl/gsl_statistics.h>
00018 #include <gsl/gsl_math.h>
00019 #include <gsl/gsl_permutation.h>
00020 #include <gsl/gsl_matrix.h>
00021 #include <gsl/gsl_eigen.h>
00022 #include <gsl/gsl_blas.h>
00023 #include <gsl/gsl_linalg.h>
00024 #include <gsl/gsl_errno.h>
00025 #include <gsl/gsl_sort_vector.h>
00026 
00027 #include "enfa.hh"
00028 
00029 
00030 /****************************************************************/
00031 /********************** Algorithm's Metadata ********************/
00032 
00033 #define NUM_PARAM 7
00034 #define BACKGROUND_ID      "NumberOfBackgroundPoints"
00035 #define NUM_RETRIES        "NumberOfRetries"
00036 #define DISCARD_METHOD     "DiscardMethod"
00037 #define RETAIN_COMPONENTS  "RetainComponents"
00038 #define RETAIN_VARIATION   "RetainVariation"
00039 #define DISPLAY_LOADINGS   "DisplayLoadings"
00040 #define VERBOSE_DEBUG      "VerboseDebug"
00041 
00042 static AlgParamMetadata parameters[NUM_PARAM] = {
00043 
00044   {
00045     BACKGROUND_ID,                   // Id.
00046     "Number of background sample points", // Name.
00047     Integer,                     // Type.
00048     "Number of background points to be sampled when estimating the mean and standard deviation.", // Overview
00049     "The ENFA algorithm compares the species presence data with the background environment.  This requires the calculation of the mean, standard deviation and covariances of each environmental layer.  This could be prohibitively slow and expensive for large datasets, so estimate these values by randomly sampling X points from the background data.", // Description.
00050     1,         // Not zero if the parameter has lower limit.
00051     10,        // Parameter's lower limit.
00052     0,         // Not zero if the parameter has upper limit.
00053     0,         // Parameter's upper limit.
00054     "10000"        // Parameter's typical (default) value.
00055   },
00056   {
00057     NUM_RETRIES,                   // Id.
00058     "Number of retries of model", // Name.
00059     Integer,                     // Type.
00060     "Number of attempted retries in the case that the model generation fails.", // Overview
00061     "The algorithm requires the inversion of several matrices, but sometimes the matrix to invert is singular and so the algorithm fails.  This seems to occur when the background data is undersampled or not representative.  Often retrying the model generation (i.e. resampling the background data) makes this problem go away.", // Description.
00062     1,         // Not zero if the parameter has lower limit.
00063     1,        // Parameter's lower limit.
00064     0,         // Not zero if the parameter has upper limit.
00065     100,         // Parameter's upper limit.
00066     "5"        // Parameter's typical (default) value.
00067   },
00068   {
00069     DISCARD_METHOD,                   // Id.
00070     "Method for discarding components", // Name.
00071     Integer,                     // Type.
00072     "Method for discarding components: 0 - Fixed number (set by RETAIN_COMPONENTS), 1 - Minimum components explaining a fixed variation (set by RETAIN_VARIATION), 2 - Broken stick method", // Overview
00073     " 0 - Retain a fixed number of components defined by the variable RETAIN_COMPONENTS\n 1 - Retain the top N components that cumulatively explain the level of variation defined by the RETAIN_VARIATION variable\n 2 - Compare the observed explanation of variation to the broken stick distribution retaining those components explaining higher levels of variation", // Description.
00074     1,         // Not zero if the parameter has lower limit.
00075     0,        // Parameter's lower limit.
00076     1,         // Not zero if the parameter has upper limit.
00077     2,         // Parameter's upper limit.
00078     "2"        // Parameter's typical (default) value.
00079   },
00080   {
00081     RETAIN_COMPONENTS,                   // Id.
00082     "Number of components to retain", // Name.
00083     Integer,                     // Type.
00084     "Specify the number of components to retain (only for DISCARD_METHOD=0)", // Overview
00085     "If the Discard_method=0, then this variable is used to determine the number of components to retain.", // Description.
00086     1,         // Not zero if the parameter has lower limit.
00087     1,        // Parameter's lower limit.
00088     0,         // Not zero if the parameter has upper limit.
00089     100,         // Parameter's upper limit.
00090     "2"        // Parameter's typical (default) value.
00091   },
00092   {
00093     RETAIN_VARIATION,                   // Id.
00094     "Percent varition for component retention", // Name.
00095     Integer,                     // Type.
00096     "Specify the amount of variation that the retained components should explain (only for DISCARD_METHOD=1)", // Overview
00097     "If the Discard_method=1, then this variable is used to determine the number of components to retain, by taking those components that cumulatively account for at least this much variation.", // Description.
00098     1,         // Not zero if the parameter has lower limit.
00099     0.5,        // Parameter's lower limit.
00100     1,         // Not zero if the parameter has upper limit.
00101     1.0,         // Parameter's upper limit.
00102     "0.75"        // Parameter's typical (default) value.
00103   },
00104   {
00105     DISPLAY_LOADINGS,                   // Id.
00106     "Display variable loadings for each factor", // Name.
00107     Integer,                     // Type.
00108     "Output a comma separated matrix of variable loadings for each retained factor", // Overview
00109     "Set to 1 to display the matrix.  Var=variable, Marg=Marginality (factor 0), Sp-1=Specialisation factor 1, etc. Variables are numbered in the order they are input in the request file.", // Description.
00110     1,         // Not zero if the parameter has lower limit.
00111     0,        // Parameter's lower limit.
00112     1,         // Not zero if the parameter has upper limit.
00113     1,         // Parameter's upper limit.
00114     "0"        // Parameter's typical (default) value.
00115   },
00116   {
00117     VERBOSE_DEBUG,                   // Id.
00118     "Verbose printing for debugging", // Name.
00119     Integer,                     // Type.
00120     "Print lots of details", // Overview
00121     "", // Description.
00122     1,         // Not zero if the parameter has lower limit.
00123     0,        // Parameter's lower limit.
00124     1,         // Not zero if the parameter has upper limit.
00125     1,         // Parameter's upper limit.
00126     "0"        // Parameter's typical (default) value.
00127   },
00128 };
00129 
00130 /****************************************************************/
00131 /****************************** Enfa *****************************/
00132 static AlgMetadata metadata = { // General metadata
00133   "ENFA",                    // Id
00134   "ENFA (Ecological-Niche Factor Analysis)",     // Name
00135   "0.1.3",                        // Version
00136   "Algorithm based on presence only data using a modified principal components analysis.", // Overview
00137   "Ecological-Niche Factor Analysis (Hirzel et al, 2002) uses a modified principal components analysis to develop a model based on presence only data.  The observed environment is compared to the background data of the study area (note that absence points in the occurrence file are treated as background data).  The analysis produces factors similar to a PCA.  The first factor is termed the 'marginality' of the species, marginality is defined as the ecological distance between the species optimum and the mean habitat within the background data. Other factors are termed the 'specialization', and are defined as the ratio of the ecological variance in mean habitat to that observed for the target species.  Model projection uses the geomeans method of Hirzel & Arlettaz (2003)", // Description
00138   "Hirzel, A.H.; Hausser, J.; Chessel, D. & Perrin, N.",    // Algorithm author
00139   "Hirzel, A.H.; Hausser, J.; Chessel, D. & Perrin, N. Ecological-niche factor analysis: How to compute habitat-suitability maps without absence data? Ecology, 2002, 83, 2027-2036\nHirzel, A.H & Arlettaz, R. Modeling habitat suitability for complex species distributions by environmental distance geometric mean Environmental Management, 2003, 32, 614-623\n", // Bibliography
00140   "Chris Yesson",                     // Code author
00141   "chris.yesson [at] ioz.ac.uk",            // Code author's contact
00142   0,                    // Does not accept categorical data
00143   0,                    // Does not need (pseudo)absence points
00144   NUM_PARAM, parameters // Algorithm's parameters
00145 };
00146 
00147 
00148 
00149 /****************************************************************/
00150 /****************** Algorithm's factory function ****************/
00151 
00152 OM_ALG_DLL_EXPORT
00153 AlgorithmImpl *
00154 algorithmFactory()
00155 {
00156   return new Enfa();
00157 }
00158 
00159 OM_ALG_DLL_EXPORT
00160 AlgMetadata const *
00161 algorithmMetadata()
00162 {
00163   return &metadata;
00164 }
00165 
00166 
00171 Enfa::Enfa() :
00172     AlgorithmImpl( &metadata )
00173 {
00174   _initialized = 0;
00175   _gsl_environment_matrix = 0;
00176   _gsl_environment_factor_matrix = 0;
00177   _gsl_background_matrix = 0;
00178   _gsl_covariance_matrix = 0;
00179   _gsl_covariance_background_matrix = 0;
00180   _gsl_avg_vector = 0;
00181   _gsl_stddev_vector = 0;
00182   _gsl_avg_background_vector = 0;
00183   _gsl_stddev_background_vector = 0;
00184   _gsl_eigenvalue_vector = 0;
00185   _gsl_eigenvector_matrix = 0;
00186   _gsl_score_matrix = 0;
00187   _gsl_covariance_matrix_root_inverse = 0;
00188   _gsl_workspace_H = 0;
00189   _gsl_workspace_W = 0;
00190   _gsl_workspace_y = 0;
00191   _gsl_workspace_z = 0;
00192   _gsl_factor_weights_all_components = 0;
00193   _gsl_factor_weights = 0;
00194   _gsl_geomean_vector = 0;
00195 }
00196 
00197 
00199 Enfa::~Enfa()
00200 {
00201   if ( _initialized )
00202   {
00203     if (_gsl_background_matrix)
00204     {
00205       gsl_matrix_free (_gsl_background_matrix);
00206     }
00207     if (_gsl_covariance_background_matrix)
00208     {
00209       gsl_matrix_free (_gsl_covariance_background_matrix);
00210     }
00211     if (_gsl_covariance_matrix)
00212     {
00213       gsl_matrix_free (_gsl_covariance_matrix);
00214     }
00215     if (_gsl_covariance_matrix_root_inverse)
00216     {
00217       gsl_matrix_free (_gsl_covariance_matrix_root_inverse);
00218     }
00219     if (_gsl_eigenvector_matrix)
00220     {
00221       gsl_matrix_free (_gsl_eigenvector_matrix);
00222     }
00223     if (_gsl_environment_factor_matrix)
00224     {
00225       gsl_matrix_free (_gsl_environment_factor_matrix);
00226     }
00227     if (_gsl_environment_matrix)
00228     {
00229       gsl_matrix_free (_gsl_environment_matrix);
00230     }
00231     if (_gsl_score_matrix)
00232     {
00233       gsl_matrix_free (_gsl_score_matrix);
00234     }
00235     if (_gsl_workspace_H)
00236     {
00237       gsl_matrix_free (_gsl_workspace_H);
00238     }
00239     if (_gsl_workspace_W)
00240     {
00241       gsl_matrix_free (_gsl_workspace_W);
00242     }
00243     if (_gsl_workspace_y)
00244     {
00245       gsl_matrix_free (_gsl_workspace_y);
00246     }
00247     if (_gsl_avg_vector)
00248     {
00249       gsl_vector_free (_gsl_avg_vector);
00250     }
00251     if (_gsl_avg_background_vector)
00252     {
00253       gsl_vector_free (_gsl_avg_background_vector);
00254     }
00255     if (_gsl_eigenvalue_vector)
00256     {
00257       gsl_vector_free (_gsl_eigenvalue_vector);
00258     }
00259     if (_gsl_stddev_vector)
00260     {
00261       gsl_vector_free (_gsl_stddev_vector);
00262     }
00263     if (_gsl_stddev_background_vector)
00264     {
00265       gsl_vector_free (_gsl_stddev_background_vector);
00266     }
00267     if (_gsl_workspace_z)
00268     {
00269       gsl_vector_free (_gsl_workspace_z);
00270     }
00271     if (_gsl_factor_weights)
00272     {
00273       gsl_vector_free (_gsl_factor_weights);
00274     }
00275     if (_gsl_factor_weights_all_components)
00276     {
00277       gsl_vector_free (_gsl_factor_weights_all_components);
00278     }
00279     if (_gsl_geomean_vector)
00280     {
00281       gsl_vector_free (_gsl_geomean_vector);
00282     }
00283   }
00284 }
00285 
00286 
00287 //
00288 // Methods used to build the model
00289 //
00290 
00291 
00292 
00293 
00301 int Enfa::initialize()
00302 {
00303   _initialized = 1;
00304 
00305   Log::instance()->info( "Base ENFA class initializing\n" );
00306 
00307   //set the class member that holds the number of environmental variables
00308   //we subtract 1 because the first column contains the specimen count
00309   _layer_count = _samp->numIndependent();
00310   //set the class member that holds the number of occurences
00311   _localityCount = _samp->numPresence();
00312   //Log::instance()->info( "Checking more than 1 samples exist\n" );
00313   if (_localityCount<2)
00314   {
00315     Log::instance()->warn( "ENFA needs at least two occurrence points..aborting...\n" );
00316     _initialized = 0;
00317     return 0;
00318   }
00319 
00320   // _backgroundCount = _backgroundSamp->numOccurrences();
00321   //if ( _samp->numAbsence() )
00322   if ( _samp->numAbsence() )
00323     {
00324       _backgroundProvided=1;
00325       _backgroundCount = _samp->numAbsence();
00326       Log::instance()->info( "Using background data provided (%i records marked as absences in the occurrence file)\n",_backgroundCount);
00327 
00328     }
00329   else
00330     {
00331       getParameter( BACKGROUND_ID, &_backgroundCount );
00332       _backgroundProvided=0;
00333     }
00334 
00335   //use the pseudoabsence generator to do this
00336   getParameter( NUM_RETRIES, &_numRetries );
00337   getParameter( DISCARD_METHOD, &_discardMethod);
00338   getParameter( RETAIN_COMPONENTS, &_retainComponents);
00339   getParameter( RETAIN_VARIATION, &_retainVariation);
00340   getParameter( DISPLAY_LOADINGS, &_displayLoadings);
00341   getParameter( VERBOSE_DEBUG, &_verboseDebug);
00342 
00343   /* sometimes our random background samples produce singular matrices
00344      so catch this exception and restart the model */
00345 
00346   _retryCount=0;
00347   bool myFlag=false;
00348 
00349   while (_retryCount<_numRetries && !myFlag)
00350   {
00351 
00352       _retryCount+=1;
00353       try
00354       {
00355 
00356     //convert the sampler to a matrix and store in the local gsl_matrix
00357     //Log::instance()->info( "Converting samples to GSL_Matrix\n" );
00358     if (!SamplerToMatrix())
00359     {
00360         Log::instance()->warn( "All occurences are outside the masked area!\n" );
00361         _initialized = 0;
00362         return 0;
00363     }
00364 
00365     if (!BackgroundToMatrix())
00366     {
00367         Log::instance()->warn( "Failed to sample background data!\n" );
00368         _initialized = 0;
00369         return 0;
00370     }
00371 
00372     myFlag = enfa1();
00373 
00374       }
00375       catch (InverseFailedException& exception)
00376       {
00377     UNUSED(exception);
00378     Log::instance()->warn( "Model failed, retry number %i\n", _retryCount );
00379     myFlag=false;
00380       }
00381 
00382   }
00383 
00384   if (! myFlag)
00385   {
00386       Log::instance()->warn( "Model initialization failed!\n" );
00387   }
00388 
00389   return myFlag;
00390 }
00391 
00396 int Enfa::SamplerToMatrix()
00397 {
00398   if (_localityCount < 1)
00399   {
00400     return 0;
00401   }
00402 
00403   OccurrencesImpl::const_iterator pit = _samp->getPresences()->begin();
00404   OccurrencesImpl::const_iterator fin = _samp->getPresences()->end();
00405 
00406   // Allocate the gsl matrix to store environment data at each locality
00407   _gsl_environment_matrix = gsl_matrix_alloc (_localityCount, _layer_count);
00408 
00409   // now populate the gsl matrix from the sample data
00410   for (int i=0; pit != fin; ++pit, ++i)
00411   {
00412     for (int j=0;j<_layer_count;j++)
00413     {
00414       //we add one to j in order to omit the specimen count column
00415       double myCellValue = (double)(*pit)->environment()[j];
00416       gsl_matrix_set (_gsl_environment_matrix,i,j,myCellValue);
00417     }
00418   }
00419 
00420   //Log::instance()->info( "Enfa::SampleToMatrix: x: %i y: %i\n",_layer_count,_localityCount );
00421   return 1;
00422 }
00423 
00424 // sample background data and store it in a gsl_matrix
00425 int Enfa::BackgroundToMatrix()
00426 {
00427 
00428   //Log::instance()->info("Enfa:BackgroundToMatrix:Generating background samples.\n" );
00429 
00430   // try getting all background samples at once using the absence generator
00431   // this enables us to ensure geographic uniqueness
00432   // question: what happens when num-background > number of cells in env layer?
00433   // allow user to provide background points using the absence parser
00434   OccurrencesPtr _ocbg;
00435   OccurrencesImpl::const_iterator pit;
00436   OccurrencesImpl::const_iterator fin;
00437 
00438   if (_backgroundProvided==1)
00439   {
00440     _ocbg = _samp->getAbsences();
00441   }
00442   else
00443   {
00444     _ocbg = _samp->getPseudoAbsences(_backgroundCount, 0, 1, false, false);
00445   }
00446 
00447   pit = _ocbg->begin();
00448   fin = _ocbg->end();
00449 
00450   // Allocate the gsl matrix to store environment data at each background point
00451   _gsl_background_matrix = gsl_matrix_alloc (_backgroundCount, _layer_count);
00452   // now populate the gsl matrix from the sample data
00453   for (int i=0; pit != fin; ++pit, ++i)
00454   {
00455     for (int j=0;j<_layer_count;j++)
00456     {
00457       //we add one to j in order to omit the specimen count column
00458       double myCellValue = (double)(*pit)->environment()[j];
00459       gsl_matrix_set (_gsl_background_matrix,i,j,myCellValue);
00460     }
00461   }
00462 
00463   return 1;
00464 }
00465 
00466 
00467 
00469 int Enfa::calculateMeanAndSd(gsl_matrix * theMatrix,
00470                             gsl_vector * theMeanVector,
00471                             gsl_vector * theStdDevVector)
00472 {
00473 #ifndef WIN32
00474   assert (theMatrix != 0);
00475   assert (theMeanVector !=0);
00476   assert (theStdDevVector !=0);
00477 #endif
00478   //Initialise the vector to hold the mean of each column
00479   gsl_vector_set_zero(theMeanVector);
00480 
00481   //Initialise the vector to hold the stddev of each column
00482   gsl_vector_set_zero(theStdDevVector);
00483 
00484   //calculate the mean  and stddev of each column
00485   for (int j = 0; j < _layer_count; j++)
00486   {
00487     //get the current column from the array as a vector
00488     gsl_vector_view myColumn = gsl_matrix_column (theMatrix, j);
00489     //calculate the average for the column ...
00490     double myAverage = gsl_stats_mean (myColumn.vector.data, myColumn.vector.stride, myColumn.vector.size);
00491     // ...and assign it to the jth element in the column means vector
00492     gsl_vector_set (theMeanVector,j,myAverage);
00493     //calculate the stddev for the column and ...
00494     double myStdDev = gsl_stats_sd (myColumn.vector.data, myColumn.vector.stride, myColumn.vector.size);
00495     // ...and assign it to the jth element in the column stddev vector
00496     gsl_vector_set (theStdDevVector,j,myStdDev);
00497   }
00498 
00499   return 0;
00500 }
00501 
00502 int Enfa::center(gsl_matrix * theMatrix,
00503      int spCount)
00504 {
00505 #ifndef WIN32
00506   assert (theMatrix != 0);
00507 #endif
00508   //
00509   //Subtract the column mean from every value in each column
00510   //Divide each resultant column value by the stddev for that column
00511   //Note that we are walking the matrix column wise
00512   //
00513   //Log::instance()->info( "Centering data\n" );
00514   //for (int j=0;j<_layer_count;j++)
00515   int msize=theMatrix->size1;
00516   for (int j=0;j<_layer_count;j++)
00517   {
00518     //get the stddev and mean for this column
00519     double myAverage = gsl_vector_get (_gsl_avg_background_vector,j);
00520     double myStdDev = gsl_vector_get (_gsl_stddev_background_vector,j);
00521 
00522     for (int i=0;i<msize;++i)
00523     {
00524       double myDouble = gsl_matrix_get (theMatrix,i,j);
00525       if (myStdDev > 0)
00526       {
00527         myDouble = (myDouble-myAverage)/myStdDev;
00528       }
00529       else
00530       {
00531         myDouble=0.0;
00532       }
00533       //update the gsl_matrix with the new value
00534       gsl_matrix_set(theMatrix,i,j,myDouble);
00535     }
00536   }
00537 
00538   return 0;
00539 }
00540 
00541 
00546 int Enfa::iterate()
00547 {
00548   _done=1;
00549   return 1;
00550 }
00551 
00552 
00559 int Enfa::done() const
00560 {
00561   return _done;
00562 }
00563 
00564 
00565 //
00566 // Methods used to project the model
00567 //
00568 
00569 
00574 Scalar Enfa::getValue( const Sample& x ) const
00575 {
00576     int ii, cellcount;
00577     float myFloat;
00578 
00579     //Log::instance()->info( "Enfa:getValue:entering getvalue\n" );
00580 
00581     //first thing we do is convert the oM primitive env value array 
00582     // to a gsl vector so we can use matrix multplication with it
00583     gsl_vector * tmp_raw_gsl_vector = gsl_vector_alloc (_layer_count);
00584     // center data using the avg and stdev
00585     gsl_vector * tmp_centered_gsl_vector = gsl_vector_alloc(_layer_count);
00586     double m1, m2, m3, m4;
00587 
00588     for (ii=0;ii<_layer_count;++ii)
00589     {
00590   myFloat = static_cast<float>(x[ii]);
00591   gsl_vector_set (tmp_raw_gsl_vector,ii,myFloat);
00592 
00593   m1=myFloat;
00594   m2=gsl_vector_get(_gsl_avg_background_vector,ii);
00595   m3=gsl_vector_get(_gsl_stddev_background_vector,ii);
00596   m4=(m1-m2)/m3;
00597   //Log::instance()->info( "getValue: m1: %6.2f, m2: %6.2f, m3: %6.2f, m4: %6.2f, \n", m1, m2, m3, m4 );
00598   gsl_vector_set(tmp_centered_gsl_vector, ii, m4);
00599     }
00600 
00601     //if (_verboseDebug)
00602     //{
00603     //displayVector(tmp_raw_gsl_vector, "getvalue: tmp_raw_gsl_vector", true);
00604     //displayVector(tmp_centered_gsl_vector, "getvalue: tmp_centered_gsl_vector", true);
00605     //}
00606 
00607     gsl_vector_free(tmp_raw_gsl_vector);
00608 
00609     //vector for factored data
00610     gsl_matrix * tmp_centered_gsl_matrix = gsl_matrix_alloc(1,_layer_count);
00611     for (ii=0;ii<_layer_count;++ii)
00612       gsl_matrix_set(tmp_centered_gsl_matrix,0,ii,gsl_vector_get(tmp_centered_gsl_vector,ii));
00613 
00614     gsl_vector_free(tmp_centered_gsl_vector);
00615 
00616     gsl_matrix * tmp_factored_gsl_matrix = gsl_matrix_alloc(1,_layer_count);
00617     
00618     // multiply centered data by score matrix to get factored data 
00619     //gsl_blas_dgemv (CblasTrans, 1.0, _gsl_score_matrix, tmp_centered_gsl_vector, 0.0, tmp_factored_gsl_vector);
00620     //gsl_blas_dgemv (CblasNoTrans, 1.0, _gsl_score_matrix, tmp_centered_gsl_vector, 0.0, tmp_factored_gsl_vector);
00621 
00622     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans,
00623        1.0, tmp_centered_gsl_matrix, _gsl_score_matrix, 
00624        0.0, tmp_factored_gsl_matrix);
00625 
00626     gsl_vector* tmp_factored_gsl_vector = gsl_vector_alloc(_layer_count);
00627     for (ii=0;ii<_layer_count;++ii)
00628       gsl_vector_set(tmp_factored_gsl_vector,ii,
00629          gsl_matrix_get(tmp_factored_gsl_matrix,0,ii));
00630     
00631     //if (_verboseDebug)
00632     //displayVector(tmp_factored_gsl_vector, "getvalue: tmp_factored_gsl_vector", true);
00633 
00634     // work out geomean from this point to all specimen localites
00635     double tmp_geomean = getGeomean(tmp_factored_gsl_vector);
00636     
00637     gsl_matrix_free(tmp_centered_gsl_matrix);
00638     gsl_matrix_free(tmp_factored_gsl_matrix);
00639     gsl_vector_free(tmp_factored_gsl_vector);
00640 
00641     //Log::instance()->info( "getValue: geomean %6.2f\n", tmp_geomean );
00642     
00643 
00644     /* % OK, now to convert these into habitat suitability values.
00645        % Determine what percentage of the species presence points
00646        % are further away from zero than this point */
00647     
00648     //cellcount=0;
00649 
00650     //old version checking every value on unsorted vector
00651     //for (ii=0; ii<_localityCount; ++ii)
00652     //  if (gsl_vector_get(_gsl_geomean_vector,ii)>=tmp_geomean) cellcount+=1;
00653     
00654     // step up the sorted geomeans vector stop when current geomean is higher
00655     // quickest when most cells are of low suitability
00656     cellcount=_localityCount;
00657     for (ii=_localityCount-1; ii>=0; --ii)
00658       if (gsl_vector_get(_gsl_geomean_vector,ii)<=tmp_geomean)
00659   {
00660     cellcount=_localityCount-ii-1;
00661     break;
00662   }
00663 
00664     //Log::instance()->info( "getValue: cellcount %i\n", cellcount );
00665     Scalar myReturn=(Scalar)cellcount/_localityCount;
00666 
00667     //Log::instance()->info( "enfa::getValue %6.2f\n", myReturn );
00668 
00669     return myReturn;
00670 }
00671 
00678 int Enfa::getConvergence( Scalar * const val ) const
00679 {
00680     return 0;
00681 }
00682 
00683 //
00684 // General Helper Methods
00685 //
00686 
00687 
00688 void Enfa::displayVector(const gsl_vector * v, const char * name, const bool roundFlag) const
00689 {
00690     if (roundFlag)
00691     {
00692 
00693       fprintf( stderr, "\nDisplaying Vector rounded to 4 decimal places '%s' (%u): \n----------------------------------------------\n[  ", name, static_cast<unsigned int>(v->size) );
00694     }
00695     else
00696     {
00697       fprintf( stderr, "\nDisplaying Vector '%s' (%u): \n----------------------------------------------\n[  ", name, static_cast<unsigned int>(v->size) );
00698     }
00699 
00700     char sep1[] = ", ";
00701 
00702     for (unsigned int i=0;i<v->size;++i)
00703     {
00704       if (i == v->size -1)
00705         strcpy(sep1, " ]");
00706 
00707       double myDouble = gsl_vector_get (v,i);
00708       if (roundFlag)
00709       {
00710         fprintf( stderr, "%.4g %s", myDouble, sep1 );
00711       }
00712       else
00713       {
00714         fprintf( stderr, "%g %s", myDouble, sep1 );
00715       }
00716     }
00717     fprintf( stderr, "\n----------------------------------------------\n" );
00718 }
00719 
00720 
00721 /**********************/
00722 /**** displayMatrix ***/
00723 void Enfa::displayMatrix(const gsl_matrix * m, const char * name, const bool roundFlag) const
00724 {
00725     if (!roundFlag)
00726     {
00727       fprintf( stderr, "\nDisplaying Matrix '%s' (%u / %u): \n----------------------------------------------\n[\n", name, static_cast<unsigned int>(m->size1), static_cast<unsigned int>(m->size2) );
00728     }
00729     else
00730     {
00731       fprintf( stderr, "\nDisplaying Matrix rounded to 6 decimal places '%s' (%u / %u): \n----------------------------------------------\n[\n", name, static_cast<unsigned int>(m->size1), static_cast<unsigned int>(m->size2) );
00732     }
00733     for (unsigned int i=0;i<m->size1;++i)
00734     {
00735       char sep1[] = ",";
00736       char sep2[] = ";";
00737 
00738       for (unsigned int j=0;j<m->size2;j++)
00739       {
00740         double myDouble = gsl_matrix_get (m,i,j);
00741 
00742         if (j == m->size2 -1)
00743           strcpy(sep1, "");
00744         if (!roundFlag)
00745         {
00746           fprintf( stderr, "%g %s ", myDouble, sep1 );
00747         }
00748         else
00749         {
00750           fprintf( stderr, "%.6g %s ", myDouble, sep1 );
00751         }
00752       }
00753 
00754       fprintf( stderr, "%s\n", sep2 );
00755     }
00756     fprintf( stderr, "]\n----------------------------------------------\n" );
00757 }
00758 
00759 /**********************/
00760 /**** displayMatrix ***/
00761 void Enfa::displayLoadings(const gsl_matrix * m, const int f) const
00762 {
00763   Log::instance()->info( "Factor Loadings:\n");
00764 
00765   // generate header
00766   Log::instance()->info( "Var,Marg");
00767   //fprintf( stderr, "Var,Marg");
00768 
00769   for (signed int j=1;j<f;j++)
00770     {
00771       fprintf( stderr, ",Sp-%i", j);
00772     }
00773 
00774   fprintf( stderr, "\n");
00775 
00776   for (unsigned int i=0;i<m->size1;++i)
00777     {
00778       Log::instance()->info( "%i",i);
00779       //fprintf( stderr, "%i",i);
00780       for (signed int j=0;j<f;j++)
00781       {
00782         double myDouble = gsl_matrix_get (m,i,j);
00783 
00784     fprintf( stderr, ",%3.3f", myDouble);
00785       }
00786       fprintf( stderr, "\n");
00787     }
00788 }
00789 
00790 /*****************************************************************
00791  function to find the 'square-root' of a matrix:
00792   the input matrix must be positive semi-definite symmetric matrix 
00793   note that covariance matrices have these properties */
00794 gsl_matrix* Enfa::sqrtm(gsl_matrix* original_matrix) const
00795 {
00796     int m_size, i, j;
00797     double lambda, v;
00798 
00799     /* make a copy of the input matrix */
00800     // Build a copy of the input matrix to work with
00801     m_size = original_matrix->size1;
00802 
00803     gsl_matrix* m = gsl_matrix_alloc (m_size, m_size);
00804     gsl_matrix_memcpy (m, original_matrix);
00805 
00806     //create temporary space for eigenvalues, eigenvectors & workspace
00807     gsl_vector* eigval_v = gsl_vector_alloc (m_size);
00808     gsl_matrix* eigvect_m = gsl_matrix_alloc (m_size, m_size);
00809     gsl_eigen_symmv_workspace * temp_v = gsl_eigen_symmv_alloc (m_size);
00810 
00811     gsl_eigen_symmv (m,
00812          eigval_v,
00813          eigvect_m,
00814          temp_v);
00815 
00816     gsl_eigen_symmv_free(temp_v);
00817 
00818     /* allocate and initialise a new matrix temp_m  */
00819     /* this is eigvect_m scaled by the root of the eigenvalues */
00820     gsl_matrix* temp_m = gsl_matrix_alloc (m_size, m_size);
00821     for (j=0; j<m_size; ++j) {
00822   // ignore very small negative numbers
00823   v=gsl_vector_get(eigval_v,j);
00824   if (v<0 && v>-0.000001)
00825   {lambda=0.0;}
00826   else if (v>0)
00827   {lambda=pow(v,0.5);}
00828   else
00829   {
00830           gsl_matrix_free(temp_m);
00831           gsl_matrix_free(m);
00832           gsl_matrix_free(eigvect_m);
00833           gsl_vector_free(eigval_v);
00834     std::string msg = "Enfa::sqrtm:Cannot calculate square root for matrix - model will fail\n", v;
00835     Log::instance()->error( msg.c_str() );
00836     throw InverseFailedException( msg.c_str() );
00837   }
00838 
00839   for (i=0; i<m_size; ++i) {
00840       gsl_matrix_set(temp_m, i, j, gsl_matrix_get(eigvect_m,i,j)*lambda);
00841   }
00842     }
00843 
00844     /* calculate the square root =temp_m * eigvect' */
00845     gsl_matrix* _root = gsl_matrix_alloc (m_size, m_size);
00846     // multiply using blas (note transpose for second item)
00847     gsl_blas_dgemm(CblasNoTrans, CblasTrans,
00848        1.0, temp_m, eigvect_m,
00849        0.0, _root);
00850 
00851     /* code for checking the root - not really needed */
00877     gsl_matrix_free(temp_m);
00878     gsl_matrix_free(m);
00879     gsl_matrix_free(eigvect_m);
00880     gsl_vector_free(eigval_v);
00881 
00882     return _root;
00883 
00884 }
00885 
00886 /*****************************************************************
00887  * Calculate the geometric mean of the distance from point v to 
00888    all species observation points in factored environmental space
00889    weighting each factor accordingly 
00890 *****************************************************************/
00891 double Enfa::getGeomean(gsl_vector* v) const
00892 {
00893 
00894     //Log::instance()->info( "enfa::getGeomean\n" );
00895 
00896     //if (_verboseDebug)
00897     //displayVector(v, "getGeomean input vector v:", true);
00898 
00899     gsl_vector * tmp_v = gsl_vector_alloc(_retained_components_count);
00900     gsl_vector * tmp_gsl_factor_weights = gsl_vector_alloc(_retained_components_count);
00901 
00902     for (int i=0; i<_retained_components_count; ++i)
00903       {
00904   gsl_vector_set(tmp_v, i,gsl_vector_get(v,i));
00905   gsl_vector_set(tmp_gsl_factor_weights, i, gsl_vector_get(_gsl_factor_weights,i));
00906       }
00907     
00908     gsl_vector * tmp_distance_vector = gsl_vector_alloc(_retained_components_count);
00909     //gsl_vector * tmp_distance_vector = gsl_vector_alloc(_layer_count);
00910 
00911     gsl_vector_view tmp_matrix_row_view;
00912     double tmp_geomean=1.0;
00913     double tmp_dist_workspace;
00914 
00915     // loop through localities
00916     for (int i=0; i<_localityCount; ++i)
00917     {
00918   tmp_matrix_row_view = gsl_matrix_row(_gsl_environment_factor_matrix,i);
00919   //displayVector(&tmp_matrix_row_view.vector, "getGeomean: locality vector i", true);
00920   //for (int j=0; j<_layer_count; ++j)
00921   // dont need to calculate beyonde the retained components
00922   for (int j=0; j<_retained_components_count; ++j)
00923         gsl_vector_set(tmp_distance_vector, j,
00924          gsl_vector_get(&tmp_matrix_row_view.vector,j));
00925   //displayVector(tmp_distance_vector, "getGeomean: locality vector i copy ", true);
00926 
00927   // difference between cell and current locality
00928   gsl_vector_sub(tmp_distance_vector,tmp_v);
00929   //gsl_vector_sub(tmp_distance_vector,v);
00930   //displayVector(tmp_distance_vector, "getGeomean: locality vector i subbed ", true);
00931 
00932   // square the differences
00933   gsl_vector_mul(tmp_distance_vector, tmp_distance_vector);
00934   //displayVector(tmp_distance_vector, "getGeomean: locality vector i squared ", true);
00935   // multiply by the factor weights
00936   gsl_vector_mul(tmp_distance_vector,tmp_gsl_factor_weights);
00937   //gsl_vector_mul(tmp_distance_vector,_gsl_factor_weights);
00938   // square and sum the values to get the distance
00939   //displayVector(tmp_distance_vector, "getGeomean: locality vector i weighted ", true);
00940 
00941   tmp_dist_workspace=pow(gsl_blas_dasum(tmp_distance_vector),0.5);
00942   //Log::instance()->info( "euclidean distance: %6.2f\n", tmp_dist_workspace );
00943 
00944   // accumulate for geometric mean calculation
00945   // if (tmp_dist_workspace!=0) tmp_geomean*=tmp_dist_workspace;
00946         // log transform values to reduce chance of float overflow
00947         if (tmp_dist_workspace!=0) tmp_geomean+=log(tmp_dist_workspace);
00948   //Log::instance()->info( "accumulating distance: %6.2f\n", tmp_geomean );
00949     }
00950 
00951     //Log::instance()->info( "tmp_geomean1: %6.2f\n", tmp_geomean );    
00952 
00953     // finally work out geometric mean of distances to the species points
00954     // tmp_geomean=pow(tmp_geomean,(double)1/_localityCount);
00955     // reverse the log transformation 
00956     tmp_geomean=exp(tmp_geomean/_localityCount);
00957 
00958     //if (_verboseDebug)
00959     //  Log::instance()->info( "getgeomean returns: %6.2f\n", tmp_geomean );    
00960 
00961     gsl_vector_free(tmp_distance_vector);
00962     gsl_vector_free(tmp_v);
00963     gsl_vector_free(tmp_gsl_factor_weights);
00964 
00965     return tmp_geomean;
00966 }
00967 
00968 /* calculate the inverse using cholesky decomposition */
00969 gsl_matrix* Enfa::inverse(gsl_matrix* _m) const
00970 {
00971     // dont crash on error - catch it
00972     gsl_error_handler_t* old_handler = gsl_set_error_handler_off();
00973 
00974     int m_size=_m->size1;
00975     gsl_matrix*_mcopy = gsl_matrix_alloc(m_size, m_size);
00976     gsl_matrix*_inverse = gsl_matrix_alloc(m_size, m_size);
00977     gsl_matrix_set_identity(_inverse);
00978     gsl_matrix_memcpy(_mcopy,_m);
00979     _gsl_vector_view _MI;
00980 
00981     int choldecomp;
00982     choldecomp = gsl_linalg_cholesky_decomp(_mcopy);
00983     Log::instance()->debug( "Cholesky decomp result %i\n", choldecomp );
00984 
00985     int cholsvx;
00986     for (int i=0; i<m_size; i++)
00987     {
00988   _MI=gsl_matrix_row(_inverse,i);
00989   cholsvx = gsl_linalg_cholesky_svx(_mcopy, &_MI.vector);
00990   Log::instance()->debug( "Cholesky svx result %i\n", cholsvx );
00991   //displayVector(&_MI.vector, "&MI.vector", true);
00992     }
00993 
00994 
00995     // sometimes the inverse fails - check it is correct
00996     gsl_matrix *_testInverse = gsl_matrix_alloc(m_size, m_size);
00997     gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,
00998        _m,_inverse,0.0,_testInverse);
00999 
01000     int mycomp, myval;
01001     for (int i=0; i<m_size; i++)
01002   for (int j=0; j<m_size; j++)
01003   {
01004       // check values equal identity matrix (to accuracy of Xdp)
01005       myval=(int)(ceil(gsl_matrix_get(_testInverse,i,j)*pow((double)10,6)-0.49999999)/pow((double)10,6));
01006       mycomp = (i == j) ? 1 : 0;
01007       if (myval!=mycomp)
01008       {
01009     Log::instance()->warn( "Enfa::inverse failed to invert matrix!\n" );
01010     //displayMatrix(_m, "input matrix", true);
01011     //displayMatrix(_inverse, "inverse", true);
01012     //displayMatrix(_testInverse, "check inverse", true);
01013                 gsl_matrix_free(_mcopy);
01014                 gsl_matrix_free(_testInverse);
01015     std::string msg = "Enfa::inverse failed to invert matrix\n";
01016     Log::instance()->error( msg.c_str() );
01017     throw InverseFailedException( msg.c_str() );
01018       }
01019   }
01020 
01021     gsl_matrix_free(_mcopy);
01022     gsl_matrix_free(_testInverse);
01023 
01024     // restore error handler
01025     gsl_set_error_handler (old_handler);
01026   
01027     return _inverse;
01028 }
01029 
01030 /***********************/
01031 /**** autoCovariance ***/
01032 /* calculate covariance matrix as  cov(<matrix>) matlab/octave function  */
01033 gsl_matrix * Enfa::autoCovariance(gsl_matrix * original_matrix)
01034 {
01035   int j;
01036 
01037   int numrows = original_matrix->size1;
01038   int numcols = original_matrix->size2;
01039 
01040   // Build a copy of the input matrix to work with
01041   gsl_matrix * m = gsl_matrix_alloc (numrows, numcols);
01042   gsl_matrix_memcpy (m, original_matrix);
01043 
01044 
01045   if (numrows == 1)
01046   {
01047     gsl_matrix_transpose(m);
01048   }
01049 
01050   // compute: ones (n, 1) * sum (x)
01051   gsl_matrix * s = gsl_matrix_alloc (numrows, numcols);
01052 
01053   if (numrows == 1)
01054   {
01055     gsl_matrix_memcpy (m, s);
01056   }
01057   else
01058   {
01059     gsl_vector * v = gsl_vector_alloc(numcols);
01060 
01061     for (int i = 0; i < numcols; i++)
01062     {
01063       double val = 0.0;
01064 
01065       for (j = 0; j < numrows; j++)
01066       {
01067         val += gsl_matrix_get (m, j, i);
01068       }
01069 
01070       gsl_vector_set (v, i, val);
01071 
01072       for (j = 0; j < numrows; j++)
01073       {
01074         gsl_matrix_set_row (s, j, v);
01075       }
01076     }
01077     gsl_vector_free(v);
01078   }
01079 
01080   // divide by "n"
01081   gsl_matrix_scale (s, (double)1/numrows);
01082 
01083   // subtract the result from x
01084   gsl_matrix_sub (m, s);
01085 
01086   // x / (n - 1)
01087   //gsl_matrix_scale (m, (double)1/(numrows-1));
01088 
01089   // multiply by x'
01090   gsl_matrix * p = gsl_matrix_alloc (numcols, numcols);
01091   gsl_blas_dgemm(CblasTrans,CblasNoTrans,1.0,
01092      m,m,0.0,p);
01093 
01094   // scale the result
01095   gsl_matrix_scale (p, (double)1/(numrows-1));
01096 
01097   gsl_matrix_free (m);
01098   gsl_matrix_free (s);
01099 
01100   return p;
01101 }
01102 
01103 /**********************************************/
01104 /* decide which components to retain/discard  */
01105 /* using the method defined by _discardMethod */
01106 /**********************************************/
01107 int Enfa::discardComponents() const
01108 {
01109     double variationTotal=0;
01110     int returnValue=_layer_count;
01111 
01112     // take the top X components
01113     if (_discardMethod==0)
01114     {
01115   Log::instance()->info( "Discarding components with fixed method (0)\n");
01116   returnValue=_retainComponents;
01117   for (int i=0; i<_retainComponents; ++i)
01118       variationTotal+=gsl_vector_get(_gsl_factor_weights_all_components,i);
01119     }
01120     // take the components that account for X % of variation
01121     else if (_discardMethod==1)
01122     {
01123       Log::instance()->info( "Discarding components with variation method (1) (variation=%6.2f)\n", _retainVariation);
01124   for (int i=0; i<_layer_count; ++i)
01125   {
01126       variationTotal+=gsl_vector_get(_gsl_factor_weights_all_components,i);
01127       if (variationTotal>=_retainVariation)
01128       {
01129     // variation exceeds specified amount - keep these components
01130     returnValue=i+1;
01131     break;
01132       }
01133   }
01134     }
01135     // take the components with variation higher than broken stick distribution
01136     else if (_discardMethod==2)
01137     {
01138   Log::instance()->info("Discarding components: Broken stick method (2)\n");
01139         /* work out the broken stick distribution of expected eigenvalues
01140    * based on Jackson's formula for the kth eigenvalue
01141    * bk = sum[i=k...p](1/i) where p=number of eigenvalues */
01142   /* use broken stick only for specialisation factors
01143      as often marginality weight is lower than other factors,
01144      but we always want to keep marginality as the first factor */
01145   gsl_vector* _brokenStickDist = gsl_vector_alloc(_layer_count-1);
01146   for (int i=0; i<_layer_count-1; ++i)
01147   {
01148       double tmp_total=0;
01149       for (int j=i+1; j<=_layer_count-1; ++j)
01150     tmp_total+=(double)1/j;
01151       gsl_vector_set(_brokenStickDist,i, (double)tmp_total/(_layer_count-1));
01152   }
01153 
01154   //displayVector(_brokenStickDist, "broken stick distribution", true);
01155 
01156   double myVariation;
01157   // now compare with the observed variation
01158   for (int i=0; i<_layer_count-1; ++i)
01159   {
01160     //myVariation=gsl_vector_get(_gsl_factor_weights_all_components,i);
01161     // rescale specificity ignoring marginality (first) factor
01162     myVariation=gsl_vector_get(_gsl_factor_weights_all_components,i+1)/(1-gsl_vector_get(_gsl_factor_weights_all_components,0));
01163     //Log::instance()->info("Orig variation %6.2f, new variation %6.2f\n", gsl_vector_get(_gsl_factor_weights_all_components,i+1), myVariation);
01164 
01165       if (myVariation <= gsl_vector_get(_brokenStickDist,i))
01166       {
01167     if (i==0) // problem - no components selected
01168     {
01169         Log::instance()->warn( "First specialisation component explains less variation than the broken stick distribution - retaining all components by default\n" );
01170         variationTotal=1;
01171         returnValue=_layer_count;
01172         break;
01173     }
01174     else // this component is discarded but keep earlier ones
01175     {
01176         returnValue=i+1;
01177         break;
01178     }
01179       }
01180       variationTotal+=myVariation;
01181   }
01182   gsl_vector_free(_brokenStickDist);
01183     }
01184     else
01185     {
01186   Log::instance()->warn( "Unknown problem whilst discarding components, retaining all components by default\n");
01187   variationTotal=1;
01188     }
01189     Log::instance()->info( "Retained components: %i/%i, variation explained: %.2f\n", returnValue, _layer_count, variationTotal);
01190     return returnValue;
01191 }
01192 
01193 
01194 
01196 bool Enfa::enfa1()
01197 {
01198 
01199   //calculate the mean and std deviation for background points
01200   //Log::instance()->info( "Calculating mean and stddev for background points\n" );
01201   _gsl_avg_background_vector = gsl_vector_alloc (_gsl_background_matrix->size2);
01202   _gsl_stddev_background_vector = gsl_vector_alloc (_gsl_background_matrix->size2) ;
01203   calculateMeanAndSd(_gsl_background_matrix,_gsl_avg_background_vector,_gsl_stddev_background_vector);
01204 
01205   if (_verboseDebug)
01206     {
01207       displayVector(_gsl_avg_background_vector, "Background Means", true);
01208       displayVector(_gsl_stddev_background_vector, "Background Stdev", true);
01209     }
01210 
01211   //calculate the mean and std deviation for centered presence points
01212   _gsl_avg_vector = gsl_vector_alloc (_gsl_environment_matrix->size2);
01213   _gsl_stddev_vector = gsl_vector_alloc (_gsl_environment_matrix->size2) ;
01214   calculateMeanAndSd(_gsl_environment_matrix,_gsl_avg_vector,_gsl_stddev_vector);
01215 
01216   if (_verboseDebug)
01217     {
01218       displayVector(_gsl_avg_vector, "Species Means", true);
01219       displayVector(_gsl_stddev_vector, "Species Stdev", true);
01220     }
01221 
01222   //center and standardise the data based on background means
01223   center(_gsl_environment_matrix,_localityCount);
01224   center(_gsl_background_matrix,_backgroundCount);
01225 
01226   //displayMatrix(_gsl_background_matrix, "_gsl_background_matrix", true);
01227 
01228   //calculate the mean and std deviation for centered presence points
01229   calculateMeanAndSd(_gsl_environment_matrix,_gsl_avg_vector,_gsl_stddev_vector);
01230 
01231   if (_verboseDebug)
01232     {
01233       displayVector(_gsl_avg_vector, "Species Means - centered", true);
01234       displayVector(_gsl_stddev_vector, "Species Stdev - centered", true);
01235     }
01236 
01237   //Now calculate the covariance matrix:
01238   //Log::instance()->info( "Calculating covariance matrix\n" );
01239   _gsl_covariance_matrix = autoCovariance(_gsl_environment_matrix);
01240   if (_verboseDebug)
01241     displayMatrix(_gsl_covariance_matrix, "Species Covariance Matrix", true);
01242 
01243   // now get covariance for background data
01244   _gsl_covariance_background_matrix = autoCovariance(_gsl_background_matrix);
01245   if (_verboseDebug)
01246     displayMatrix(_gsl_covariance_background_matrix, "Background Covariance Matrix", true);
01247 
01248   // get root inverse of species covariance matrix
01249   _gsl_covariance_matrix_root_inverse = gsl_matrix_alloc(_gsl_covariance_matrix->size1,_gsl_covariance_matrix->size2);
01250   gsl_matrix* gsl_sqrtm_matrix = sqrtm(_gsl_covariance_matrix);
01251   _gsl_covariance_matrix_root_inverse = inverse(gsl_sqrtm_matrix);
01252   gsl_matrix_free(gsl_sqrtm_matrix);
01253 
01254   //z = cov_species^-0.5 * m';
01255   _gsl_workspace_z = gsl_vector_alloc (_layer_count);
01256   gsl_blas_dgemv (CblasNoTrans, 1.0, _gsl_covariance_matrix_root_inverse, _gsl_avg_vector, 0.0, _gsl_workspace_z);
01257 
01258   if (_verboseDebug)
01259     displayVector(_gsl_workspace_z, "z", true);
01260 
01261   //W = cov_species^-0.5 * cov_global * cov_species^-0.5;
01262   gsl_matrix* _gsl_workspace_W_temp = gsl_matrix_alloc(_layer_count, _layer_count);
01263   _gsl_workspace_W = gsl_matrix_alloc(_layer_count, _layer_count);
01264   gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,
01265      _gsl_covariance_matrix_root_inverse,
01266      _gsl_covariance_background_matrix,0.0,
01267      _gsl_workspace_W_temp);
01268 
01269   gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,1.0,
01270      _gsl_workspace_W_temp, 
01271      _gsl_covariance_matrix_root_inverse,0.0,
01272      _gsl_workspace_W);
01273 
01274   gsl_matrix_free(_gsl_workspace_W_temp);
01275   
01276   if (_verboseDebug)
01277     displayMatrix(_gsl_workspace_W, "W", true);
01278 
01279   //y = z / sqrtm(z' * z);
01280   //require view of z as an Nx1 matrix rather than a vector
01281   gsl_matrix_view _gsl_workspace_z_matrix_view;
01282   _gsl_workspace_z_matrix_view = gsl_matrix_view_vector(_gsl_workspace_z,
01283               1,_layer_count);
01284   double zz;
01285   gsl_blas_ddot(_gsl_workspace_z, _gsl_workspace_z, &zz);
01286 
01287   _gsl_workspace_y = gsl_matrix_alloc(1,_layer_count);
01288   gsl_matrix_memcpy(_gsl_workspace_y, &_gsl_workspace_z_matrix_view.matrix);
01289   gsl_matrix_scale(_gsl_workspace_y,pow(zz, -0.5));
01290 
01291   if (_verboseDebug)
01292     displayMatrix(_gsl_workspace_y, "y", true);
01293 
01294   //eye(N)=identity matrix with dimension NxN (use gsl_matrix_set_identity())
01295   //H = (eye(no_egvs) - y * y') * W * (eye(no_egvs) - y * y');
01296   _gsl_workspace_H = gsl_matrix_alloc(_layer_count,_layer_count);
01297   gsl_matrix* _gsl_workspace_H_temp1 = gsl_matrix_alloc(_layer_count,_layer_count);
01298   gsl_matrix* _gsl_workspace_H_temp2 = gsl_matrix_alloc(_layer_count,_layer_count);
01299   gsl_matrix* _gsl_workspace_H_temp3 = gsl_matrix_alloc(_layer_count,_layer_count);
01300 
01301   //initialise identity matrix
01302   gsl_matrix_set_identity(_gsl_workspace_H_temp1);
01303 
01304   // first step work out (y * y')
01305   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, 
01306      _gsl_workspace_y, 
01307      _gsl_workspace_y,
01308      0.0, _gsl_workspace_H_temp2);
01309 
01310   // now subtract from identity
01311   gsl_matrix_sub(_gsl_workspace_H_temp1,_gsl_workspace_H_temp2);
01312 
01313   // product of this with W
01314   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, 
01315      _gsl_workspace_H_temp1, 
01316      _gsl_workspace_W,
01317      0.0, _gsl_workspace_H_temp3);
01318   
01319   // finally product of this with temp1
01320   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, 
01321      _gsl_workspace_H_temp3, 
01322      _gsl_workspace_H_temp1, 
01323      0.0, _gsl_workspace_H);
01324 
01325   gsl_matrix_free(_gsl_workspace_H_temp1);
01326   gsl_matrix_free(_gsl_workspace_H_temp2);
01327   gsl_matrix_free(_gsl_workspace_H_temp3);
01328 
01329   if (_verboseDebug)
01330     displayMatrix(_gsl_workspace_H, "H", true);
01331 
01332   //work out eigen-vectors/values of H
01333   _gsl_eigenvalue_vector = gsl_vector_alloc (_layer_count);
01334   gsl_vector * _gsl_eigenvalue_vector_copy = gsl_vector_alloc (_layer_count);
01335   _gsl_eigenvector_matrix = gsl_matrix_alloc (_layer_count, _layer_count);
01336   gsl_eigen_symmv_workspace * _gsl_eigen_workpace = gsl_eigen_symmv_alloc (_layer_count);
01337   gsl_eigen_symmv (_gsl_workspace_H,
01338        _gsl_eigenvalue_vector,
01339        _gsl_eigenvector_matrix,
01340        _gsl_eigen_workpace);
01341 
01342   //free the temporary workspace
01343   gsl_eigen_symmv_free (_gsl_eigen_workpace);
01344 
01345   //create score_matrix = cov_species^-0.5 * v;
01346   _gsl_score_matrix=gsl_matrix_alloc(_layer_count,_layer_count);
01347   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, 
01348      _gsl_covariance_matrix_root_inverse,
01349      _gsl_eigenvector_matrix,
01350      0.0, _gsl_score_matrix);
01351 
01352   /* sort eigenvalues, eigenvectors and score matrix */
01353   gsl_vector * _gsl_eigenvalue_vector_presort = gsl_vector_alloc(_layer_count);
01354   gsl_vector_memcpy(_gsl_eigenvalue_vector_presort, _gsl_eigenvalue_vector);
01355   gsl_eigen_symmv_sort (_gsl_eigenvalue_vector, _gsl_eigenvector_matrix,
01356       GSL_EIGEN_SORT_VAL_DESC);
01357   gsl_eigen_symmv_sort (_gsl_eigenvalue_vector_presort, _gsl_score_matrix,
01358       GSL_EIGEN_SORT_VAL_DESC);
01359 
01360   //Log::instance()->info( "Eigenvector sorted\n" );
01361 
01362   /* Replace the min eigenvalue with the difference of the trace of W & H
01363      this becomes eigenvalue 1, the remaining eigenvalues move to index 2+
01364      t = eigenvalues ~= min(eigenvalues);
01365      eigenvalues(2:no_egvs) = eigenvalues(t);
01366      eigenvalues(1) = trace(W) - trace(H)*/
01367 
01368   _gsl_vector_min = gsl_vector_min_index(_gsl_eigenvalue_vector);
01369   //Log::instance()->info( "min eigevalue: %i\n",  _gsl_vector_min);
01370   //Log::instance()->info( "min eigevalue: %6.2f\n",  gsl_vector_min(_gsl_eigenvalue_vector));
01371 
01372   gsl_vector_memcpy (_gsl_eigenvalue_vector_copy,
01373          _gsl_eigenvalue_vector);
01374 
01375   // note starting at 1 as position 0 will be overwritten later
01376   for (int i=1; i<_layer_count; i++)
01377   {
01378       if (_gsl_vector_min>=i)
01379       {
01380     gsl_vector_set(_gsl_eigenvalue_vector,i,
01381        gsl_vector_get(_gsl_eigenvalue_vector_copy,i-1));
01382       }
01383   }
01384 
01385   //displayVector(_gsl_eigenvalue_vector, "_gsl_eigenvalue_vector:A", true);
01386   //displayVector(_gsl_eigenvalue_vector_copy, "_gsl_eigenvalue_vector_copy:A", true);
01387 
01388   gsl_vector_free(_gsl_eigenvalue_vector_copy);
01389 
01390   // now set first eigenvalue to trace(W) - trace(H)
01391   double traceW=0.0;
01392   for (int i=0; i<_layer_count; ++i) traceW+=gsl_matrix_get(_gsl_workspace_W, i, i);
01393   double traceH=0.0;
01394   for (int i=0; i<_layer_count; ++i) traceH+=gsl_matrix_get(_gsl_workspace_H, i, i);
01395   gsl_vector_set(_gsl_eigenvalue_vector,0,traceW-traceH);
01396 
01397   /* % Figure out which eigenvalue has gone AWOL */
01398 
01399   /* remove column associated with dodgy eigenvector (_gsl_vector_min) */
01400   /* Replace the first column with the vector of means */
01401   /* and the move other columns down */
01402   gsl_matrix * _gsl_score_matrix_copy=gsl_matrix_alloc(_layer_count,_layer_count);
01403   gsl_matrix_memcpy(_gsl_score_matrix_copy, _gsl_score_matrix);
01404 
01405   for (int i=0; i<_layer_count; ++i)
01406       for (int j=0; j<_layer_count; ++j)
01407       {
01408     if (j==0)
01409     {
01410       gsl_matrix_set(_gsl_score_matrix,i,j,gsl_vector_get(_gsl_avg_vector,i));
01411     }
01412     else if (_gsl_vector_min>=j)
01413     {       
01414       gsl_matrix_set(_gsl_score_matrix,i,j,
01415          gsl_matrix_get(_gsl_score_matrix_copy,i,j-1));
01416     }
01417       }
01418 
01419   gsl_matrix_free(_gsl_score_matrix_copy);
01420 
01421   double score2norm;
01422   /* Norm the columns of the score_matrix
01423      - that is divide each value by the 2-norm of its column */
01424   for (int i=0; i<_layer_count; ++i)
01425   {
01426       // work out the 2-norm for this column = sqrt(sum(x^2))
01427       double unorm=0.0;
01428       for (int j=0; j<_layer_count; ++j)
01429   unorm+=pow(gsl_matrix_get(_gsl_score_matrix,j,i),2);
01430       unorm=pow(unorm,0.5);
01431 
01432       //Log::instance()->info( "col i: %i norm: %6.2f\n",  i, unorm);
01433 
01434       // and apply this normalisation to the matrix
01435       for (int j=0; j<_layer_count; ++j)
01436       {
01437   score2norm = gsl_matrix_get(_gsl_score_matrix,j,i);
01438   //Log::instance()->info( "score 2 norm: %6.2f\n",  score2norm);
01439   score2norm = score2norm/unorm;
01440   //Log::instance()->info( "score 2 normed: %6.2f\n",  score2norm);
01441   gsl_matrix_set(_gsl_score_matrix,j,i,score2norm);
01442       }
01443   }
01444 
01445   if (_verboseDebug)
01446   {
01447       displayVector(_gsl_eigenvalue_vector, "Eigenvalues", true);
01448       displayMatrix(_gsl_eigenvector_matrix, "Eigenvectors", true);
01449       displayMatrix(_gsl_score_matrix, "Score Matrix", true);
01450   }
01451 
01452   /* marginality = geometric mean of means 
01453      marginality = sqrt(sum(m.^2));
01454      marginality = marginality / 1.96 */
01455 
01456   _marginality=0.0;
01457   for (int i=0; i<_layer_count; ++i)
01458       _marginality+=pow(gsl_vector_get(_gsl_avg_vector,i),2);
01459   _marginality=pow(_marginality,0.5)/1.96;
01460   Log::instance()->info( "Marginality: %6.2f\n", _marginality );
01461 
01462   /* Calculate global specialization
01463      specialization = sqrt(sum(eigenvalues)/length(m)) */
01464   _specialisation=pow((gsl_blas_dasum(_gsl_eigenvalue_vector)/_layer_count),0.5);
01465   Log::instance()->info( "Specialisation: %6.2f\n", _specialisation );
01466 
01467 
01468   /* work out factor weights = eigenvalues/sum(eigenvalues) 
01469    * dicarded components are given a zero weighting */
01470   _gsl_factor_weights_all_components = gsl_vector_alloc(_layer_count);
01471   for (int i=0; i<_layer_count; ++i)
01472   {
01473       gsl_vector_set(_gsl_factor_weights_all_components,i,
01474          fabs(gsl_vector_get(_gsl_eigenvalue_vector,i)));
01475   }
01476   gsl_vector_scale(_gsl_factor_weights_all_components, 1.0/gsl_blas_dasum(_gsl_factor_weights_all_components));
01477 
01478   //After the model is generated, we can discard unwanted components!
01479   _retained_components_count=discardComponents();
01480 
01481   /* work out factor weights = eigenvalues/sum(eigenvalues) 
01482    * dicarded components are given a zero weighting */
01483   _gsl_factor_weights = gsl_vector_alloc(_layer_count);
01484   gsl_vector_set_zero(_gsl_factor_weights);
01485   for (int i=0; i<_retained_components_count; ++i)
01486   {
01487       gsl_vector_set(_gsl_factor_weights,i,
01488          fabs(gsl_vector_get(_gsl_eigenvalue_vector,i)));
01489   }
01490   gsl_vector_scale(_gsl_factor_weights, 1.0/gsl_blas_dasum(_gsl_factor_weights));
01491 
01492   // print factor weights
01493   for (int i=0; i<_retained_components_count; ++i)
01494   {
01495     if (i==0)
01496       {
01497   Log::instance()->info("Marginality (factor 0) weight: %.2f, variation explained: %.2f\n", gsl_vector_get(_gsl_factor_weights,i), gsl_vector_get(_gsl_factor_weights_all_components,i));
01498       }
01499     else
01500       {
01501   Log::instance()->info("Specialisation factor %i weight: %.2f, variation explained: %.2f\n", i, gsl_vector_get(_gsl_factor_weights,i), gsl_vector_get(_gsl_factor_weights_all_components,i));
01502       }
01503   }
01504 
01505   /* Display the variable loadings of each factor (score matrix) */
01506   if (_displayLoadings)
01507     {
01508       displayLoadings(_gsl_score_matrix, _retained_components_count);
01509     }
01510 
01511   /* work out geometric mean of the distance between each species locality and 
01512      every other locality based on the factored data */
01513   _gsl_geomean_vector = gsl_vector_alloc(_localityCount);
01514   _gsl_environment_factor_matrix = gsl_matrix_alloc(_localityCount,_layer_count);
01515   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans,
01516      1.0, _gsl_environment_matrix, _gsl_score_matrix, 
01517      0.0, _gsl_environment_factor_matrix);
01518 
01519   gsl_vector_view _gsl_geomean_workspace;
01520 
01521   double _tmp_geomean;
01522 
01523   //Log::instance()->info( "Calculating geometric means\n" );
01524   //displayMatrix(_gsl_environment_factor_matrix, "_gsl_environment_factor_matrix", true);
01525 
01526   // loop through the localities and calculate the geomean
01527   for (int i=0; i<_localityCount; ++i)
01528   {
01529       _gsl_geomean_workspace=gsl_matrix_row(_gsl_environment_factor_matrix,i);
01530       _tmp_geomean=getGeomean(&_gsl_geomean_workspace.vector);
01531       gsl_vector_set(_gsl_geomean_vector, i, _tmp_geomean);
01532   }
01533 
01534   // finally sort the geomeans to speed up processing later
01535   gsl_sort_vector(_gsl_geomean_vector);
01536 
01537   Log::instance()->info( "ENFA Model Generation Completed\n" );
01538 
01539   return true;
01540 
01541 }
01542 
01543 
01544 
01545 /****************************************************************/
01546 /****************** configuration *******************************/
01547 void
01548 Enfa::_getConfiguration( ConfigurationPtr& config ) const
01549 {
01550   if (!_done )
01551     return;
01552 
01553   ConfigurationPtr model_config( new ConfigurationImpl("Enfa") );
01554   config->addSubsection( model_config );
01555 
01556   // _marginality
01557   model_config->addNameValue( "Marginality", _marginality );
01558 
01559   // _specialisation
01560   model_config->addNameValue( "Specialisation", _specialisation );
01561 
01562   // _retained_components_count
01563   model_config->addNameValue( "RetainedComponents", _retained_components_count );
01564 
01565   double *values = new double[_layer_count];
01566 
01567   // _gsl_avg_background_vector
01568   for (int i=0; i < _layer_count; ++i)
01569     values[i] = gsl_vector_get(_gsl_avg_background_vector, i);
01570 
01571   model_config->addNameValue( "AvgBackgroundVector", values, _layer_count );
01572 
01573   // _gsl_stddev_background_vector
01574   for (int i=0; i < _layer_count; ++i)
01575     values[i] = gsl_vector_get(_gsl_stddev_background_vector, i);
01576 
01577   model_config->addNameValue( "StddevBackgroundVector", values, _layer_count );
01578 
01579   // _gsl_avg_vector
01580   for (int i=0; i < _layer_count; ++i)
01581     values[i] = gsl_vector_get(_gsl_avg_vector, i);
01582 
01583   model_config->addNameValue( "AvgVector", values, _layer_count );
01584 
01585   // _gsl_stddev_vector
01586   for (int i=0; i < _layer_count; ++i)
01587     values[i] = gsl_vector_get(_gsl_stddev_vector, i);
01588 
01589   model_config->addNameValue( "StddevVector", values, _layer_count );
01590 
01591   // _gsl_eigenvalue_vector
01592   for (int i=0; i < _layer_count; ++i)
01593     values[i] = gsl_vector_get(_gsl_eigenvalue_vector, i);
01594 
01595   model_config->addNameValue( "EigenvalueVector", values, _layer_count );
01596 
01597   // _gsl_factor_weights
01598   for (int i=0; i < _layer_count; ++i)
01599     values[i] = gsl_vector_get(_gsl_factor_weights, i);
01600 
01601   model_config->addNameValue( "FactorWeights", values, _layer_count );
01602 
01603   delete[] values;
01604 
01605   // _gsl_eigenvector_matrix
01606   int num_cells = _layer_count * _layer_count;
01607 
01608   double *flat_eigenvector_matrix = new double[num_cells];
01609 
01610   int cnt = 0;
01611 
01612   for (int i=0; i < _layer_count; ++i)
01613     for (int j=0; j < _layer_count; ++j, ++cnt)
01614       flat_eigenvector_matrix[cnt] = gsl_matrix_get( _gsl_eigenvector_matrix, i, j );
01615 
01616   model_config->addNameValue( "EigenvectorMatrix", flat_eigenvector_matrix, num_cells );
01617 
01618   delete[] flat_eigenvector_matrix;
01619 
01620   // _gsl_score_matrix
01621   num_cells = _layer_count * _layer_count;
01622 
01623   double *flat_score_matrix = new double[num_cells];
01624 
01625   cnt = 0;
01626 
01627   for (int i=0; i < _layer_count; ++i)
01628     for (int j=0; j < _layer_count; ++j, ++cnt)
01629       flat_score_matrix[cnt] = gsl_matrix_get( _gsl_score_matrix, i, j );
01630 
01631   model_config->addNameValue( "ScoreMatrix", flat_score_matrix, num_cells );
01632 
01633   delete[] flat_score_matrix;
01634 
01635   // _gsl_environment_factor_matrix
01636   num_cells = _localityCount * _layer_count;
01637 
01638   double *flat_environment_factor_matrix = new double[num_cells];
01639 
01640   cnt = 0;
01641 
01642   for (int i=0; i < _localityCount; ++i)
01643     for (int j=0; j < _layer_count; ++j, ++cnt)
01644       flat_environment_factor_matrix[cnt] = gsl_matrix_get( _gsl_environment_factor_matrix, i, j );
01645 
01646   model_config->addNameValue( "EnvironmentFactorMatrix", flat_environment_factor_matrix, num_cells );
01647 
01648   delete[] flat_environment_factor_matrix;
01649 
01650   // _gsl_geomean_vector
01651   values = new double[_localityCount];
01652 
01653   for (int i=0; i < _localityCount; ++i)
01654     values[i] = gsl_vector_get(_gsl_geomean_vector, i);
01655 
01656   model_config->addNameValue( "LocalityGeomeans", values, _localityCount );
01657 
01658   delete[] values;
01659 
01660 }
01661 
01662 void
01663 Enfa::_setConfiguration( const ConstConfigurationPtr& config )
01664 {
01665   ConstConfigurationPtr model_config = config->getSubsection( "Enfa",false );
01666 
01667   if (!model_config)
01668     return;
01669 
01670   // _retained_components_count
01671   _retained_components_count = model_config->getAttributeAsInt( "RetainedComponents", 0 );
01672 
01673   // _marginality
01674   _marginality = model_config->getAttributeAsDouble( "Marginality", 0.0 );
01675 
01676   // _specialisation
01677   _specialisation = model_config->getAttributeAsDouble( "Specialisation", 0.0 );
01678 
01679   // _gsl_avg_vector
01680   std::vector<double> stl_vector = model_config->getAttributeAsVecDouble( "AvgVector" );
01681 
01682   _layer_count = stl_vector.size();
01683 
01684   _gsl_avg_vector = gsl_vector_alloc( _layer_count );
01685 
01686   for (int i=0; i < _layer_count; ++i)
01687     gsl_vector_set( _gsl_avg_vector, i, stl_vector[i] );
01688 
01689   // _gsl_avg_background_vector
01690   stl_vector = model_config->getAttributeAsVecDouble( "AvgBackgroundVector" );
01691 
01692   _layer_count = stl_vector.size();
01693 
01694   _gsl_avg_background_vector = gsl_vector_alloc( _layer_count );
01695 
01696   for (int i=0; i < _layer_count; ++i)
01697     gsl_vector_set( _gsl_avg_background_vector, i, stl_vector[i] );
01698 
01699   // _gsl_stddev_vector
01700   stl_vector = model_config->getAttributeAsVecDouble( "StddevVector" );
01701 
01702   _gsl_stddev_vector = gsl_vector_alloc( _layer_count );
01703 
01704   for (int i=0; i < _layer_count; ++i)
01705     gsl_vector_set( _gsl_stddev_vector, i, stl_vector[i] );
01706 
01707   // _gsl_stddev_background_vector
01708   stl_vector = model_config->getAttributeAsVecDouble( "StddevBackgroundVector" );
01709 
01710   _gsl_stddev_background_vector = gsl_vector_alloc( _layer_count );
01711 
01712   for (int i=0; i < _layer_count; ++i)
01713     gsl_vector_set( _gsl_stddev_background_vector, i, stl_vector[i] );
01714 
01715   // _gsl_eigenvalue_vector
01716   stl_vector = model_config->getAttributeAsVecDouble( "EigenvalueVector" );
01717 
01718   _layer_count = stl_vector.size();
01719 
01720   _gsl_eigenvalue_vector = gsl_vector_alloc( _layer_count );
01721 
01722   for (int i=0; i < _layer_count; ++i)
01723     gsl_vector_set( _gsl_eigenvalue_vector, i, stl_vector[i] );
01724 
01725   // _gsl_factor_weights
01726   stl_vector = model_config->getAttributeAsVecDouble( "FactorWeights" );
01727 
01728   _layer_count = stl_vector.size();
01729 
01730   _gsl_factor_weights = gsl_vector_alloc( _layer_count );
01731 
01732   for (int i=0; i < _layer_count; ++i)
01733     gsl_vector_set( _gsl_factor_weights, i, stl_vector[i] );
01734 
01735   // _gsl_eigenvector_matrix
01736   stl_vector = model_config->getAttributeAsVecDouble( "EigenvectorMatrix" );
01737 
01738   _gsl_eigenvector_matrix = gsl_matrix_alloc( _layer_count, _layer_count );
01739 
01740   int cnt = 0;
01741 
01742   for (int i=0; i < _layer_count; ++i)
01743     for (int j=0; j < _layer_count; ++j, ++cnt)
01744       gsl_matrix_set( _gsl_eigenvector_matrix, i, j, stl_vector[cnt] );
01745 
01746 
01747   // _gsl_score_matrix
01748   stl_vector = model_config->getAttributeAsVecDouble( "ScoreMatrix" );
01749 
01750   _gsl_score_matrix = gsl_matrix_alloc( _layer_count, _layer_count );
01751 
01752   cnt = 0;
01753 
01754   for (int i=0; i < _layer_count; ++i)
01755     for (int j=0; j < _layer_count; ++j, ++cnt)
01756       gsl_matrix_set( _gsl_score_matrix, i, j, stl_vector[cnt] );
01757 
01758 
01759   // _gsl_environment_factor_matrix
01760   stl_vector = model_config->getAttributeAsVecDouble( "EnvironmentFactorMatrix" );
01761 
01762   _localityCount = stl_vector.size()/_layer_count;
01763 
01764   _gsl_environment_factor_matrix = gsl_matrix_alloc( _localityCount, _layer_count );
01765 
01766   cnt = 0;
01767 
01768   for (int i=0; i < _localityCount; ++i)
01769     for (int j=0; j < _layer_count; ++j, ++cnt)
01770       gsl_matrix_set( _gsl_environment_factor_matrix, i, j, stl_vector[cnt] );
01771 
01772 
01773   // _gsl_geomean_vector
01774   stl_vector = model_config->getAttributeAsVecDouble( "LocalityGeomeans" );
01775 
01776   _gsl_geomean_vector = gsl_vector_alloc( _localityCount );
01777 
01778   for (int i=0; i < _localityCount; ++i)
01779     gsl_vector_set( _gsl_geomean_vector, i, stl_vector[i] );
01780 
01781   _done = true;
01782 
01783 }