openModeller  Version 1.4.0
regression.cpp
Go to the documentation of this file.
00001 
00036 #include <openmodeller/om.hh>
00037 #include <openmodeller/Sample.hh>
00038 
00039 #include "regression.hh"
00040 
00041 Regression::Regression() :
00042   _dimensions(0),
00043   _resamples(0),
00044   _paramA(),
00045   _paramB(),
00046   _paramC()
00047 { }
00048 
00049 
00050 void Regression::calculateParameters(const OccurrencesPtr& occs)
00051 {
00052   //next two lines commented out since the vars arent used
00053   //Scalar y;         // outcome (dependent) variable for sample
00054   //Scalar xx;        // temp storage for x^2
00055   Sample x;         // predictor (independent) variable for current sample
00056   Scalar s_y;      // sum of y for all samples
00057   Sample s_x;      // sum of x for all samples
00058   Sample s_xy;    // sum of x*y for all samples
00059   Sample s_xx;     // sum of x^2 for all samples
00060   Sample s_xxy;   // sum of y*x^2 for all samples
00061   Sample s_x4;     // sum of x^4 for all samples
00062 
00063   int n = _resamples = occs->numOccurrences();
00064   
00065   // number of layers/genes/independent variables
00066   _dimensions = (*occs)[0]->environment().size();
00067   
00068   s_y = 0.0;
00069   s_x = s_xy = s_xx = s_xxy = s_x4 = Sample(_dimensions);
00070 
00071   // iterate through the samples
00072   OccurrencesImpl::const_iterator oc_it = occs->begin();
00073   OccurrencesImpl::const_iterator oc_end = occs->end();
00074 
00075   while (oc_it != oc_end)
00076     {
00077       Scalar y = ( (*oc_it)->abundance() > 0.0 ) ? 1.0 : 0.0;
00078       Sample x = (*oc_it)->environment();
00079 
00080       Sample::iterator xit = x.begin();
00081       Sample::iterator end = x.end();
00082       Sample::iterator s_xi = s_x.begin();
00083       Sample::iterator s_xxi = s_xx.begin();
00084       Sample::iterator s_xyi = s_xy.begin();
00085       Sample::iterator s_xxyi = s_xxy.begin();
00086       Sample::iterator s_x4i = s_x4.begin();
00087 
00088       s_y += y;
00089 
00090       //printf("Regression: Resample %5d\n", i);
00091 
00092       while (xit != end)
00093   {
00094     Scalar xi = (*xit);
00095     Scalar xx = xi * xi;
00096 
00097     (*s_xi) += xi;
00098     (*s_xxi) += xx;
00099     (*s_xyi) += xi * y;
00100     (*s_xxyi) += xx * y; 
00101     (*s_x4i) += xx * xx;
00102 
00103     ++xit;
00104     ++s_xi;
00105     ++s_xxi;
00106     ++s_xyi;
00107     ++s_xxyi;
00108     ++s_x4i;
00109   }
00110 
00111       oc_it++;
00112     }
00113 
00114   //printf("Sum of abundances: %7.3f\n", s_y);
00115 
00116   _paramC = _paramB = _paramA = Sample(_dimensions);
00117 
00118   Sample::iterator pai = _paramA.begin();
00119   Sample::iterator pbi = _paramB.begin();
00120   Sample::iterator pci = _paramC.begin();
00121   Sample::iterator end = _paramC.end();
00122   Sample::iterator s_xi = s_x.begin();
00123   Sample::iterator s_xxi = s_xx.begin();
00124   Sample::iterator s_xyi = s_xy.begin();
00125   Sample::iterator s_xxyi = s_xxy.begin();
00126   Sample::iterator s_x4i = s_x4.begin();
00127 
00128   while (pci != end)
00129     {
00130       //printf("Regression: ParamIndex (%12d, %12d)\n", pai, end);
00131 
00132       (*pci) = ( n * (*s_xxyi) - ( (*s_xxi) * s_y ) ) / 
00133          ( n * (*s_x4i)  - ( (*s_xxi) * (*s_xxi) ) );
00134 
00135       (*pbi) = ( n * (*s_xyi)  - ( (*s_xi)  * s_y ) ) / 
00136          ( n * (*s_xxi)  - ( (*s_xi)  * (*s_xi) ) );
00137 
00138       (*pai) = ( s_y / n ) - ( (*pbi) * (*s_xi) / n );
00139 
00140       ++pai; 
00141       ++pbi; 
00142       ++pci;
00143       
00144       ++s_xi;
00145       ++s_xxi;
00146       ++s_xyi;
00147       ++s_xxyi;
00148       ++s_x4i;
00149     }
00150   
00151   /*
00152   Log::instance()->info("logit: a=%+8.4f b=%+8.4f c=%+8.4f n=%d\n", a, b, c, n);
00153   Log::instance()->info("aux: xi=%+8.4f yi=%+8.4f xiyi=%+8.4f xxi=%+8.4f xxiyi=%+8.4f xi4=%+8.4f\n", 
00154   s_xi, s_yi, s_xiyi, s_xxi, s_xxiyi, s_xi4);
00155   */
00156 }
00157