37 #include "librf/instance_set.h"
38 #include "librf/tree.h"
39 #include "librf/weights.h"
42 using namespace librf;
49 #define NUMTREES_ID "NumTrees"
50 #define K_ID "VarsPerTree"
51 #define UNSUP_ID "ForceUnsupervisedLearning"
53 #define RF_LOG_PREFIX "RfAlgorithm: "
76 "Number of variables per tree",
78 "Number of variables per tree (zero defaults to the square root of the number of layers)",
79 "Number of variables per tree (zero defaults to the square root of the number of layers)",
89 "Force unsupervised learning",
91 "Force unsupervised learning",
92 "When absence points are provided, this parameter can be used to ignore them forcing unsupervised learning. Note that if no absences are provided, unsupervised learning will be used anyway.",
116 "Leo Breiman & Adele Cutler",
117 "Breiman, L. (2001). Random forests. Machine Learning, 45, 5-32.",
119 "Renato De Giovanni",
120 "renato [at] cria . org . br",
156 _initialized( false )
171 for (
unsigned int i = 0; i <
_trees.size(); ++i ) {
189 int num_layers =
_samp->numIndependent();
222 _k = int( sqrt(
double( num_layers ) ) );
226 bool force_unsupervised_learning =
false;
230 force_unsupervised_learning =
true;
236 int num_presences =
_samp->numPresence();
238 if ( num_presences == 0 ) {
246 unsigned int seed = (
unsigned int)
_rand.
get();
248 stringstream sdata(
"");
249 stringstream slabels(
"");
256 p_iterator = presences->begin();
257 p_end = presences->end();
259 while ( p_iterator != p_end ) {
261 Sample presence = (*p_iterator)->environment();
265 slabels <<
"0" << endl;
270 if (
_samp->numAbsence() && ! force_unsupervised_learning ) {
274 p_iterator = absences->begin();
275 p_end = absences->end();
277 while ( p_iterator != p_end ) {
279 Sample absence = (*p_iterator)->environment();
283 slabels <<
"1" << endl;
288 istream data( sdata.rdbuf() );
289 istream labels( slabels.rdbuf() );
291 _set = InstanceSet::load_csv_and_labels( data, labels );
297 istream data( sdata.rdbuf() );
299 _set = InstanceSet::load_unsupervised( data, &seed );
313 for (
unsigned int j = 0; j < sample.
size(); ++j ) {
315 ss << sample[j] <<
",";
329 weight_list* w =
new weight_list(
_set->size(),
_set->size());
332 for (
unsigned int j = 0; j <
_set->size(); ++j ) {
377 stringstream sdata(
"");
381 istream data( sdata.rdbuf() );
383 stringstream slabels(
"0");
385 istream labels( slabels.rdbuf() );
387 InstanceSet* set = InstanceSet::load_csv_and_labels( data, labels );
391 for (
unsigned int i = 0; i <
_trees.size(); ++i ) {
393 int predict =
_trees[i]->predict( *set, 0 );
394 votes.add( predict );
397 float prob = votes.percentage(0);
422 config->addSubsection( model_config );
424 model_config->addNameValue(
"Trees",
_num_trees );
425 model_config->addNameValue(
"K",
_k );
428 tree_node* p_node = NULL;
430 unsigned int num_nodes;
440 num_nodes = p_tree->num_nodes();
442 tree_config->addNameValue(
"Nodes", (
int)num_nodes );
444 sprintf( buffer,
"%4.2f", p_tree->training_accuracy() );
446 tree_config->addNameValue(
"Accuracy", buffer );
447 tree_config->addNameValue(
"Split", (
int)p_tree->num_split_nodes() );
448 tree_config->addNameValue(
"Terminal", (
int)p_tree->num_terminal_nodes() );
450 for (
unsigned int j= 0; j < num_nodes; ++j ) {
452 p_node = p_tree->get_node( j );
456 librf::NodeStatusType status = p_node->status;
458 node_config->addNameValue(
"Status", (
int)status );
460 if ( status == SPLIT ) {
462 node_config->addNameValue(
"L", (
int)p_node->left );
463 node_config->addNameValue(
"R", (
int)p_node->right );
464 node_config->addNameValue(
"A", (
int)p_node->attr );
465 node_config->addNameValue(
"S", (
float)p_node->split_point );
467 else if ( status == TERMINAL ) {
469 node_config->addNameValue(
"V", (
char)p_node->label );
472 tree_config->addSubsection( node_config );
475 model_config->addSubsection( tree_config );
484 if ( ! model_config )
487 _num_trees = model_config->getAttributeAsInt(
"Trees", 0 );
489 _k = model_config->getAttributeAsInt(
"K", 0 );
495 Configuration::subsection_list::iterator tree = trees.begin();
496 Configuration::subsection_list::iterator last_tree = trees.end();
498 for ( ; tree != last_tree; ++tree ) {
500 if ( (*tree)->getName() !=
"Tree" ) {
505 Tree* my_tree =
new Tree();
509 Configuration::subsection_list::iterator node = nodes.begin();
510 Configuration::subsection_list::iterator last_node = nodes.end();
512 for ( ; node != last_node; ++node ) {
514 if ( (*node)->getName() !=
"Node" ) {
519 int status = (*node)->getAttributeAsInt(
"Status", 0 );
523 if ( status == SPLIT ) {
525 my_node.status = SPLIT;
526 my_node.left = (*node)->getAttributeAsInt(
"L", 0 );
527 my_node.right = (*node)->getAttributeAsInt(
"R", 0 );
528 my_node.attr = (*node)->getAttributeAsInt(
"A", 0 );
529 double split_point = (*node)->getAttributeAsDouble(
"S", 0.0 );
530 my_node.split_point = (float)split_point;
532 else if ( status == TERMINAL ) {
534 my_node.status = TERMINAL;
535 int label = (*node)->getAttributeAsInt(
"V", 0 );
536 my_node.label = uchar(label);
543 my_tree->add_node( my_node );
546 _trees.push_back( my_tree );
double get(double min, double max)
void warn(const char *format,...)
'Warn' level.
Scalar getValue(const Sample &x) const
std::vector< ConfigurationPtr > subsection_list
void _setConfiguration(const ConstConfigurationPtr &)
OM_ALG_DLL_EXPORT AlgMetadata const * algorithmMetadata()
double Scalar
Type of map values.
void _getConfiguration(ConfigurationPtr &) const
vector< librf::Tree * > _trees
static Log * instance()
Returns the instance pointer, creating the object on the first call.
librf::InstanceSet * _set
void error(const char *format,...)
'Error' level.
float getProgress() const
int getParameter(std::string const &name, std::string *value)
void _sampleToLine(Sample sample, stringstream &ss) const
OM_ALG_DLL_EXPORT AlgorithmImpl * algorithmFactory()
vector< int > _class_weights
int getConvergence(Scalar *const val) const
std::vector< OccurrencePtr >::const_iterator const_iterator
static AlgParamMetadata parameters[NUM_PARAM]
static AlgMetadata metadata