hoeffding_numeric_split.hpp
Go to the documentation of this file.
1 
14 #ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
15 #define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
16 
17 #include <mlpack/prereqs.hpp>
18 #include "numeric_split_info.hpp"
19 
20 namespace mlpack {
21 namespace tree {
22 
/**
 * The HoeffdingNumericSplit class implements a numeric feature splitting
 * strategy based on binning: it is configured with a number of bins and a
 * number of observations before binning, and a split on this feature yields
 * one child per bin (NumChildren() returns the number of bins).
 *
 * @tparam FitnessFunction Fitness function type (presumably what
 *     EvaluateFitnessFunction() evaluates -- confirm in the _impl file).
 * @tparam ObservationType Type of the observed feature values; defaults to
 *     double.
 */
51 template<typename FitnessFunction,
52  typename ObservationType = double>
53 class HoeffdingNumericSplit
54 {
55  public:
    //! The splitting information type required by the HoeffdingNumericSplit.
57  typedef NumericSplitInfo<ObservationType> SplitInfo;
58 
    /**
     * Create the HoeffdingNumericSplit class, and specify some basic
     * parameters about how the binning is done.
     *
     * @param numClasses Number of classes.
     * @param bins Number of bins.
     * @param observationsBeforeBinning Number of observations before binning
     *     (presumably how many samples are collected before the bins are
     *     formed -- confirm in the _impl file).
     */
68  HoeffdingNumericSplit(const size_t numClasses = 0,
69  const size_t bins = 10,
70  const size_t observationsBeforeBinning = 100);
71 
    /**
     * Create the HoeffdingNumericSplit from a number of classes and another
     * HoeffdingNumericSplit.
     * NOTE(review): presumably takes its binning settings from `other` --
     * confirm in the _impl file.
     *
     * @param numClasses Number of classes.
     * @param other Split object passed in by const reference.
     */
76  HoeffdingNumericSplit(const size_t numClasses,
77  const HoeffdingNumericSplit& other);
78 
    /**
     * Train the HoeffdingNumericSplit on the given observed value.
     *
     * @param value Observed value.
     * @param label Label associated with the observed value.
     */
88  void Train(ObservationType value, const size_t label);
89 
    /**
     * Evaluate the fitness function given what has been calculated so far.
     *
     * @param bestFitness Passed by non-const reference; presumably receives
     *     the best fitness value.
     * @param secondBestFitness Passed by non-const reference; presumably
     *     receives the second best fitness value.
     */
102  void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
103  const;
104 
    //! Return the number of children if this node splits on this feature.
106  size_t NumChildren() const { return bins; }
107 
    /**
     * Return the majority class of each child to be created, if a split on
     * this dimension was performed.
     *
     * @param childMajorities Passed by non-const reference; receives the
     *     majority class of each child.
     * @param splitInfo Passed by non-const reference; presumably filled with
     *     the information describing the split -- confirm in the _impl file.
     */
112  void Split(arma::Col<size_t>& childMajorities, SplitInfo& splitInfo) const;
113 
    //! Return the majority class.
115  size_t MajorityClass() const;
    //! Return the probability of the majority class.
117  double MajorityProbability() const;
118 
    //! Return the number of bins.
120  size_t Bins() const { return bins; }
121 
    //! Serialize the object.
123  template<typename Archive>
124  void serialize(Archive& ar, const uint32_t /* version */);
125 
126  private:
    //! Observed values (presumably those held before binning -- TODO confirm).
128  arma::Col<ObservationType> observations;
    //! Labels (presumably of the stored observations -- TODO confirm).
130  arma::Col<size_t> labels;
131 
    //! Split points (presumably the bin boundaries -- TODO confirm).
133  arma::Col<ObservationType> splitPoints;
    //! The number of bins; returned by Bins() and NumChildren().
135  size_t bins;
    //! Number of observations before binning (see the first constructor).
137  size_t observationsBeforeBinning;
    //! Sample counter (presumably the number of samples seen so far).
139  size_t samplesSeen;
140 
    //! Sufficient statistics (layout not visible here -- see the _impl file).
142  arma::Mat<size_t> sufficientStatistics;
143 };
144 
//! Convenience alias: HoeffdingNumericSplit with ObservationType fixed to
//! double, leaving only FitnessFunction to be supplied.
146 template<typename FitnessFunction>
147 using HoeffdingDoubleNumericSplit = HoeffdingNumericSplit<FitnessFunction,
148  double>;
149 
150 } // namespace tree
151 } // namespace mlpack
152 
153 // Include implementation.
154 #include "hoeffding_numeric_split_impl.hpp"
155 
156 #endif
HoeffdingNumericSplit(const size_t numClasses=0, const size_t bins=10, const size_t observationsBeforeBinning=100)
Create the HoeffdingNumericSplit class, and specify some basic parameters about how the binning should take place.
void serialize(Archive &ar, const uint32_t)
Serialize the object.
Linear algebra utility functions, generally performed on matrices or vectors.
size_t MajorityClass() const
Return the majority class.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void Split(arma::Col< size_t > &childMajorities, SplitInfo &splitInfo) const
Return the majority class of each child to be created, if a split on this dimension was performed.
The HoeffdingNumericSplit class implements the numeric feature splitting strategy alluded to by Domingos and Hulten.
void EvaluateFitnessFunction(double &bestFitness, double &secondBestFitness) const
Evaluate the fitness function given what has been calculated so far.
size_t NumChildren() const
Return the number of children if this node splits on this feature.
void Train(ObservationType value, const size_t label)
Train the HoeffdingNumericSplit on the given observed value (remember that this object only cares about the information for a single feature, not an entire point).
double MajorityProbability() const
Return the probability of the majority class.
NumericSplitInfo< ObservationType > SplitInfo
The splitting information type required by the HoeffdingNumericSplit.
size_t Bins() const
Return the number of bins.