increment_policy.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
13 #define MLPACK_CORE_DATA_MAP_POLICIES_INCREMENT_POLICY_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <unordered_map>
18 
19 namespace mlpack {
20 namespace data {
21 
34 {
35  public:
36  IncrementPolicy(const bool forceAllMappings = false) :
37  forceAllMappings(forceAllMappings) { }
38 
39  // typedef of MappedType
40  using MappedType = size_t;
41 
43  static const bool NeedsFirstPass = true;
44 
48  template<typename T, typename InputType>
49  void MapFirstPass(const InputType& input,
50  const size_t dim,
51  std::vector<Datatype>& types)
52  {
53  if (types[dim] == Datatype::categorical)
54  {
55  // No need to check; it's already categorical.
56  return;
57  }
58 
59  if (forceAllMappings)
60  {
61  types[dim] = Datatype::categorical;
62  }
63  else
64  {
65  // Attempt to convert the input to an output type via a stringstream.
66  std::stringstream token;
67  token << input;
68  T val;
69  token >> val;
70 
71  if (token.fail() || !token.eof())
72  types[dim] = Datatype::categorical;
73  }
74  }
75 
89  template<typename MapType, typename T, typename InputType>
90  T MapString(const InputType& input,
91  const size_t dimension,
92  MapType& maps,
93  std::vector<Datatype>& types)
94  {
95  // If we are in a categorical dimension we already know we need to map.
96  if (types[dimension] == Datatype::numeric && !forceAllMappings)
97  {
98  // Check if this input needs to be mapped or if it can be read
99  // directly as a number. This will be true if nothing else in this
100  // dimension has yet been mapped, but this can't be read as a number.
101  std::stringstream token;
102  token << input;
103  T val;
104  token >> val;
105 
106  if (!token.fail() && token.eof())
107  return val;
108 
109  // Otherwise, we must map.
110  }
111 
112  // If this condition is true, either we have no mapping for the given input
113  // or we have no mappings for the given dimension at all. In either case,
114  // we create a mapping.
115  if (maps.count(dimension) == 0 ||
116  maps[dimension].first.count(input) == 0)
117  {
118  // This input does not exist yet.
119  size_t numMappings = maps[dimension].first.size();
120 
121  // Change type of the feature to categorical.
122  if (numMappings == 0)
123  types[dimension] = Datatype::categorical;
124 
125  typedef typename std::pair<InputType, MappedType> PairType;
126  maps[dimension].first.insert(PairType(input, numMappings));
127 
128  // Do we need to create the second map?
129  if (maps[dimension].second.count(numMappings) == 0)
130  {
131  maps[dimension].second.insert(std::make_pair(numMappings,
132  std::vector<InputType>()));
133  }
134  maps[dimension].second[numMappings].push_back(input);
135 
136  return T(numMappings);
137  }
138  else
139  {
140  // This input already exists in the mapping.
141  return maps[dimension].first.at(input);
142  }
143  }
144 
145  private:
146  // Whether or not we should map all tokens.
147  bool forceAllMappings;
148 }; // class IncrementPolicy
149 
150 } // namespace data
151 } // namespace mlpack
152 
153 #endif
IncrementPolicy is used as a helper class for DatasetMapper.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
T MapString(const InputType &input, const size_t dimension, MapType &maps, std::vector< Datatype > &types)
Given the input and the dimension to which it belongs, and the maps and types given by the Datase...
static const bool NeedsFirstPass
We do need a first pass over the data to set the dimension types right.
IncrementPolicy(const bool forceAllMappings=false)
void MapFirstPass(const InputType &input, const size_t dim, std::vector< Datatype > &types)
Determine if the dimension is numeric or categorical.