missing_policy.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
13 #define MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <unordered_map>
18 #include <limits>
19 #include <set>
20 
21 namespace mlpack {
22 namespace data {
23 
32 {
33  public:
34  // typedef of MappedType
35  using MappedType = double;
36 
38  {
39  // Nothing to initialize here.
40  }
41 
49  explicit MissingPolicy(std::set<std::string> missingSet) :
50  missingSet(std::move(missingSet))
51  {
52  // Nothing to initialize here.
53  }
54 
//! This policy does not need a first pass over the data to set up, so the
//! flag is fixed to false.
56  static const bool NeedsFirstPass = false;
57 
/**
 * First-pass hook; intentionally does nothing for this policy.
 *
 * Both arguments (the token string and the dimension index) are accepted and
 * ignored, so calling this has no effect on the policy.
 *
 * @tparam T Element type; not used by the body.
 */
template<typename T>
void MapFirstPass(const std::string&, const size_t)
{ /* Intentionally a no-op. */ }
67 
83  template<typename MapType, typename T>
84  T MapString(const std::string& string,
85  const size_t dimension,
86  MapType& maps,
87  std::vector<Datatype>& /* types */)
88  {
89  static_assert(std::numeric_limits<T>::has_quiet_NaN == true,
90  "Cannot use MissingPolicy with types where has_quiet_NaN() is false!");
91 
92  // If we can load the string then there is no need for mapping.
93  std::stringstream token;
94  token.str(string);
95  T t;
96  token >> t; // Could be sped up by only doing this if we need to.
97 
98  MappedType value = std::numeric_limits<MappedType>::quiet_NaN();
99  // But we can't use that for the map, so we need some other thing that will
100  // represent quiet_NaN().
101  const MappedType mapValue = std::nexttoward(
102  std::numeric_limits<MappedType>::max(), MappedType(0));
103 
104  // If extraction of the value fails, or if it is a value that is supposed to
105  // be mapped, then do mapping.
106  if (token.fail() || !token.eof() ||
107  missingSet.find(string) != std::end(missingSet))
108  {
109  // Everything is mapped to NaN. However we must still keep track of
110  // everything that we have mapped, so we add it to the maps if needed.
111  if (maps.count(dimension) == 0 ||
112  maps[dimension].first.count(string) == 0)
113  {
114  // This string does not exist yet.
115  typedef std::pair<std::string, MappedType> PairType;
116  maps[dimension].first.insert(PairType(string, value));
117 
118  // Insert right mapping too.
119  if (maps[dimension].second.count(mapValue) == 0)
120  {
121  // Create new element in reverse map.
122  maps[dimension].second.insert(std::make_pair(mapValue,
123  std::vector<std::string>()));
124  }
125  maps[dimension].second[mapValue].push_back(string);
126  }
127 
128  return value;
129  }
130  else
131  {
132  // We can just return the value that we read.
133  return t;
134  }
135  }
136 
137  private:
138  // Note that missingSet and maps are different.
139  // missingSet specifies which value/string should be mapped and may be a
140  // superset of 'maps'.
141  std::set<std::string> missingSet;
142 }; // class MissingPolicy
143 
144 } // namespace data
145 } // namespace mlpack
146 
147 #endif
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
T MapString(const std::string &string, const size_t dimension, MapType &maps, std::vector< Datatype > &)
Given the string and the dimension to which it belongs by the user, and the maps and types given by t...
void MapFirstPass(const std::string &, const size_t)
There is nothing for us to do here, but this is required by the MapPolicy type.
static const bool NeedsFirstPass
This doesn't need a first pass over the data to set up.
MissingPolicy(std::set< std::string > missingSet)
Create the MissingPolicy object with the given missingSet.
MissingPolicy is used as a helper class for DatasetMapper.