15 #ifndef MLPACK_CORE_DATA_DATASET_INFO_HPP 16 #define MLPACK_CORE_DATA_DATASET_INFO_HPP 19 #include <unordered_map> 40 template<
typename PolicyType,
typename InputType = std::
string>
56 explicit DatasetMapper(PolicyType& policy,
const size_t dimensionality = 0);
74 void MapFirstPass(
const InputType& input,
const size_t dimension);
88 const size_t dimension);
110 const size_t dimension,
111 const size_t unmappingIndex = 0)
const;
117 size_t NumUnmappings(
const T value,
const size_t dimension)
const;
128 typename PolicyType::MappedType
UnmapValue(
const InputType& input,
129 const size_t dimension);
153 template<
typename Archive>
156 ar(CEREAL_NVP(types));
157 ar(CEREAL_NVP(maps));
161 const PolicyType&
Policy()
const;
166 void Policy(PolicyType&& policy);
170 std::vector<Datatype> types;
173 using ForwardMapType =
typename std::unordered_map<InputType,
typename 174 PolicyType::MappedType>;
178 using ReverseMapType = std::unordered_map<
typename PolicyType::MappedType,
179 std::vector<InputType>>;
184 using MapType = std::unordered_map<size_t, std::pair<ForwardMapType,
201 #include "dataset_mapper_impl.hpp" T MapString(const InputType &input, const size_t dimension)
Given the input and the dimension to which it belongs, return its numeric mapping.
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the datatype of each dimension.
DatasetMapper(const size_t dimensionality=0)
Create the DatasetMapper object with the given dimensionality.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
const InputType & UnmapString(const T value, const size_t dimension, const size_t unmappingIndex=0) const
Return the input that corresponds to a given value in a given dimension.
PolicyType::MappedType UnmapValue(const InputType &input, const size_t dimension)
Return the value that corresponds to a given input in a given dimension.
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
size_t NumMappings(const size_t dimension) const
Get the number of mappings for a particular dimension.
void MapFirstPass(const InputType &input, const size_t dimension)
Preprocessing: during a first pass of the data, pass the input on to the MapPolicy if it is needed.
size_t NumUnmappings(const T value, const size_t dimension) const
Get the number of possible unmappings for a string in a given dimension.
void SetDimensionality(const size_t dimensionality)
Set the dimensionality of an existing DatasetMapper object.
void serialize(Archive &ar, const uint32_t)
Serialize the dataset information.
Datatype
The Datatype enum specifies the types of data mlpack algorithms can use.
size_t Dimensionality() const
Get the dimensionality of the DatasetMapper object (that is, how many dimensions it has information for).
const PolicyType & Policy() const
Return the policy of the mapper.