13 #ifndef MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP 14 #define MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP 17 #include <mlpack/core/boost_backport/boost_backport_string_view.hpp> 18 #include <unordered_map> 31 template<
typename Token>
// Alias for the container that backs the generic dictionary: an unordered map
// from a token of type Token to the size_t label assigned to that token.
36 using MapType = std::unordered_map<Token, size_t>;
// Appears to be the body of the generic HasToken(const Token&) const; its
// signature line is not part of this listing.
// The token counts as present exactly when find() does not return end().
48 return mapping.find(token) != mapping.end();
// Fragment of the generic AddToken(T&&) body; the signature and the return
// statement are not shown in this listing.
// The new label is the current number of entries plus one, so the first token
// added to an empty map is stored with label 1.
61 size_t size = mapping.size();
// The token is forwarded so the key is constructed with the caller's value
// category. If the token is already a key, operator[] finds the existing
// entry and its label is overwritten.
63 mapping[std::forward<T>(token)] = ++size;
// Returns the label stored for `token`.
// The lookup uses at(), so an unknown token raises std::out_of_range instead
// of inserting a new entry.
74 size_t Value(
const Token& token)
const 76 return mapping.at(token);
// Returns the number of entries currently held in the mapping.
80 size_t Size()
const {
return mapping.size(); }
// Serialization fragment for the generic dictionary; the function signature
// is not shown in this listing.
// The whole mapping is handed to the archive as one name-value pair.
96 template<
typename Archive>
99 ar(CEREAL_NVP(mapping));
// Map alias for the boost::string_view dictionary. Only the first and last
// lines of the declaration appear in this listing: the visible part supplies
// boost::hash<boost::string_view> as the hash functor, while the key and
// mapped-type arguments fall on lines that are not shown.
115 using MapType = std::unordered_map<
118 boost::hash<boost::string_view>>;
// Appears to be the body of the copy constructor (signature and member
// initializers are not shown; presumably `tokens` has already been copied
// from `other` at this point - TODO confirm).
// Each label is read from `other`, but the key is built from the string held
// in this object's own `tokens`, so the map's keys refer to local storage.
130 for (
const std::string& token : tokens)
131 mapping[token] = other.mapping.at(token);
// Copy-assignment fragment; the signature, listing lines 141-142 and the
// return statement are not shown here.
// The token strings are copied first so that the keys inserted below can
// refer to this object's own storage.
140 tokens = other.tokens;
// NOTE(review): the lines between 140 and 143 are missing from this listing.
// Confirm that `mapping` is emptied there; otherwise entries keyed on the
// strings replaced by the assignment above would remain in the map.
// Re-insert every label from `other`, keyed on the locally owned strings.
143 for (
const std::string& token : tokens)
144 mapping[token] = other.mapping.at(token);
// Returns true when `token` is a key of the mapping.
// The view is taken by value and only compared against stored keys; nothing
// is inserted.
158 bool HasToken(
const boost::string_view token)
const 160 return mapping.find(token) != mapping.end();
// Fragment of AddToken for the boost::string_view dictionary; the signature
// and return statement are not shown in this listing.
// An owning std::string copy of the token is appended to `tokens` first.
172 tokens.emplace_back(token);
// The new label is the current number of entries plus one.
174 size_t size = mapping.size();
// The key is built from the stored copy (tokens.back()), not from the
// caller's view, so it does not depend on the caller's buffer staying alive.
// std::deque keeps references to existing elements valid on emplace_back, so
// keys inserted earlier stay usable as `tokens` grows.
176 mapping[tokens.back()] = ++size;
// Returns the label stored for `token`.
// The lookup uses at(), so an unknown token raises std::out_of_range instead
// of inserting a new entry.
187 size_t Value(
const boost::string_view token)
const 189 return mapping.at(token);
// Returns the number of entries in the mapping. This is the map's size, not
// tokens.size().
193 size_t Size()
const {
return mapping.size(); }
// Read-only access to the deque of stored token strings.
203 const std::deque<std::string>&
Tokens()
const {
return tokens; }
// Mutable access to the deque of stored token strings.
// NOTE(review): the mapping is keyed on views built from these strings, and
// nothing in this accessor updates the mapping. Changes made through the
// returned reference are presumably the caller's responsibility to keep
// consistent - confirm.
205 std::deque<std::string>&
Tokens() {
return tokens; }
// Serialization for the boost::string_view dictionary; the function signature
// and brace lines are not part of this listing.
// The archive receives a token count followed by one (token, tokenValue) pair
// per stored string; `mapping` itself is never passed to the archive.
215 template<
typename Archive>
// Initialised from the current contents; when loading, the archive call below
// replaces it with the stored count.
218 size_t numTokens = tokens.size();
220 ar(CEREAL_NVP(numTokens));
// Loading path.
222 if (cereal::is_loading<Archive>())
// Size the deque first so each string can be read directly into its slot.
224 tokens.resize(numTokens);
226 for (std::string& token : tokens)
228 ar(CEREAL_NVP(token));
230 size_t tokenValue = 0;
231 ar(CEREAL_NVP(tokenValue));
// The key is built from the string that now lives in `tokens`.
// NOTE(review): no statement visible in this listing empties `mapping` before
// this loop. If the object already held tokens, older entries would seemingly
// remain, keyed on strings that have just been overwritten - confirm against
// the full source.
232 mapping[token] = tokenValue;
// Saving path: write each string followed by the label looked up for it.
235 if (cereal::is_saving<Archive>())
237 for (std::string& token : tokens)
239 ar(CEREAL_NVP(token));
// at() throws std::out_of_range if a stored string has no mapping entry.
241 size_t tokenValue = mapping.at(token);
242 ar(CEREAL_NVP(tokenValue));
// Owning storage for the token strings of the boost::string_view dictionary.
249 std::deque<std::string> tokens;
// Map alias for the dictionary whose tokens are ints: a fixed-size array of
// 1 << CHAR_BIT labels, indexed directly by the token value.
260 using MapType = std::array<size_t, 1 << CHAR_BIT>;
// Appears to be the body of HasToken(const int) const; the signature line is
// not shown in this listing.
// A slot holding 0 is treated as "token absent"; any positive value is a label.
// NOTE(review): `token` indexes the array with no visible bounds check.
// Presumably callers only pass values in [0, 1 << CHAR_BIT) - confirm.
280 return mapping[token] > 0;
// Appears to be the body of AddToken(const int); the signature and return
// statement are not shown in this listing.
// `size` is incremented first and the new value is stored as the label, so
// stored labels are never 0.
293 mapping[token] = ++size;
// Appears to be the body of Value(const int) const; the signature line is not
// shown in this listing.
// Returns the array slot for `token` as-is; the line performs no presence or
// bounds check.
307 return mapping[token];
// Serialization fragment for the int-token dictionary; the function signature
// is not shown in this listing.
// Both the label array and the `size` counter are passed to the archive, each
// as its own name-value pair.
330 template<
typename Archive>
333 ar(CEREAL_NVP(mapping));
334 ar(CEREAL_NVP(size));
const MapType & Mapping() const
Get the mapping.
size_t AddToken(const int token)
The function adds the given token to the dictionary and assigns a label to the token.
size_t AddToken(T &&token)
The function adds the given token to the dictionary and assigns a label to the token.
MapType & Mapping()
Modify the mapping.
MapType & Mapping()
Modify the mapping.
StringEncodingDictionary()
Construct the default class.
Linear algebra utility functions, generally performed on matrices or vectors.
std::unordered_map< boost::string_view, size_t, boost::hash< boost::string_view > > MapType
A convenient alias for the internal type of the map.
const std::deque< std::string > & Tokens() const
Get the tokens.
The core includes that mlpack expects; standard C++ includes and Armadillo.
bool HasToken(const int token) const
The function returns true if the dictionary contains the given token.
size_t Value(const Token &token) const
The function returns the label assigned to the given token.
size_t Size() const
Get the size of the dictionary.
void serialize(Archive &ar, const uint32_t)
Serialize the class to the given archive.
void Clear()
Clear the dictionary.
std::array< size_t, 1<< CHAR_BIT > MapType
A convenient alias for the internal type of the map.
bool HasToken(const boost::string_view token) const
The function returns true if the dictionary contains the given token.
void serialize(Archive &ar, const uint32_t)
Serialize the class to the given archive.
This class provides a dictionary interface for the purpose of string encoding.
boost::string_view TokenType
The type of the token that the dictionary stores.
StringEncodingDictionary(const StringEncodingDictionary &other)
Copy the class from the given object.
void Clear()
Clear the dictionary.
std::deque< std::string > & Tokens()
Modify the tokens.
MapType & Mapping()
Modify the mapping.
const MapType & Mapping() const
Get the mapping.
Token TokenType
The type of the token that the dictionary stores.
int TokenType
The type of the token that the dictionary stores.
bool HasToken(const Token &token) const
The function returns true if the dictionary contains the given token.
const MapType & Mapping() const
Get the mapping.
size_t AddToken(const boost::string_view token)
The function adds the given token to the dictionary and assigns a label to the token.
void serialize(Archive &ar, const uint32_t)
Serialize the class to the given archive.
size_t Size() const
Get the size of the dictionary.
void Clear()
Clear the dictionary.
size_t Size() const
Get the size of the dictionary.
size_t Value(const boost::string_view token) const
The function returns the label assigned to the given token.
size_t Value(const int token) const
The function returns the label assigned to the given token.
std::unordered_map< Token, size_t > MapType
A convenient alias for the internal type of the map.
StringEncodingDictionary & operator=(const StringEncodingDictionary &other)
Copy the class from the given object.