13 #ifndef MLPACK_CORE_DATA_TOKENIZERS_SPLIT_BY_ANY_OF_HPP 14 #define MLPACK_CORE_DATA_TOKENIZERS_SPLIT_BY_ANY_OF_HPP 17 #include <mlpack/core/boost_backport/boost_backport_string_view.hpp> 33 using MaskType = std::array<bool, 1 << CHAR_BIT>;
44 for (
char symbol : delimiters)
45 mask[
static_cast<unsigned char>(symbol)] =
true;
54 boost::string_view
operator()(boost::string_view& str)
const 56 boost::string_view retval;
58 while (retval.empty())
60 const std::size_t pos = FindFirstDelimiter(str);
67 retval = str.substr(0, pos);
68 str.remove_prefix(pos + 1);
96 size_t FindFirstDelimiter(
const boost::string_view str)
const 98 for (
size_t pos = 0; pos < str.size(); pos++)
100 if (mask[static_cast<unsigned char>(str[pos])])
boost::string_view operator()(boost::string_view &str) const
The function extracts the first token from the given string view and then removes the prefix containing the token (and the delimiter that follows it) from the view.
Linear algebra utility functions, generally performed on matrices or vectors.
std::array< bool, 1<< CHAR_BIT > MaskType
A convenient alias for the mask type.
The core includes that mlpack expects; standard C++ includes and Armadillo.
boost::string_view TokenType
The type of the token which the tokenizer extracts.
SplitByAnyOf(const boost::string_view delimiters)
Construct the object from the given delimiters.
MaskType & Mask()
Modify the mask.
The SplitByAnyOf class tokenizes a string using a set of delimiters.
const MaskType & Mask() const
Return the mask.
static bool IsTokenEmpty(const boost::string_view token)
The function returns true if the given token is empty.