split_by_any_of.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_DATA_TOKENIZERS_SPLIT_BY_ANY_OF_HPP
14 #define MLPACK_CORE_DATA_TOKENIZERS_SPLIT_BY_ANY_OF_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 #include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
18 #include <array>
19 
20 namespace mlpack {
21 namespace data {
22 
27 {
28  public:
30  using TokenType = boost::string_view;
31 
33  using MaskType = std::array<bool, 1 << CHAR_BIT>;
34 
40  SplitByAnyOf(const boost::string_view delimiters)
41  {
42  mask.fill(false);
43 
44  for (char symbol : delimiters)
45  mask[static_cast<unsigned char>(symbol)] = true;
46  }
47 
54  boost::string_view operator()(boost::string_view& str) const
55  {
56  boost::string_view retval;
57 
58  while (retval.empty())
59  {
60  const std::size_t pos = FindFirstDelimiter(str);
61  if (pos == str.npos)
62  {
63  retval = str;
64  str.clear();
65  return retval;
66  }
67  retval = str.substr(0, pos);
68  str.remove_prefix(pos + 1);
69  }
70  return retval;
71  }
72 
78  static bool IsTokenEmpty(const boost::string_view token)
79  {
80  return token.empty();
81  }
82 
84  const MaskType& Mask() const { return mask; }
86  MaskType& Mask() { return mask; }
87 
88  private:
96  size_t FindFirstDelimiter(const boost::string_view str) const
97  {
98  for (size_t pos = 0; pos < str.size(); pos++)
99  {
100  if (mask[static_cast<unsigned char>(str[pos])])
101  return pos;
102  }
103  return str.npos;
104  }
105 
106  private:
108  MaskType mask;
109 };
110 
111 } // namespace data
112 } // namespace mlpack
113 
114 #endif
boost::string_view operator()(boost::string_view &str) const
The function extracts the first token from the given string view and then removes the prefix containing the token (and the delimiter that follows it) from the view.
Linear algebra utility functions, generally performed on matrices or vectors.
std::array< bool, 1<< CHAR_BIT > MaskType
A convenient alias for the mask type.
The core includes that mlpack expects; standard C++ includes and Armadillo.
boost::string_view TokenType
The type of the token which the tokenizer extracts.
SplitByAnyOf(const boost::string_view delimiters)
Construct the object from the given delimiters.
MaskType & Mask()
Modify the mask.
The SplitByAnyOf class tokenizes a string using a set of delimiters.
const MaskType & Mask() const
Return the mask.
static bool IsTokenEmpty(const boost::string_view token)
The function returns true if the given token is empty.