13 #ifndef MLPACK_CORE_DATA_STR_ENCODING_POLICIES_BAG_OF_WORDS_ENCODING_POLICY_HPP 14 #define MLPACK_CORE_DATA_STR_ENCODING_POLICIES_BAG_OF_WORDS_ENCODING_POLICY_HPP 58 template<
typename MatType>
60 const size_t datasetSize,
62 const size_t dictionarySize)
64 output.zeros(dictionarySize, datasetSize);
/**
 * Initialize the output container for the std::vector-based encoding.
 *
 * The outer vector is resized to datasetSize rows; any row created by the
 * resize is a value-initialized vector of dictionarySize elements.
 *
 * @tparam ElemType Type of the stored counters.
 * @param output Container to be resized.
 * @param datasetSize Number of rows (outer dimension) of the output.
 * @param dictionarySize Number of elements in each newly created row.
 *
 * The third parameter is unnamed and ignored by this policy.
 */
template<typename ElemType>
static void InitMatrix(std::vector<std::vector<ElemType>>& output,
                       const size_t datasetSize,
                       const size_t /* unused */,
                       const size_t dictionarySize)
{
  // Rows that already exist keep their contents: std::vector::resize only
  // applies the fill value to rows it appends.
  output.resize(datasetSize, std::vector<ElemType>(dictionarySize));
}
/**
 * Bag of words encoding step for matrix-like outputs: increment the counter
 * stored at row (value - 1), column line of the output.
 *
 * @tparam MatType Output type; must support output(row, col) element access.
 * @param output Output matrix whose counter is incremented.
 * @param value Encoded token; used as a row index after subtracting one
 *     (presumably token values are 1-based -- confirm against the dictionary).
 * @param line Column index of the counter to increment.
 *
 * The fourth parameter is unnamed and ignored by this policy.
 */
template<typename MatType>
static void Encode(MatType& output,
                   const size_t value,
                   const size_t line,
                   const size_t /* unused */)
{
  // NOTE(review): value == 0 would wrap around in (value - 1); callers are
  // assumed to pass values >= 1.
  output(value - 1, line) += 1;
}
/**
 * Bag of words encoding step for std::vector-based outputs: increment the
 * counter stored at output[line][value - 1].
 *
 * @tparam ElemType Type of the stored counters.
 * @param output Container whose counter is incremented.
 * @param value Encoded token; used as an index within the row after
 *     subtracting one (presumably token values are 1-based -- confirm
 *     against the dictionary).
 * @param line Index of the row to update.
 *
 * The fourth parameter is unnamed and ignored by this policy.
 */
template<typename ElemType>
static void Encode(std::vector<std::vector<ElemType>>& output,
                   const size_t value,
                   const size_t line,
                   const size_t /* unused */)
{
  // NOTE(review): no bounds checking is performed; value == 0 would wrap
  // around in (value - 1). Callers are assumed to pass values >= 1.
  output[line][value - 1] += 1;
}
152 template<
typename Archive>
165 template<
typename TokenType>
Definition of the BagOfWordsEncodingPolicy class.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.
static void InitMatrix(std::vector< std::vector< ElemType >> &output, const size_t datasetSize, const size_t, const size_t dictionarySize)
The function initializes the output matrix.
The class translates a set of strings into numbers using various encoding algorithms.
static void Encode(MatType &output, const size_t value, const size_t line, const size_t)
The function performs the bag of words encoding algorithm, i.e. it increments the counter stored in the output matrix at row (value - 1) and column line.
static void PreprocessToken(size_t, size_t, size_t)
The function is not used by the bag of words encoding policy.
This class provides a dictionary interface for the purpose of string encoding.
static void Reset()
Clear the necessary internal variables.
static void InitMatrix(MatType &output, const size_t datasetSize, const size_t, const size_t dictionarySize)
The function initializes the output matrix.
void serialize(Archive &, const uint32_t)
Serialize the class to the given archive.
static void Encode(std::vector< std::vector< ElemType >> &output, const size_t value, const size_t line, const size_t)
The function performs the bag of words encoding algorithm, i.e. it increments the counter stored at output[line][value - 1].