13 #ifndef MLPACK_CORE_DATA_STRING_ENCODING_HPP 14 #define MLPACK_CORE_DATA_STRING_ENCODING_HPP 17 #include <mlpack/core/boost_backport/boost_backport_string_view.hpp> 33 template<
typename EncodingPolicyType,
34 typename DictionaryType>
42 template<
typename ... ArgTypes>
//! Copy assignment operator, explicitly defaulted (compiler-generated).
61 StringEncoding&
operator=(
const StringEncoding&) =
default;
//! Move assignment operator, explicitly defaulted (compiler-generated).
67 StringEncoding&
operator=(StringEncoding&&) =
default;
84 template<
typename TokenizerType>
86 const TokenizerType& tokenizer);
118 template<
typename OutputType,
typename TokenizerType>
119 void Encode(
const std::vector<std::string>& input,
121 const TokenizerType& tokenizer);
//! Return the dictionary: a const reference to the `dictionary` member.
124 const DictionaryType&
Dictionary()
const {
return dictionary; }
/**
 * Serialize the class to the given archive.
 *
 * @tparam Archive Type of the archive object.
 * @param ar The archive, taken by non-const reference.
 *
 * The second parameter is an unnamed uint32_t.
 * NOTE(review): presumably the serialization version number; confirm against
 * the implementation.
 */
136 template<
typename Archive>
137 void serialize(Archive& ar,
const uint32_t );
168 template<
typename OutputType,
typename TokenizerType,
typename PolicyType>
169 void EncodeHelper(
const std::vector<std::string>& input,
171 const TokenizerType& tokenizer,
197 template<
typename TokenizerType,
typename PolicyType,
typename ElemType>
198 void EncodeHelper(
const std::vector<std::string>& input,
199 std::vector<std::vector<ElemType>>& output,
200 const TokenizerType& tokenizer,
203 PolicyType>::onePassEncoding>::type* = 0);
//! The encoding policy object.
207 EncodingPolicyType encodingPolicy;
//! The dictionary; returned by const reference from Dictionary() const.
209 DictionaryType dictionary;
216 #include "string_encoding_impl.hpp" StringEncoding(ArgTypes &&... args)
Pass the given arguments to the policy constructor and create the StringEncoding object using the policy.
This is a template struct that provides some information about various encoding policies.
Linear algebra utility functions, generally performed on matrices or vectors.
void CreateMap(const std::string &input, const TokenizerType &tokenizer)
Initialize the dictionary using the given corpus.
The core includes that mlpack expects; standard C++ includes and Armadillo.
void serialize(Archive &ar, const uint32_t)
Serialize the class to the given archive.
The class translates a set of strings into numbers using various encoding algorithms.
const EncodingPolicyType & EncodingPolicy() const
Return the encoding policy object.
EncodingPolicyType & EncodingPolicy()
Modify the encoding policy object.
StringEncoding & operator=(const StringEncoding &)=default
Default copy assignment operator.
void Clear()
Clear the dictionary.
const DictionaryType & Dictionary() const
Return the dictionary.
DictionaryType & Dictionary()
Modify the dictionary.
void Encode(const std::vector< std::string > &input, OutputType &output, const TokenizerType &tokenizer)
Encode the given text and write the result to the given output.