multihead_attention.hpp
#ifndef MLPACK_METHODS_ANN_LAYER_MULTIHEAD_ATTENTION_HPP
#define MLPACK_METHODS_ANN_LAYER_MULTIHEAD_ATTENTION_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/methods/ann/layer/softmax.hpp>
#include <mlpack/methods/ann/regularizer/no_regularizer.hpp>

namespace mlpack {
namespace ann {

/**
 * Implementation of the multihead attention layer.  The attention mechanism
 * is applied numHeads times in parallel on learned linear projections of the
 * query, key and value sequences, and the concatenated head outputs are
 * passed through a final linear projection.
 */
template <
    typename InputDataType = arma::mat,
    typename OutputDataType = arma::mat,
    typename RegularizerType = NoRegularizer
>
class MultiheadAttention
{
 public:
  //! Default constructor.
  MultiheadAttention();

  //! Create the MultiheadAttention object, given the target and source
  //! sequence lengths, the embedding dimension, and the number of attention
  //! heads.
  MultiheadAttention(const size_t tgtSeqLen,
                     const size_t srcSeqLen,
                     const size_t embedDim,
                     const size_t numHeads);

  //! Reset the layer parameters.
  void Reset();

  //! Ordinary feed forward pass of the layer: compute the attention output
  //! for the packed query, key and value input.
  template<typename eT>
  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output);

  //! Ordinary feed backward pass of the layer: propagate the backpropagated
  //! error gy through the layer to produce the gradient g with respect to the
  //! input.
  template<typename eT>
  void Backward(const arma::Mat<eT>& /* input */,
                const arma::Mat<eT>& gy,
                arma::Mat<eT>& g);

  //! Calculate the gradient of the layer weights using the input activation
  //! and the backpropagated error.
  template<typename eT>
  void Gradient(const arma::Mat<eT>& input,
                const arma::Mat<eT>& error,
                arma::Mat<eT>& gradient);

  //! Get the size of the weights.
  size_t WeightSize() const { return 4 * (embedDim + 1) * embedDim; }
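  // The packed weight matrix holds four projections (query, key, value and
  // output), each with an (embedDim x embedDim) weight block plus an embedDim
  // bias: 4 * (embedDim * embedDim + embedDim) = 4 * (embedDim + 1) * embedDim.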

  //! Serialize the layer.
  template<typename Archive>
  void serialize(Archive& ar, const uint32_t /* version */);

  //! Get the target sequence length.
  size_t TgtSeqLen() const { return tgtSeqLen; }
  //! Modify the target sequence length.
  size_t& TgtSeqLen() { return tgtSeqLen; }

  //! Get the source sequence length.
  size_t SrcSeqLen() const { return srcSeqLen; }
  //! Modify the source sequence length.
  size_t& SrcSeqLen() { return srcSeqLen; }

  //! Get the embedding dimension.
  size_t EmbedDim() const { return embedDim; }
  //! Modify the embedding dimension.
  size_t& EmbedDim() { return embedDim; }

  //! Get the number of attention heads.
  size_t NumHeads() const { return numHeads; }
  //! Modify the number of attention heads.
  size_t& NumHeads() { return numHeads; }

  //! Get the two-dimensional attention mask.
  OutputDataType const& AttentionMask() const { return attnMask; }
  //! Modify the two-dimensional attention mask.
  OutputDataType& AttentionMask() { return attnMask; }

  //! Get the key padding mask.
  OutputDataType const& KeyPaddingMask() const { return keyPaddingMask; }
  //! Modify the key padding mask.
  OutputDataType& KeyPaddingMask() { return keyPaddingMask; }

  //! Get the output parameter.
  OutputDataType const& OutputParameter() const { return outputParameter; }
  //! Modify the output parameter.
  OutputDataType& OutputParameter() { return outputParameter; }

  //! Get the delta.
  OutputDataType const& Delta() const { return delta; }
  //! Modify the delta.
  OutputDataType& Delta() { return delta; }

  //! Get the gradient.
  OutputDataType const& Gradient() const { return grad; }
  //! Modify the gradient.
  OutputDataType& Gradient() { return grad; }

  //! Get the parameters.
  OutputDataType const& Parameters() const { return weights; }
  //! Modify the parameters.
  OutputDataType& Parameters() { return weights; }

  //! Get the total number of input rows expected by the layer.
  size_t InputShape() const
  {
    return embedDim * (tgtSeqLen + 2 * srcSeqLen);
  }
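  // InputShape() gives the expected number of rows of the input matrix: per
  // column, the query embedding (embedDim * tgtSeqLen elements) is followed
  // by the key and value embeddings (embedDim * srcSeqLen elements each).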

 private:
  //! Element type of the output data.
  typedef typename OutputDataType::elem_type ElemType;

  //! Target sequence length.
  size_t tgtSeqLen;

  //! Source sequence length.
  size_t srcSeqLen;

  //! Embedding dimension of the model; must be divisible by numHeads.
  size_t embedDim;

  //! Number of parallel attention heads.
  size_t numHeads;

  //! Dimension of each attention head (embedDim / numHeads).
  size_t headDim;

  //! Two-dimensional attention mask.
  OutputDataType attnMask;

  //! Key padding mask.
  OutputDataType keyPaddingMask;

  //! Weight matrix of the query projection.
  OutputDataType queryWt;

  //! Weight matrix of the key projection.
  OutputDataType keyWt;

  //! Weight matrix of the value projection.
  OutputDataType valueWt;

  //! Weight matrix of the output projection.
  OutputDataType outWt;

  //! Bias of the query projection.
  OutputDataType qBias;

  //! Bias of the key projection.
  OutputDataType kBias;

  //! Bias of the value projection.
  OutputDataType vBias;

  //! Bias of the output projection.
  OutputDataType outBias;

  //! Packed weights of the layer (all projection weights and biases).
  OutputDataType weights;

  //! Projected query sequence, one slice per attention head.
  arma::Cube<ElemType> qProj;

  //! Projected key sequence, one slice per attention head.
  arma::Cube<ElemType> kProj;

  //! Projected value sequence, one slice per attention head.
  arma::Cube<ElemType> vProj;

  //! Attention scores, one slice per attention head.
  arma::Cube<ElemType> scores;

  //! Attention output before the final output projection.
  arma::Cube<ElemType> attnOut;

  //! Softmax layer used to turn the attention scores into probabilities.
  Softmax<InputDataType, OutputDataType> softmax;

  //! Locally-stored delta object.
  OutputDataType delta;

  //! Locally-stored gradient object.
  OutputDataType grad;

  //! Locally-stored output parameter object.
  OutputDataType outputParameter;

  //! Regularizer applied to the layer weights.
  RegularizerType regularizer;
}; // class MultiheadAttention
} // namespace ann
} // namespace mlpack

// Include implementation.
#include "multihead_attention_impl.hpp"

#endif
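
A minimal standalone sketch of how this layer might be driven outside of an mlpack FFN<> network follows. It assumes the packed input layout implied by InputShape() (query, key and value embeddings stacked per column) and that sizing Parameters() to WeightSize() followed by Reset() is enough to bind the projection weights; inside a network, FFN<> normally handles weight allocation and propagation, so treat this as an illustration rather than the canonical usage.

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/multihead_attention.hpp>

using namespace mlpack::ann;

int main()
{
  const size_t tgtSeqLen = 5, srcSeqLen = 5, embedDim = 8, numHeads = 2;
  const size_t batchSize = 3;

  MultiheadAttention<> attn(tgtSeqLen, srcSeqLen, embedDim, numHeads);

  // Allocate the packed weight matrix and bind the projection weights and
  // biases to it.  (Assumption: standalone use requires this manual step.)
  attn.Parameters().randu(attn.WeightSize(), 1);
  attn.Reset();

  // One column per batch item: query (embedDim * tgtSeqLen elements), then
  // key and value (embedDim * srcSeqLen each), matching InputShape().
  arma::mat input(attn.InputShape(), batchSize, arma::fill::randu);
  arma::mat output;

  attn.Forward(input, output);
  // Expected shape: (embedDim * tgtSeqLen) x batchSize.
  output.print("attention output");

  return 0;
}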