12 #ifndef MLPACK_CORE_DATA_LOAD_CSV_HPP 13 #define MLPACK_CORE_DATA_LOAD_CSV_HPP 15 #include <boost/spirit/include/qi.hpp> 16 #include <boost/algorithm/string/trim.hpp> 43 LoadCSV(
const std::string& file);
// Load(): fills the matrix `inout`, delegating to one of the two parsing
// routines below; both receive the same matrix and the same `infoSet` mapper.
// The `transpose` flag defaults to true.
// NOTE(review): the condition that chooses between the two calls is not
// visible in this excerpt -- presumably `transpose` selects TransposeParse();
// confirm against the full header.
54 template<
typename T,
typename PolicyType>
55 void Load(arma::Mat<T> &inout,
57 const bool transpose =
true)
62 TransposeParse(inout, infoSet);
64 NonTransposeParse(inout, infoSet);
// Size-detection pass over the input file. The signature line is not visible
// in this excerpt; presumably this is GetMatrixSize(), the non-transposed
// variant -- TODO confirm.
// Visible behavior: inFile is rewound and read line by line twice; the tokens
// of a line are counted into `cols`; and, when MapPolicy::NeedsFirstPass is
// set, every token is forwarded to info.MapFirstPass<T>().
// Throws std::invalid_argument with a message about the dimensionality of the
// given DatasetInfo (the triggering condition is not visible here).
77 template<
typename T,
typename MapPolicy>
80 using namespace boost::spirit;
// Rewind before the first scan of the file.
89 inFile.seekg(0, std::ios::beg);
95 while (std::getline(inFile, line))
107 std::ostringstream oss;
108 oss <<
"data::LoadCSV(): given DatasetInfo has dimensionality " 111 throw std::invalid_argument(oss.str());
// Rewind again; the file is walked a second time below.
116 inFile.seekg(0, std::ios::beg);
119 while (std::getline(inFile, line))
// Semantic action: invoked once per matched token, so `cols` ends up holding
// the number of tokens seen by this parse call.
128 auto findColSize = [&cols](iter_type) { ++cols; };
129 qi::parse(line.begin(), line.end(),
130 stringRule[findColSize] % delimiterRule);
// Optional first pass requested by the mapping policy.
135 if (MapPolicy::NeedsFirstPass)
138 auto firstPassMap = [&](
const iter_type& iter)
// Copy the token out of the line buffer so it can be moved into the mapper.
140 std::string str(iter.begin(), iter.end());
// The dimension index handed to the mapper is `rows - 1` for every token on
// the line.
143 info.template MapFirstPass<T>(std::move(str), rows - 1);
147 qi::parse(line.begin(), line.end(),
148 stringRule[firstPassMap] % delimiterRule);
// Size-detection pass over the input file. The signature line is not visible
// in this excerpt; presumably this is GetTransposeMatrixSize(), the transposed
// variant -- TODO confirm.
// Visible behavior: inFile is rewound and read line by line; the tokens of a
// line are counted into `rows`; and, when MapPolicy::NeedsFirstPass is set,
// every token is forwarded to info.MapFirstPass<T>().
// Throws std::invalid_argument with a message about the dimensionality of the
// given DatasetInfo (the triggering condition is not visible here).
163 template<
typename T,
typename MapPolicy>
168 using namespace boost::spirit;
177 inFile.seekg(0, std::ios::beg);
182 while (std::getline(inFile, line))
// Semantic action: invoked once per matched token, so `rows` ends up holding
// the number of tokens seen by this parse call.
191 auto findRowSize = [&rows](iter_type) { ++rows; };
192 qi::parse(line.begin(), line.end(),
193 stringRule[findRowSize] % delimiterRule);
202 std::ostringstream oss;
203 oss <<
"data::LoadCSV(): given DatasetInfo has dimensionality " 206 throw std::invalid_argument(oss.str());
// Optional first pass requested by the mapping policy.
211 if (MapPolicy::NeedsFirstPass)
216 auto firstPassMap = [&](
const iter_type& iter)
// Copy the token out of the line buffer so it can be moved into the mapper.
218 std::string str(iter.begin(), iter.end());
// `dim` is post-incremented, so consecutive tokens are reported to the mapper
// under consecutive dimension indices.
221 info.template MapFirstPass<T>(std::move(str), dim++);
225 qi::parse(line.begin(), line.end(),
226 stringRule[firstPassMap] % delimiterRule);
// Token type passed to the parser's semantic actions: a pair of iterators
// delimiting one field inside the std::string line being parsed. The actions
// turn it back into text with std::string(iter.begin(), iter.end()).
232 using iter_type = boost::iterator_range<std::string::iterator>;
// NonTransposeParse(): reads the file into `inout` without transposing.
// Steps visible here: obtain rows/cols from GetMatrixSize(), resize `inout`
// to rows x cols, rewind inFile, then parse each line, storing every token
// (after infoSet.MapString<T>()) at inout(row, col++).
// Throws std::runtime_error for a wrong token count on a line and for a
// parsing failure (the guarding conditions are not visible in this excerpt).
246 template<
typename T,
typename PolicyType>
247 void NonTransposeParse(arma::Mat<T>& inout,
250 using namespace boost::spirit;
254 GetMatrixSize<T>(rows, cols, infoSet);
257 inout.set_size(rows, cols);
// Rewind so the data pass starts from the top of the file.
264 inFile.seekg(0, std::ios::beg);
// Semantic action run for each token on the current line.
266 auto setCharClass = [&](iter_type
const &iter)
268 std::string str(iter.begin(), iter.end());
// The mapper receives `row` as the dimension index; `col` advances per token.
275 inout(row, col++) = infoSet.template MapString<T>(std::move(str), row);
278 while (std::getline(inFile, line))
285 const bool canParse = qi::parse(line.begin(), line.end(),
286 stringRule[setCharClass] % delimiterRule);
// Here the message reports the token count as `col` and the line as `row`.
291 std::ostringstream oss;
292 oss <<
"LoadCSV::NonTransposeParse(): wrong number of dimensions (" 293 << col <<
") on line " << row <<
"; should be " << cols
295 throw std::runtime_error(oss.str());
// NOTE(review): the message above uses `row` as the line number, but this one
// streams `col` after "on line" -- looks like it should be `row`; confirm and
// fix in the full header.
300 std::ostringstream oss;
301 oss <<
"LoadCSV::NonTransposeParse(): parsing error on line " << col
303 throw std::runtime_error(oss.str());
// Transposed loading routine. The signature line is not visible in this
// excerpt; the error messages below name it LoadCSV::TransposeParse().
// Steps visible here: obtain rows/cols from GetTransposeMatrixSize(), resize
// `inout` to rows x cols, rewind inFile, then parse each line, storing every
// token (after infoSet.MapString<T>()) at inout(row, col).
// Throws std::runtime_error for a wrong token count on a line and for a
// parsing failure (the guarding conditions are not visible in this excerpt).
316 template<
typename T,
typename PolicyType>
319 using namespace boost::spirit;
323 GetTransposeMatrixSize<T>(rows, cols, infoSet);
326 inout.set_size(rows, cols);
// Rewind so the data pass starts from the top of the file.
333 inFile.seekg(0, std::ios::beg);
// Semantic action run for each token on the current line.
339 auto parseString = [&](iter_type
const &iter)
342 std::string str(iter.begin(), iter.end());
// The mapper receives `row` as the dimension index.
// NOTE(review): the statement that advances `row` between tokens is not
// visible in this excerpt.
345 inout(row, col) = infoSet.template MapString<T>(std::move(str), row);
349 while (std::getline(inFile, line))
359 const bool canParse = qi::parse(line.begin(), line.end(),
360 stringRule[parseString] % delimiterRule);
// In this routine the token count is reported as `row` (expected: `rows`) and
// the line is reported as `col`.
365 std::ostringstream oss;
366 oss <<
"LoadCSV::TransposeParse(): wrong number of dimensions (" << row
367 <<
") on line " << col <<
"; should be " << rows <<
" dimensions.";
368 throw std::runtime_error(oss.str());
// The line is reported as `col` here too, consistent with the message above.
373 std::ostringstream oss;
374 oss <<
"LoadCSV::TransposeParse(): parsing error on line " << col
376 throw std::runtime_error(oss.str());
// Rule for a single field; its attribute is an iter_type range. The parsing
// routines attach their per-token actions to it (`stringRule[action]`).
385 boost::spirit::qi::rule<std::string::iterator, iter_type()> stringRule;
// Rule for the separator between fields; used on the right-hand side of
// `stringRule[...] % delimiterRule` in every qi::parse call.
387 boost::spirit::qi::rule<std::string::iterator, iter_type()> delimiterRule;
// NOTE(review): presumably the extension of the file being loaded -- how it is
// derived and used is not visible in this excerpt; confirm.
390 std::string extension;
// NOTE(review): presumably the path passed to the constructor; confirm.
392 std::string filename;
// Input stream that the routines read with std::getline() and rewind with
// seekg(0, std::ios::beg) before each pass.
394 std::ifstream inFile;
Auxiliary information for a dataset, including mappings to/from strings (or other types) and the data...
Load the CSV file. This class uses boost::spirit to implement the parser; please refer to the following lin...
void Load(arma::Mat< T > &inout, DatasetMapper< PolicyType > &infoSet, const bool transpose=true)
Load the file into the given matrix with the given DatasetMapper object.
Linear algebra utility functions, generally performed on matrices or vectors.
void GetTransposeMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
Peek at the file to determine the number of rows and columns in the matrix, assuming a transposed mat...
LoadCSV(const std::string &file)
Construct the LoadCSV object on the given file.
void GetMatrixSize(size_t &rows, size_t &cols, DatasetMapper< MapPolicy > &info)
Peek at the file to determine the number of rows and columns in the matrix, assuming a non-transposed...
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
void SetDimensionality(const size_t dimensionality)
Set the dimensionality of an existing DatasetMapper object.
size_t Dimensionality() const
Get the dimensionality of the DatasetMapper object (that is, how many dimensions it has information f...