Program Listing for File DataSet.h
↰ Return to documentation for file (src/main/cpp/lib/DataSet.h)
#ifndef DATASET_H_
#define DATASET_H_
#include <algorithm>
#include <cfloat>
#include <clocale>
#include <cstddef>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "Utils.h"
template<typename T>
class DataSet
{
public:
/**
 * Description of one column: its value range plus a name and an optional
 * unit string.
 */
struct Variable
{
  T min;            ///< lower end of the range
  T max;            ///< upper end of the range
  std::string name; ///< column name
  std::string unit; ///< unit label; empty when none was given

  /**
   * @param min_  lower end of the range
   * @param max_  upper end of the range
   * @param name_ column name
   * @param unit_ unit label, defaults to the empty string
   */
  Variable (T min_, T max_, const std::string & name_,
            const std::string & unit_ = "")
    : min (min_)
    , max (max_)
    , name (name_)
    , unit (unit_)
  {
  }
};
/**
 * A single immutable table entry that is either null or carries a value.
 */
struct Cell
{
  const bool null; ///< true when the cell holds no value
  const T value;   ///< payload; initialised to 0 for null cells

  /// Creates a null cell.
  Cell ()
    : null (true)
    , value (0)
  {
  }

  /// Creates a non-null cell holding @p value_ (implicit conversion allowed).
  Cell (T value_)
    : null (false)
    , value (value_)
  {
  }
};
/// Role of a column within the data set.
enum ColumnType
{
  INPUT = 0, ///< column is an input
  OUTPUT = 1 ///< column is an output
};
/**
 * Storage for one column: its fixed role, its variable description and the
 * cells it contains.
 */
struct DataColumn
{
  const ColumnType type;   ///< input or output; fixed at construction
  Variable var;            ///< range, name and unit of the column
  std::vector<Cell> cells; ///< column values; empty after construction

  /**
   * @param type_ role of the column
   * @param var_  variable description; taken by value and moved into place
   *              so the contained strings are not copied a second time
   */
  DataColumn (ColumnType type_, Variable var_) :
      type (type_), var (std::move (var_))
  {
  }
};
class MockColumn
{
public:
MockColumn (DataColumn* column) :
m_column (column), m_var (column->var), m_enabled (true)
{
}
const Cell&
operator[] (std::size_t i) const
{
return m_column->cells[i];
}
void
set_range (double l_restr, double u_restr)
{
m_var.min = l_restr;
m_var.max = u_restr;
}
void
set_enabled (bool enabled)
{
m_enabled = enabled;
}
ColumnType
get_type () const
{
return m_column->type;
}
Variable
get_var () const
{
return m_var;
}
std::size_t
size () const
{
return m_column->cells.size ();
}
bool
is_enabled () const
{
return m_enabled;
}
static void
swap (MockColumn& m0, MockColumn& m1)
{
std::swap (m0.m_column, m1.m_column);
std::swap (m0.m_var, m1.m_var);
std::swap (m0.m_enabled, m1.m_enabled);
}
private:
std::shared_ptr<DataColumn> m_column;
Variable m_var;
bool m_enabled;
};
/**
 * Read-only view of one row across a vector of MockColumn objects.
 *
 * The row keeps references to the column vector and to an externally
 * maintained count of enabled columns, so it must not outlive either.
 * Indexing and iteration only expose enabled columns.
 */
class DataRow
{
public:
  /**
   * Input iterator over the cells of the row, visiting enabled columns only.
   */
  class const_iterator : public std::iterator<std::input_iterator_tag,
      const Cell>
  {
    typename std::vector<MockColumn>::const_iterator it;
    typename std::vector<MockColumn>::const_iterator end;
    // Not const: a const member would make the iterator non-assignable.
    std::size_t offset;

    /// Advances `it` to the next enabled column, or to `end` if none is left.
    void
    skip_disabled ()
    {
      // The end check must come first: `end` must never be dereferenced.
      while (it != end and not it->is_enabled ())
        {
          ++it;
        }
    }
  public:
    /**
     * @param _it     column to start at; disabled columns are skipped
     * @param _end    one past the last column
     * @param _offset index of the row within each column
     */
    explicit
    const_iterator (
        const typename std::vector<MockColumn>::const_iterator& _it,
        const typename std::vector<MockColumn>::const_iterator& _end,
        std::size_t _offset) :
        it (_it), end (_end), offset (_offset)
    {
      // Without this, begin() would expose a disabled first column.
      skip_disabled ();
    }

    /// Moves to the next enabled column (or to the end position).
    const_iterator&
    operator++ ()
    {
      ++it;
      skip_disabled ();
      return *this;
    }

    /// Post-increment; returns the position held before advancing.
    const_iterator
    operator++ (int)
    {
      const_iterator retval = *this;
      ++(*this);
      return retval;
    }

    /// Two iterators are equal when they refer to the same column.
    bool
    operator== (const const_iterator & other) const
    {
      return it == other.it;
    }

    bool
    operator!= (const const_iterator & other) const
    {
      return it != other.it;
    }

    /// Returns the cell of the current column at this row's offset.
    const Cell&
    operator* () const
    {
      return (*it)[offset];
    }
  };

private:
  const std::vector<MockColumn>& m_columns;  ///< all columns, enabled or not
  const std::size_t& m_enabled_columns;      ///< externally kept enabled count
  const std::size_t m_offset;                ///< index of this row in a column
  bool m_enabled;                            ///< whether the row is active

public:
  /**
   * @param columns         column vector the row reads from (kept by reference)
   * @param enabled_columns count of enabled columns (kept by reference)
   * @param offset          index of this row within each column
   *
   * The row starts out enabled.
   */
  DataRow (const std::vector<MockColumn>& columns,
           const std::size_t& enabled_columns, std::size_t offset) :
      m_columns (columns), m_enabled_columns (enabled_columns), m_offset (
          offset), m_enabled (true)
  {
  }

  /**
   * Returns the cell in the i-th *enabled* column.
   *
   * @throws std::out_of_range if i is not below the enabled-column count
   */
  const Cell&
  operator[] (std::size_t i) const
  {
    if (i >= m_enabled_columns)
      {
        throw std::out_of_range (
            "index exceeds range [0,"
                + std::to_string (m_enabled_columns) + "), given: "
                + std::to_string (i));
      }
    // Translate the enabled-column index into a position in m_columns:
    // every disabled column at or before the target shifts it by one.
    std::size_t position = i;
    for (std::size_t j = 0; j <= position; ++j)
      {
        if (not m_columns[j].is_enabled ())
          ++position;
      }
    return m_columns[position][m_offset];
  }

  /// Sets the enabled flag of the row.
  void
  set_enabled (bool enabled)
  {
    m_enabled = enabled;
  }

  /// Returns the number of enabled columns.
  std::size_t
  size () const
  {
    return m_enabled_columns;
  }

  /// Returns whether the row is enabled.
  bool
  is_enabled () const
  {
    return m_enabled;
  }

  /// Iterator to the first enabled column's cell (or end() if there is none).
  const_iterator
  begin () const
  {
    return const_iterator (m_columns.begin (), m_columns.end (), m_offset);
  }

  /// Past-the-end iterator.
  const_iterator
  end () const
  {
    return const_iterator (m_columns.end (), m_columns.end (), m_offset);
  }
};
/**
 * Input iterator over the rows of the data set that visits enabled rows
 * only.
 */
class const_iterator : public std::iterator<std::input_iterator_tag,
    const DataRow>
{
  typedef typename std::vector<DataRow>::const_iterator row_iterator;

  row_iterator m_it;  ///< current position
  row_iterator m_end; ///< one past the last row

  /// Moves m_it forward until it rests on an enabled row or reaches m_end.
  void
  settle_on_enabled_row ()
  {
    for (; m_it != m_end; ++m_it)
      {
        if (m_it->is_enabled ())
          {
            break;
          }
      }
  }
public:
  /**
   * @param it  row to start at; disabled rows are skipped
   * @param end one past the last row
   */
  explicit
  const_iterator (const row_iterator & it, const row_iterator & end) :
      m_it (it), m_end (end)
  {
    settle_on_enabled_row ();
  }

  /// Advances to the next enabled row (or to the end position).
  const_iterator&
  operator++ ()
  {
    ++m_it;
    settle_on_enabled_row ();
    return *this;
  }

  /// Post-increment; returns the position held before advancing.
  const_iterator
  operator++ (int)
  {
    const_iterator previous (*this);
    operator++ ();
    return previous;
  }

  /// Iterators compare equal when they refer to the same row.
  bool
  operator== (const const_iterator & other) const
  {
    return other.m_it == m_it;
  }

  bool
  operator!= (const const_iterator & other) const
  {
    return other.m_it != m_it;
  }

  /// Returns the row at the current position.
  const DataRow &
  operator* () const
  {
    return *m_it;
  }
};
private:
// Number of enabled rows; restrict_column() adjusts it as rows are toggled.
std::size_t m_num_rows;
// Number of enabled columns; toggle_column() adjusts it.
std::size_t m_num_cols;
// Number of enabled input columns.
std::size_t m_num_inputs;
// Number of enabled output columns.
std::size_t m_num_outputs;
// Index of the first output column: columns [0, m_separator) are read as
// inputs by input_variables(), the rest as outputs by output_variables().
std::size_t m_separator;
// All columns, enabled or not.
std::vector<MockColumn> m_cols;
// All rows, enabled or not.
// NOTE(review): DataRow stores references to a column vector and a counter;
// if those refer to m_cols / m_num_cols of this object, copying or moving a
// DataSet would leave the rows pointing at the source object - confirm.
std::vector<DataRow> m_rows;
public:
/// Creates an empty data set: all counters are zero, no columns, no rows.
DataSet ()
  : m_num_rows (0)
  , m_num_cols (0)
  , m_num_inputs (0)
  , m_num_outputs (0)
  , m_separator (0)
{
}
/**
 * Creates a data set from the file at @p fpath: the content obtained from
 * Util::read_file() is handed to parse_from_csv() with its default
 * separator, comment and newline arguments.
 */
DataSet (const std::string& fpath)
  : DataSet ()
{
  // Start from the empty state, then let the parser populate the set.
  const auto& file_content = Util::read_file (fpath);
  parse_from_csv (file_content);
}
/**
 * Populates the data set from CSV text.
 *
 * Only declared here. The generic definition further down in this header
 * contains a static_assert on T == double, and an explicit specialisation
 * for double is declared (its definition is not part of this header).
 *
 * @param cont      the CSV text to parse
 * @param separator field separator, "," by default
 * @param comment   comment marker, "#" by default
 * @param newline   line terminator, "\n" by default
 */
void
parse_from_csv (const std::string & cont, std::string separator = ",",
std::string comment = "#", std::string newline = "\n");
/**
 * Produces a CSV representation as a string.
 *
 * Only declared here. The generic definition further down in this header
 * contains a static_assert on T == double, and an explicit specialisation
 * for double is declared (its definition is not part of this header).
 *
 * @param separator field separator, "," by default
 * @param newline   line terminator, "\n" by default
 */
std::string
write_to_csv (std::string separator = ",", std::string newline = "\n");
void
toggle_column (std::size_t c, bool mode)
{
if (c >= m_cols.size ())
{
throw std::out_of_range (
"index exceeds range [0, " + std::to_string (m_num_cols)
+ "), given: " + std::to_string (c));
}
if (not mode and m_cols[c].is_enabled ())
{
--m_num_cols;
if (m_cols[c].get_type () == ColumnType::INPUT)
{
--m_num_inputs;
}
else
{
--m_num_outputs;
}
}
else if (mode and not m_cols[c].is_enabled ())
{
++m_num_cols;
if (m_cols[c].get_type () == ColumnType::INPUT)
{
++m_num_inputs;
}
else
{
++m_num_outputs;
}
}
m_cols[c].set_enabled (mode);
}
/**
 * Exchanges the columns at positions @p c0 and @p c1.
 *
 * @throws std::out_of_range if either index is not a valid column index;
 *         @p c0 is checked first
 */
void
swap_columns (std::size_t c0, std::size_t c1)
{
  const std::size_t column_count = m_cols.size ();
  const auto require_valid_index = [column_count] (std::size_t index)
    {
      if (index < column_count)
        {
          return;
        }
      throw std::out_of_range (
          "index exceeds range [0, " + std::to_string (column_count)
              + "), given: " + std::to_string (index));
    };

  require_valid_index (c0);
  require_valid_index (c1);
  MockColumn::swap (m_cols[c0], m_cols[c1]);
}
void
restrict_column (std::size_t c, T l_restr, T u_restr)
{
if (c >= m_cols.size())
{
throw std::out_of_range (
"index exceeds range [0, " + std::to_string (m_cols.size())
+ "), given: " + std::to_string (c));
}
m_cols[c].set_range (l_restr, u_restr);
std::size_t num_disabled = std::count_if(m_cols.begin(), m_cols.begin() + c,
[](const MockColumn& c) -> std::size_t { return not c.is_enabled(); });
for (DataRow& r : m_rows)
{
if (r.is_enabled())
{
if (!r[c - num_disabled].null &&
(r[c - num_disabled].value < l_restr || r[c - num_disabled].value > u_restr))
{
r.set_enabled (false);
--m_num_rows;
}
}
else
{
bool is_active = true;
std::size_t data_row_index = 0;
for (std::size_t i = 0; i < m_cols.size(); ++i)
{
if (m_cols[i].is_enabled()) {
Variable variable = m_cols[i].get_var();
if (!r[data_row_index].null
&& (r[data_row_index].value < variable.min || r[data_row_index].value > variable.max)) {
is_active = false;
break;
}
++data_row_index;
}
}
if (is_active)
{
r.set_enabled (true);
++m_num_rows;
}
}
}
}
/// Returns the number of currently enabled columns.
std::size_t
get_num_active_cols () const
{
  const std::size_t enabled_columns = m_num_cols;
  return enabled_columns;
}
/// Returns the total number of columns, enabled or not.
std::size_t
get_num_cols () const
{
  const std::size_t all_columns = m_cols.size ();
  return all_columns;
}
/// Returns the row counter that restrict_column() keeps in step with the
/// number of enabled rows. Note that operator[] indexes all stored rows.
std::size_t
get_num_rows () const
{
  const std::size_t enabled_rows = m_num_rows;
  return enabled_rows;
}
/// Returns the number of currently enabled input columns.
std::size_t
get_num_active_inputs () const
{
  const std::size_t enabled_inputs = m_num_inputs;
  return enabled_inputs;
}
/// Returns the total number of input columns, enabled or not. This is the
/// separator index: columns before it are treated as inputs.
std::size_t
get_num_inputs () const
{
  const std::size_t all_inputs = m_separator;
  return all_inputs;
}
/// Returns the number of currently enabled output columns.
std::size_t
get_num_active_outputs () const
{
  const std::size_t enabled_outputs = m_num_outputs;
  return enabled_outputs;
}
/// Returns the total number of output columns, enabled or not: every column
/// from the separator index onwards.
std::size_t
get_num_outputs () const
{
  const std::size_t all_columns = m_cols.size ();
  return all_columns - m_separator;
}
/// Returns the i-th stored row, whether enabled or not (no bounds check).
const DataRow &
operator[] (std::size_t i) const
{
  const DataRow & row = m_rows[i];
  return row;
}
inline std::vector<Variable>
input_variables (void) const
{
std::vector<Variable> input_vars;
for (std::size_t i = 0; i < m_separator; ++i)
{
if (m_cols[i].is_enabled ())
{
input_vars.push_back (m_cols[i].get_var ());
}
}
return input_vars;
}
inline std::vector<Variable>
output_variables (void) const
{
std::vector<Variable> output_vars;
for (std::size_t i = m_separator; i < m_cols.size (); ++i)
{
if (m_cols[i].is_enabled ())
{
output_vars.push_back (m_cols[i].get_var ());
}
}
return output_vars;
}
/// Returns an iterator to the first enabled row (equal to end() if none).
const_iterator
begin () const
{
  const auto last_row = m_rows.end ();
  return const_iterator (m_rows.begin (), last_row);
}
/// Returns the past-the-end row iterator.
const_iterator
end () const
{
  const auto last_row = m_rows.end ();
  return const_iterator (last_row, last_row);
}
/**
 * Splits one column header into its parts.
 *
 * The header is stripped with Util::strip() and a pair of enclosing quotes
 * is removed. Then:
 *  - the unit is the text inside the last "[...]" pair,
 *  - the range is the text inside the last "(min, max)" pair,
 *  - the name is either the text between the first and last quote, or, if
 *    there are no quotes, the text starting at the third character. In both
 *    cases it ends before the unit bracket / range parenthesis.
 *
 * Parts that are not present are returned as empty strings. Headers that are
 * too short to contain a name yield an empty name instead of throwing.
 *
 * @param header raw header text
 * @return tuple (name, unit, range minimum, range maximum), all as strings
 */
std::tuple<std::string, std::string, std::string, std::string>
extract_header_entry(const std::string & header) const
{
  const std::size_t npos = std::string::npos;

  // remove leading and trailing quotations
  auto stripped_header = Util::strip(header);
  if (not stripped_header.empty()
      and stripped_header[0] == '\"' and stripped_header.back() == '\"')
  {
    stripped_header = stripped_header.substr(1, stripped_header.size() - 2);
  }

  // the unit is encapsulated by brackets. we use the last pair of brackets
  // (npos + 1 wraps to 0, so a missing opener just searches from the start;
  // the result is only used when the opener exists)
  const std::size_t open_bracket = stripped_header.rfind('[');
  const std::size_t close_bracket = stripped_header.find(']', open_bracket + 1);

  // the range is encapsulated by braces.
  const std::size_t open_brace = stripped_header.rfind('(');
  const std::size_t comma_separator = stripped_header.find(',', open_brace + 1);
  const std::size_t close_brace = stripped_header.find(')', comma_separator + 1);

  // the name can be enclosed in quotes
  const std::size_t open_quote = stripped_header.find('\"');
  std::size_t close_quote = stripped_header.rfind('\"');
  // a single quote character cannot both open and close the name
  if (close_quote == open_quote)
  {
    close_quote = npos;
  }

  // the unit and the range should not be part of the name
  // if one of them is absent its position is npos, i.e. very large
  const std::size_t name_end =
      std::min(std::min(close_quote, open_bracket), open_brace);

  std::string name, unit, range_min, range_max;

  // get unit only if it was specified
  if (open_bracket != npos && close_bracket != npos)
  {
    unit = Util::strip(stripped_header.substr(
        open_bracket + 1, close_bracket - open_bracket - 1));
  }

  // get range only if it was specified
  if (open_brace != npos && comma_separator != npos && close_brace != npos)
  {
    range_min = Util::strip(stripped_header.substr(
        open_brace + 1, comma_separator - open_brace - 1));
    range_max = Util::strip(stripped_header.substr(
        comma_separator + 1, close_brace - comma_separator - 1));
  }

  // if there are no quotes, we start after i# or o#
  const std::size_t name_begin = (open_quote == npos ? 1 : open_quote) + 1;

  // get the name; guard against a start beyond the text (substr would throw)
  // and against an end before the start (the length would wrap around and
  // pull the unit/range text into the name)
  if (name_begin <= stripped_header.size() && name_begin <= name_end)
  {
    name = Util::strip(stripped_header.substr(name_begin,
                                              name_end - name_begin));
  }
  return std::make_tuple(name, unit, range_min, range_max);
}
};
/**
 * Generic fallback for parse_from_csv(). It has no behaviour of its own: the
 * static_assert rejects every instantiation with T other than double.
 */
template<typename T>
void
DataSet<T>::parse_from_csv (const std::string & /* cont */,
                            std::string /* separator */,
                            std::string /* comment */,
                            std::string /* newline */)
{
  static_assert(std::is_same<T, double>::value, "Should not be compiled.");
}
/**
 * Generic fallback for write_to_csv(). The static_assert rejects every
 * instantiation with T other than double; the returned string is empty.
 */
template<typename T>
std::string
DataSet<T>::write_to_csv (std::string /* separator */,
                          std::string /* newline */)
{
  static_assert(std::is_same<T, double>::value, "Should not be compiled.");
  return std::string ();
}
// Declaration of the explicit specialisation of parse_from_csv() for double.
// Only declared in this header; the definition is not visible here.
template<>
void
DataSet<double>::parse_from_csv (const std::string & cont,
std::string separator, std::string comment,
std::string newline);
// Declaration of the explicit specialisation of write_to_csv() for double.
// Only declared in this header; the definition is not visible here.
template<>
std::string
DataSet<double>::write_to_csv (std::string separator, std::string newline);
// Convenience aliases for the double-valued data set and its nested types.
using DefDataSet = DataSet<double>;
using DefVariable = DefDataSet::Variable;
using DefDataRow = DefDataSet::DataRow;
using DefCell = DefDataSet::Cell;
#endif /* DATASET_H_ */