Program Listing for File DataSet.h

Program Listing for File DataSet.h

Return to documentation for file (src/main/cpp/lib/DataSet.h)

#ifndef DATASET_H_
#define DATASET_H_

#include <algorithm>
#include <cfloat>
#include <clocale>
#include <cstddef>
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>

#include "Utils.h"

template<typename T>
  class DataSet
  {
  public:
    /**
     * Metadata of one column: the value range [min, max], the variable
     * name and an optional unit string (empty when not given).
     */
    struct Variable
    {
      T min;
      T max;
      std::string name;
      std::string unit;

      /**
       * @param min_  lower bound stored in `min`
       * @param max_  upper bound stored in `max`
       * @param name_ variable name
       * @param unit_ unit label; defaults to the empty string
       */
      Variable (T min_,
                T max_,
                const std::string & name_,
                const std::string & unit_ = "")
        : min (min_),
          max (max_),
          name (name_),
          unit (unit_)
      {
      }
    };

    /**
     * One immutable table entry: either a value of type T or a null marker.
     * Both members are const, so a Cell cannot be assigned after construction.
     */
    struct Cell
    {
      /// True when the cell carries no value.
      const bool null;

      /// Stored value; 0 for a null cell.
      const T value;

      /// Builds a null cell.
      Cell ()
        : null (true), value (0)
      {
      }

      /// Builds a non-null cell holding `value_` (implicit conversion from T).
      Cell (T value_)
        : null (false), value (value_)
      {
      }
    };

    /// Role of a column; the data set keeps separate counters for each role.
    enum ColumnType
    {
      INPUT,  ///< column counted as an input
      OUTPUT  ///< column counted as an output
    };

    /**
     * Storage of one column: its fixed role, its variable description and
     * the cells of all rows (initially empty).
     */
    struct DataColumn
    {
      const ColumnType type;    ///< role, fixed at construction
      Variable var;             ///< name, unit and range of the column
      std::vector<Cell> cells;  ///< one entry per row

      /**
       * @param type_ role of the column
       * @param var_  variable description, copied into `var`
       */
      DataColumn (ColumnType type_, Variable var_)
        : type (type_),
          var (var_)
      {
      }
    };

    class MockColumn
    {
    public:
      MockColumn (DataColumn* column) :
      m_column (column), m_var (column->var), m_enabled (true)
      {
      }

      const Cell&
      operator[] (std::size_t i) const
      {
    return m_column->cells[i];
      }

      void
      set_range (double l_restr, double u_restr)
      {
    m_var.min = l_restr;
    m_var.max = u_restr;
      }

      void
      set_enabled (bool enabled)
      {
    m_enabled = enabled;
      }

      ColumnType
      get_type () const
      {
    return m_column->type;
      }

      Variable
      get_var () const
      {
    return m_var;
      }

      std::size_t
      size () const
      {
    return m_column->cells.size ();
      }

      bool
      is_enabled () const
      {
    return m_enabled;
      }

      static void
      swap (MockColumn& m0, MockColumn& m1)
      {
    std::swap (m0.m_column, m1.m_column);
    std::swap (m0.m_var, m1.m_var);
    std::swap (m0.m_enabled, m1.m_enabled);
      }
    private:
      std::shared_ptr<DataColumn> m_column;
      Variable m_var;
      bool m_enabled;
    };

    /**
     * Read-only view of one row of the table.
     *
     * A row does not own data: it keeps a reference to a vector of
     * MockColumn handles, a reference to a counter holding the number of
     * enabled columns, and the index (`offset`) of this row inside every
     * column. Indexing and iteration only expose cells of enabled columns.
     * The referenced vector and counter must outlive the row.
     */
    class DataRow
    {
    public:
      /**
       * Input iterator over the cells of the enabled columns of one row.
       */
      class const_iterator : public std::iterator<std::input_iterator_tag,
                                                  const Cell>
      {
        typename std::vector<MockColumn>::const_iterator it;
        typename std::vector<MockColumn>::const_iterator end;
        // Not const, so that the iterator stays copy-assignable.
        std::size_t offset;

        /// Advances `it` to the next enabled column or to `end`.
        void
        skip_disabled ()
        {
          // `end` must be tested first: dereferencing it is undefined.
          while (it != end and not it->is_enabled ())
            {
              ++it;
            }
        }

      public:
        /**
         * @param _it     column to start at; leading disabled columns are
         *                skipped so that iteration agrees with operator[]
         * @param _end    one past the last column
         * @param _offset row index used inside every column
         */
        explicit
        const_iterator (
            const typename std::vector<MockColumn>::const_iterator& _it,
            const typename std::vector<MockColumn>::const_iterator& _end,
            std::size_t _offset) :
          it (_it), end (_end), offset (_offset)
        {
          skip_disabled ();
        }

        /// Moves to the next enabled column.
        const_iterator&
        operator++ ()
        {
          ++it;
          skip_disabled ();
          return *this;
        }

        const_iterator
        operator++ (int)
        {
          const_iterator retval = *this;
          ++(*this);
          return retval;
        }

        bool
        operator== (const const_iterator & other) const
        {
          return it == other.it;
        }

        bool
        operator!= (const const_iterator & other) const
        {
          return it != other.it;
        }

        /// Cell of the current column at this row's offset.
        const Cell&
        operator* () const
        {
          return (*it)[offset];
        }
      };

    private:
      const std::vector<MockColumn>& m_columns;  ///< all columns (not owned)
      const std::size_t& m_enabled_columns;      ///< live count of enabled columns
      const std::size_t m_offset;                ///< row index inside each column
      bool m_enabled;                            ///< whether the row is active

    public:
      /**
       * @param columns         column handles shared by all rows
       * @param enabled_columns counter of enabled columns, read on every call
       * @param offset          index of this row inside each column
       */
      DataRow (const std::vector<MockColumn>& columns,
               const std::size_t& enabled_columns, std::size_t offset) :
        m_columns (columns), m_enabled_columns (enabled_columns),
        m_offset (offset), m_enabled (true)
      {
      }

      /**
       * Returns the cell of the i-th enabled column.
       *
       * @throws std::out_of_range if `i` is not smaller than the number of
       *         enabled columns, or if fewer enabled columns exist than the
       *         counter claims
       */
      const Cell&
      operator[] (std::size_t i) const
      {
        if (i >= m_enabled_columns)
          {
            throw std::out_of_range (
                "index exceeds range [0,"
                + std::to_string (m_enabled_columns) + "), given: "
                + std::to_string (i));
          }

        // Walk the columns and count down enabled ones until the i-th is hit.
        std::size_t remaining = i;
        for (const MockColumn& column : m_columns)
          {
            if (not column.is_enabled ())
              {
                continue;
              }
            if (remaining == 0)
              {
                return column[m_offset];
              }
            --remaining;
          }

        // Only reachable when the counter and the column flags disagree;
        // report it instead of reading past the end of the vector.
        throw std::out_of_range (
            "enabled column " + std::to_string (i) + " does not exist");
      }

      /// Marks the row as enabled or disabled.
      void
      set_enabled (bool enabled)
      {
        m_enabled = enabled;
      }

      /// Number of enabled columns, i.e. the valid index range of operator[].
      std::size_t
      size () const
      {
        return m_enabled_columns;
      }

      /// Whether the row is currently enabled.
      bool
      is_enabled () const
      {
        return m_enabled;
      }

      /// Iterator to the first enabled cell of the row.
      const_iterator
      begin () const
      {
        return const_iterator (m_columns.begin (), m_columns.end (), m_offset);
      }

      /// Past-the-end iterator.
      const_iterator
      end () const
      {
        return const_iterator (m_columns.end (), m_columns.end (), m_offset);
      }
    };

    /**
     * Input iterator over the rows of the data set that skips every row
     * whose is_enabled() flag is false.
     */
    class const_iterator : public std::iterator<std::input_iterator_tag,
                                                const DataRow>
    {
      typedef typename std::vector<DataRow>::const_iterator row_iterator;

      row_iterator m_it;
      row_iterator m_end;

      /// Moves m_it forward until it refers to an enabled row or hits m_end.
      void
      skip_disabled ()
      {
        for (; m_it != m_end; ++m_it)
          {
            if (m_it->is_enabled ())
              {
                break;
              }
          }
      }

    public:
      /**
       * @param it  row to start at; disabled rows at the front are skipped
       * @param end one past the last row
       */
      explicit
      const_iterator (const typename std::vector<DataRow>::const_iterator & it,
                      const typename std::vector<DataRow>::const_iterator & end) :
        m_it (it), m_end (end)
      {
        skip_disabled ();
      }

      /// Advances to the next enabled row.
      const_iterator&
      operator++ ()
      {
        ++m_it;
        skip_disabled ();
        return *this;
      }

      const_iterator
      operator++ (int)
      {
        const const_iterator previous (*this);
        operator++ ();
        return previous;
      }

      bool
      operator== (const const_iterator & other) const
      {
        return m_it == other.m_it;
      }

      bool
      operator!= (const const_iterator & other) const
      {
        return not (m_it == other.m_it);
      }

      /// The row the iterator currently refers to.
      const DataRow &
      operator* () const
      {
        return *m_it;
      }
    };

  private:
    // Counters of currently enabled rows / columns; adjusted by
    // restrict_column() and toggle_column() respectively.
    std::size_t m_num_rows, m_num_cols;
    // Counters of currently enabled INPUT / OUTPUT columns.
    std::size_t m_num_inputs, m_num_outputs;
    // Index of the first output column: columns [0, m_separator) are treated
    // as inputs, columns [m_separator, m_cols.size()) as outputs.
    std::size_t m_separator;
    // All column handles, enabled or not.
    std::vector<MockColumn> m_cols;
    // All rows, enabled or not.
    // NOTE(review): DataRow stores references to a column vector and to a
    // column counter; if rows are bound to this object's m_cols/m_num_cols
    // (not visible in this header), a copied DataSet would still point at the
    // source object - confirm before copying or moving instances.
    std::vector<DataRow> m_rows;

  public:
    /// Creates an empty data set: no rows, no columns, all counters zero.
    DataSet ()
      : m_num_rows (0),
        m_num_cols (0),
        m_num_inputs (0),
        m_num_outputs (0),
        m_separator (0)
    {
    }

    /**
     * Creates a data set from a file: the file content obtained from
     * Util::read_file is handed to parse_from_csv with default arguments.
     *
     * @param fpath path passed to Util::read_file
     */
    DataSet (const std::string& fpath)
      : DataSet ()  // start from the empty state, then fill it
    {
      const std::string content = Util::read_file (fpath);
      parse_from_csv (content);
    }


    /**
     * Fills the data set from CSV text.
     *
     * Only usable for T = double: the generic definition further down in
     * this header contains a static_assert, and an explicit specialization
     * for double is declared there (its definition is not in this header).
     *
     * @param cont      text to parse (the file constructor passes the result
     *                  of Util::read_file)
     * @param separator presumably the field delimiter - confirm in the
     *                  implementation
     * @param comment   presumably the marker introducing comment lines -
     *                  confirm in the implementation
     * @param newline   presumably the record delimiter - confirm in the
     *                  implementation
     */
    void
    parse_from_csv (const std::string & cont, std::string separator = ",",
            std::string comment = "#", std::string newline = "\n");

    /**
     * Serializes the data set and returns the result as a string.
     *
     * Only usable for T = double (see the static_assert in the generic
     * definition and the explicit specialization declared further down).
     *
     * @param separator presumably the field delimiter - confirm in the
     *                  implementation
     * @param newline   presumably the record delimiter - confirm in the
     *                  implementation
     */
    std::string
    write_to_csv (std::string separator = ",", std::string newline = "\n");

    void
    toggle_column (std::size_t c, bool mode)
    {
      if (c >= m_cols.size ())
    {
      throw std::out_of_range (
          "index exceeds range [0, " + std::to_string (m_num_cols)
          + "), given: " + std::to_string (c));
    }

      if (not mode and m_cols[c].is_enabled ())
    {
      --m_num_cols;

      if (m_cols[c].get_type () == ColumnType::INPUT)
        {
          --m_num_inputs;
        }
      else
        {
          --m_num_outputs;
        }
    }
      else if (mode and not m_cols[c].is_enabled ())
    {
      ++m_num_cols;

      if (m_cols[c].get_type () == ColumnType::INPUT)
        {
          ++m_num_inputs;
        }
      else
        {
          ++m_num_outputs;
        }
    }

      m_cols[c].set_enabled (mode);
    }

    /**
     * Exchanges the column handles at positions `c0` and `c1`.
     *
     * @param c0 index into all columns
     * @param c1 index into all columns
     * @throws std::out_of_range if either index is not a valid column index
     *         (`c0` is validated first)
     */
    void
    swap_columns (std::size_t c0, std::size_t c1)
    {
      const std::size_t total = m_cols.size ();
      const auto require_valid = [total] (std::size_t index)
        {
          if (index < total)
            {
              return;
            }
          throw std::out_of_range (
              "index exceeds range [0, " + std::to_string (total)
              + "), given: " + std::to_string (index));
        };

      require_valid (c0);
      require_valid (c1);

      MockColumn::swap (m_cols[c0], m_cols[c1]);
    }

    void
    restrict_column (std::size_t c, T l_restr, T u_restr)
    {
      if (c >= m_cols.size())
    {
      throw std::out_of_range (
          "index exceeds range [0, " + std::to_string (m_cols.size())
          + "), given: " + std::to_string (c));
    }

      m_cols[c].set_range (l_restr, u_restr);
      std::size_t num_disabled = std::count_if(m_cols.begin(), m_cols.begin() + c,
                                               [](const MockColumn& c) -> std::size_t { return not c.is_enabled(); });
      for (DataRow& r : m_rows)
    {
      if (r.is_enabled())
        {
          if (!r[c - num_disabled].null &&
              (r[c - num_disabled].value < l_restr || r[c - num_disabled].value > u_restr))
            {
              r.set_enabled (false);
              --m_num_rows;
            }
        }
      else
        {
          bool is_active = true;
          std::size_t data_row_index = 0;
          for (std::size_t i = 0; i < m_cols.size(); ++i)
            {
              if (m_cols[i].is_enabled()) {
                  Variable variable = m_cols[i].get_var();
                  if (!r[data_row_index].null
                      && (r[data_row_index].value < variable.min || r[data_row_index].value > variable.max)) {
                      is_active = false;
                      break;
                  }
                  ++data_row_index;
              }
            }
          if (is_active)
            {
              r.set_enabled (true);
              ++m_num_rows;
            }
        }
    }
    }

    /// Number of currently enabled columns.
    std::size_t
    get_num_active_cols () const
    {
      return m_num_cols;
    }

    /// Total number of columns, enabled or not.
    std::size_t
    get_num_cols () const
    {
      return m_cols.size ();
    }

    /// Number of currently enabled rows.
    std::size_t
    get_num_rows () const
    {
      return m_num_rows;
    }

    /// Number of currently enabled input columns.
    std::size_t
    get_num_active_inputs () const
    {
      return m_num_inputs;
    }

    /// Total number of input columns (position of the input/output separator).
    std::size_t
    get_num_inputs () const
    {
      return m_separator;
    }

    /// Number of currently enabled output columns.
    std::size_t
    get_num_active_outputs () const
    {
      return m_num_outputs;
    }

    /// Total number of output columns (all columns behind the separator).
    std::size_t
    get_num_outputs () const
    {
      const std::size_t total = m_cols.size ();
      return total - m_separator;
    }

    /// Row at storage index `i`; unchecked, and disabled rows are included.
    const DataRow &
    operator[] (std::size_t i) const
    {
      return m_rows[i];
    }

    inline std::vector<Variable>
    input_variables (void) const
    {
      std::vector<Variable> input_vars;

      for (std::size_t i = 0; i < m_separator; ++i)
    {
      if (m_cols[i].is_enabled ())
        {
          input_vars.push_back (m_cols[i].get_var ());
        }
    }

      return input_vars;
    }

    inline std::vector<Variable>
    output_variables (void) const
    {
      std::vector<Variable> output_vars;

      for (std::size_t i = m_separator; i < m_cols.size (); ++i)
    {
      if (m_cols[i].is_enabled ())
        {
          output_vars.push_back (m_cols[i].get_var ());
        }
    }

      return output_vars;
    }

    /// Iterator to the first enabled row (or end() if there is none).
    const_iterator
    begin () const
    {
      const auto first = m_rows.begin ();
      const auto last = m_rows.end ();
      return const_iterator (first, last);
    }

    /// Past-the-end iterator over the rows.
    const_iterator
    end () const
    {
      const auto last = m_rows.end ();
      return const_iterator (last, last);
    }

    /**
     * Splits one header entry into (name, unit, range minimum, range maximum).
     *
     * The entry is stripped with Util::strip and one pair of enclosing
     * double quotes is removed. Afterwards:
     *  - the unit is the text between the last '[' and the next ']',
     *  - the range is the text "(min,max)" starting at the last '(',
     *  - the name is the text between the first and the last double quote,
     *    or, without quotes, the text starting at index 2 (behind a
     *    two-character prefix such as "i#"/"o#"); it ends before the unit or
     *    range if one of them starts earlier.
     * Parts that are not present are returned as empty strings; range bounds
     * are returned as text and are not converted to numbers here.
     *
     * An entry that is too short to contain a name yields an empty name
     * instead of raising std::out_of_range from substr.
     *
     * @param header raw header entry
     * @return tuple of name, unit, range minimum and range maximum
     */
    std::tuple<std::string, std::string, std::string, std::string>
    extract_header_entry(const std::string & header) const
    {
        // remove leading and trailing quotations; a lone quote character is
        // not a pair and is left alone
        auto stripped_header = Util::strip(header);
        if (stripped_header.size() >= 2
                and stripped_header[0] == '\"'
                and stripped_header.back() == '\"') {
            stripped_header = stripped_header.substr(1, stripped_header.size() - 2);
        }

        // the unit is encapsulated by brackets. we use the last pair of brackets
        // (npos + 1 wraps to 0, the results are only used if the opener exists)
        const std::size_t open_bracket = stripped_header.rfind('[');
        const std::size_t close_bracket = stripped_header.find(']', open_bracket + 1);
        // the range is encapsulated by braces.
        const std::size_t open_brace = stripped_header.rfind('(');
        const std::size_t comma_separator = stripped_header.find(',', open_brace + 1);
        const std::size_t close_brace = stripped_header.find(')', comma_separator + 1);
        // the name can be enclosed in quotes
        std::size_t open_quote = stripped_header.find('\"');
        std::size_t close_quote = stripped_header.rfind('\"');

        // the unit and the range should not be part of the name
        // if one of them is missing its position is npos, i.e. very large
        close_quote = std::min(std::min(close_quote, open_bracket), open_brace);

        std::string name, unit, range_min, range_max;

        // get unit only if it was specified
        if (open_bracket != std::string::npos
                && close_bracket != std::string::npos)
        {
            unit = Util::strip(stripped_header.substr (
                    open_bracket + 1, close_bracket - open_bracket - 1));
        }

        // get range only if it was specified
        if (open_brace != std::string::npos
            && comma_separator != std::string::npos
            && close_brace != std::string::npos)
        {
            range_min = Util::strip(stripped_header.substr (
                    open_brace + 1, comma_separator - open_brace - 1));
            range_max = Util::strip(stripped_header.substr (
                    comma_separator + 1, close_brace - comma_separator - 1));
        }

        // if there are no quotes, we start after i# or o#
        if (open_quote == std::string::npos)
        {
            open_quote = 1;
        }

        // get the name; substr throws when the start lies behind the end of
        // the string, which happens for unquoted entries shorter than two
        // characters, so guard the start position
        const std::size_t name_begin = open_quote + 1;
        if (name_begin <= stripped_header.size())
        {
            // if no terminator follows the start, the length wraps to a huge
            // value and substr takes the remainder of the entry
            name = Util::strip(stripped_header.substr (
                    name_begin, close_quote - name_begin));
        }

        return std::make_tuple(name, unit, range_min, range_max);
    }
  };

// Generic fallback: parsing is only provided for DataSet<double> (see the
// explicit specialization declared below), so instantiating this definition
// for any other T fails at compile time.
template<typename T>
  void
  DataSet<T>::parse_from_csv (const std::string & /* cont */,
                              std::string /* separator */,
                              std::string /* comment */,
                              std::string /* newline */)
  {
    static_assert(std::is_same<T, double>::value, "Should not be compiled.");
  }

// Generic fallback: writing is only provided for DataSet<double> (see the
// explicit specialization declared below), so instantiating this definition
// for any other T fails at compile time.
template<typename T>
  std::string
  DataSet<T>::write_to_csv (std::string /* separator */,
                            std::string /* newline */)
  {
    static_assert(std::is_same<T, double>::value, "Should not be compiled.");
    return std::string ();
  }

// Explicit specialization of parse_from_csv for T = double. Declared only;
// the definition is not part of this header (presumably it lives in the
// corresponding source file - confirm).
template<>
  void
  DataSet<double>::parse_from_csv (const std::string & cont,
                   std::string separator, std::string comment,
                   std::string newline);

// Explicit specialization of write_to_csv for T = double. Declared only;
// the definition is not part of this header.
template<>
  std::string
  DataSet<double>::write_to_csv (std::string separator, std::string newline);

// Convenience aliases for the double-valued data set and its nested types.
using DefDataSet = DataSet<double>;
using DefVariable = DefDataSet::Variable;
using DefDataRow = DefDataSet::DataRow;
using DefCell = DefDataSet::Cell;

#endif /* DATASET_H_ */