Program Listing for File DataSet.cpp

Program Listing for File DataSet.cpp

Return to documentation for file (src/main/cpp/lib/DataSet.cpp)

#include "DataSet.h"

#include <memory>

/**
 * Parses CSV text into this data set (specialisation for `double`).
 *
 * Layout handled by the code below:
 *  - `cont` is split into lines on `newline`. Leading lines that are blank or
 *    whose first non-blank character is '#' or one of the characters in
 *    `comment` are skipped; the next line is the header.
 *  - The header is split on `separator`. A leading run of cells containing
 *    "i#"/"I#" declares the input columns, the run of cells containing
 *    "o#"/"O#" that directly follows declares the output columns. Any header
 *    cells after that are ignored.
 *  - Each header cell is decoded by extract_header_entry() into name, unit and
 *    an optional range minimum / maximum. A given range bound overrides the
 *    observed column minimum / maximum when restrict_column() is called.
 *  - Every following line that is neither blank nor a comment is split on
 *    `separator` and must contain exactly inputs + outputs cells.
 *
 * Cell handling: an empty input cell is stored as a NaN cell; any cell that
 * std::stod rejects with std::invalid_argument (including an empty output
 * cell) is stored as a default-constructed cell.
 *
 * Side effect: switches the process-wide LC_NUMERIC locale to "en_US.UTF-8".
 *
 * NOTE(review): ownership of the DataColumn objects is handed to m_cols via
 * `m_cols.emplace_back(DataColumn*)`, exactly as before — confirm that the
 * element type of m_cols really takes ownership of that pointer.
 *
 * @param cont       complete CSV text
 * @param separator  cell separator used for the header and for data rows
 * @param comment    set of characters that mark a comment line
 * @param newline    line separator
 *
 * @throws std::invalid_argument  if no header line exists, if the header
 *                                declares no input/output column, or if a
 *                                header range bound is not numeric
 * @throws std::length_error      if a data row has the wrong number of cells
 * @throws std::out_of_range      if std::stod reports a value out of range
 */
template<>
  void
  DataSet<double>::parse_from_csv (const std::string& cont,
                   std::string separator, std::string comment,
                   std::string newline)
  {
    // std::stod follows the C locale's LC_NUMERIC category.
    std::setlocale(LC_NUMERIC, "en_US.UTF-8");

    // Marks a range bound that the header did not specify.
    const double unset = std::numeric_limits<double>::quiet_NaN ();

    const std::vector<std::string> & lines = Util::split (cont, newline);

    // '#' is always a comment marker (it was hard-coded for the header
    // before); the characters of `comment` are honoured in addition.
    const auto is_comment_char = [&comment] (char c)
      {
        return c == '#' || comment.find (c) != std::string::npos;
      };

    // Find the header: first line that is neither blank nor a comment.
    std::size_t header_pos = 0;
    for (; header_pos < lines.size (); ++header_pos)
      {
        const std::size_t first =
            lines[header_pos].find_first_not_of (Util::BLANKS);
        if (first != std::string::npos
            && !is_comment_char (lines[header_pos][first]))
          {
            break;
          }
      }
    if (header_pos >= lines.size ())
      {
        throw std::invalid_argument (
            "CSV content does not contain a header line");
      }

    const std::vector<std::string> & header = Util::split (lines[header_pos],
                                                           separator);

    // Columns stay owned here until they are handed to m_cols, so an
    // exception thrown while parsing does not leak them.
    std::vector<std::unique_ptr<DataColumn>> columns;
    // Header range per column (same index as `columns`); NaN = not given.
    std::vector<std::pair<double, double>> ranges;
    std::size_t next = 0;

    // Consumes the run of header cells starting at `next` that carry one of
    // the two tags and creates one column of the given type for each.
    const auto read_header_group =
      [&] (const char * lower_tag, const char * upper_tag, ColumnType type)
      {
        while (next < header.size ()
               && (header[next].find (lower_tag) != std::string::npos
                   || header[next].find (upper_tag) != std::string::npos))
          {
            const auto entry = extract_header_entry (Util::strip (header[next]));
            const std::string name = std::get<0> (entry);
            const std::string unit = std::get<1> (entry);
            const std::string range_min_str = std::get<2> (entry);
            const std::string range_max_str = std::get<3> (entry);

            ranges.emplace_back (
                range_min_str.empty () ? unset : std::stod (range_min_str),
                range_max_str.empty () ? unset : std::stod (range_max_str));

            // Running min starts at the largest double, running max at the
            // most negative one (DBL_MIN is the smallest *positive* double
            // and would hide the maximum of an all-negative column).
            columns.push_back (
                std::unique_ptr<DataColumn> (
                    new DataColumn (
                        type,
                        DefVariable (std::numeric_limits<double>::max (),
                                     std::numeric_limits<double>::lowest (),
                                     Util::strip (name),
                                     Util::strip (unit)))));
            ++next;
          }
      };

    read_header_group ("i#", "I#", ColumnType::INPUT);
    const std::size_t num_inputs = next;
    read_header_group ("o#", "O#", ColumnType::OUTPUT);
    const std::size_t num_outputs = next - num_inputs;

    // Parses one cell into column `c`, widening the column's observed
    // min/max. Unparsable text becomes a default-constructed cell.
    const auto store_cell =
      [&columns] (std::size_t c, const std::string & raw, bool empty_is_nan)
      {
        DataColumn & column = *columns[c];
        try
          {
            const std::string text = Util::strip (raw);
            const double val = (empty_is_nan && text.empty ())
                ? std::numeric_limits<double>::quiet_NaN ()
                : std::stod (text);
            DefCell cell (val);
            if (cell.value < column.var.min)
              column.var.min = cell.value;
            if (cell.value > column.var.max)
              column.var.max = cell.value;
            column.cells.push_back (cell);
          }
        catch (const std::invalid_argument &)
          {
            column.cells.emplace_back ();
          }
      };

    // Add values from table body
    for (std::size_t rowc = header_pos + 1; rowc < lines.size (); ++rowc)
      {
        const std::string & line = lines[rowc];
        const std::size_t first = line.find_first_not_of (Util::BLANKS);

        // Skip blank lines.
        if (first == std::string::npos)
          continue;

        // Skip comment lines. The second test is the rule this function
        // always applied to body lines (comment character right after the
        // first non-blank character) and is kept for compatibility.
        if (is_comment_char (line[first])
            || line.find_first_of (comment) == first + 1)
          continue;

        const std::vector<std::string> & cells = Util::split (line, separator,
                                                              false);

        if (num_inputs + num_outputs != cells.size ())
          {
            throw std::length_error (
                "Row " + std::to_string (rowc)
                + " has an invalid count of cells" + "(expected: "
                + std::to_string (num_inputs + num_outputs)
                + ", given: " + std::to_string (cells.size ()) + ")");
          }

        // Input cells: an empty cell is recorded as NaN.
        for (std::size_t c = 0; c < num_inputs; ++c)
          {
            store_cell (c, cells[c], true);
          }

        // Output cells: an empty cell is rejected by std::stod and therefore
        // recorded as a default-constructed cell.
        for (std::size_t c = num_inputs; c < cells.size (); ++c)
          {
            store_cell (c, cells[c], false);
          }
      }

    // Add spacing to min and max if the variance of a column is 0
    for (const auto & column : columns)
      {
        if (column->var.max - column->var.min
            <= std::numeric_limits<double>::epsilon ())
          {
            column->var.min -= 1;
            column->var.max += 1;
          }
      }

    // Everything below indexes column 0, so an empty header is an error.
    if (columns.empty ())
      {
        throw std::invalid_argument (
            "CSV header does not declare any input or output column");
      }

    m_num_cols = columns.size ();
    m_num_rows = columns[0]->cells.size ();
    m_cols.reserve (m_num_cols);
    for (auto & column : columns)
      {
        m_cols.emplace_back (column.get ());
        // Hand-off succeeded: give up ownership of the raw pointer.
        column.release ();
      }

    m_num_inputs = m_separator = num_inputs;
    m_num_outputs = m_cols.size () - m_num_inputs;

    m_rows.reserve (m_cols[0].size ());
    for (std::size_t r = 0; r < m_cols[0].size (); ++r)
      {
        m_rows.push_back (DataRow (m_cols, m_num_cols, r));
      }

    // Restrict every column (inputs first, then outputs) to the range given
    // in the header, falling back to the observed min/max per bound.
    for (std::size_t c = 0; c < ranges.size (); ++c)
      {
        double range_min = m_cols[c].get_var().min;
        double range_max = m_cols[c].get_var().max;
        if (!std::isnan (ranges[c].first))
          {
            range_min = ranges[c].first;
          }
        if (!std::isnan (ranges[c].second))
          {
            range_max = ranges[c].second;
          }
        this->restrict_column(c, range_min, range_max);
      }
  }

/**
 * Serialises this data set to CSV text (specialisation for `double`).
 *
 * The first line holds one quoted entry per variable, inputs first:
 * `"i#name[unit](min,max)"` for inputs and `"o#name[unit](min,max)"` for
 * outputs; the `[unit]` part is omitted when the unit is empty. Every header
 * entry is followed by `separator` and a space.
 *
 * Each following line holds one row. Every cell is followed by `separator`
 * (so lines end with a trailing separator); a cell flagged `null` is written
 * as an empty field. Numbers are formatted with std::to_string.
 *
 * Side effect: switches the process-wide LC_NUMERIC locale to "en_US.UTF-8".
 *
 * @param separator  text written after every header entry and every cell
 * @param newline    text written after the header and after every row
 * @return the CSV text
 */
template<>
  std::string
  DataSet<double>::write_to_csv (std::string separator, std::string newline)
  {
    std::setlocale(LC_NUMERIC, "en_US.UTF-8");

    std::string csv;

    // Appends one header entry; `tag` is the opening quote plus "i#" or "o#".
    const auto append_header_entry =
      [&csv, &separator] (const char * tag, const DefVariable & variable)
      {
        csv += tag + variable.name;
        if (!variable.unit.empty ())
          {
            csv += "[" + variable.unit + "]";
          }
        csv += "(" + std::to_string (variable.min) + ","
               + std::to_string (variable.max) + ")\"" + separator + " ";
      };

    for (const DefVariable & variable : this->input_variables ())
      {
        append_header_entry ("\"i#", variable);
      }
    for (const DefVariable & variable : this->output_variables ())
      {
        append_header_entry ("\"o#", variable);
      }
    csv += newline;

    for (const DefDataRow & row : *this)
      {
        for (std::size_t col = 0; col < row.size (); ++col)
          {
            // Null cells contribute only their separator.
            if (!row[col].null)
              {
                csv += std::to_string (row[col].value);
              }
            csv += separator;
          }
        csv += newline;
      }

    return csv;
  }