#include "jcch_benchmark_item_runner.hpp"

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>

#include <boost/algorithm/string/replace.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>

#include "abstract_benchmark_item_runner.hpp"
#include "benchmark_config.hpp"
#include "benchmark_sql_executor.hpp"
#include "sql/sql_pipeline_statement.hpp"
#include "tpch/tpch_benchmark_item_runner.hpp"
#include "tpch/tpch_constants.hpp"
#include "tpch/tpch_queries.hpp"
#include "types.hpp"
#include "utils/assert.hpp"
#include "utils/date_time_utils.hpp"
#include "utils/string_utils.hpp"
#include "utils/timer.hpp"

namespace hyrise {

JCCHBenchmarkItemRunner::JCCHBenchmarkItemRunner(const bool skewed, const std::string& dbgen_path,
                                                 const std::string& data_path,
                                                 const std::shared_ptr<BenchmarkConfig>& config,
                                                 bool use_prepared_statements, float scale_factor,
                                                 ClusteringConfiguration clustering_configuration)
    : TPCHBenchmarkItemRunner(config, use_prepared_statements, scale_factor, clustering_configuration),
      _skewed(skewed),
      _dbgen_path(dbgen_path),
      _data_path(data_path) {
  _load_params();
}

JCCHBenchmarkItemRunner::JCCHBenchmarkItemRunner(const bool skewed, const std::string& dbgen_path,
                                                 const std::string& data_path,
                                                 const std::shared_ptr<BenchmarkConfig>& config,
                                                 bool use_prepared_statements, float scale_factor,
                                                 ClusteringConfiguration clustering_configuration,
                                                 const std::vector<BenchmarkItemID>& items)
    : TPCHBenchmarkItemRunner(config, use_prepared_statements, scale_factor, clustering_configuration, items),
      _skewed(skewed),
      _dbgen_path(dbgen_path),
      _data_path(data_path) {
  _load_params();
}

// Returns the display name of the item with the zero-based `item_id`, e.g., "JCC-H (skewed) 03" for item_id 2.
// Fails if `item_id` does not identify one of the 22 queries.
std::string JCCHBenchmarkItemRunner::item_name(const BenchmarkItemID item_id) const {
  Assert(item_id < 22, "item_id out of range.");

  // Query numbers are one-based, whereas item ids are zero-based.
  const auto query_number = item_id + 1;

  auto name = std::string{"JCC-H "};
  name += _skewed ? "(skewed) " : "(normal) ";

  // Single-digit query numbers are padded to two digits.
  if (query_number < 10) {
    name += "0";
  }

  name += std::to_string(query_number);
  return name;
}

// Loads the query parameters generated by JCC-H's qgen into _all_params (one list of parameterizations per query).
//
// The parameters are read from `<_data_path>/queries/params-skewed` or `.../params-normal`, depending on _skewed. If
// that file does not exist yet, it is created first: The query templates of dbgen are symlinked into the local queries
// folder and the external qgen binary is called once per seed, each call logging its parameters to the file.
//
// Fails (via Assert) if the query templates are missing, an external command returns a non-zero exit code, the
// parameters file cannot be opened, or a line does not start with a query id between 1 and 22.
void JCCHBenchmarkItemRunner::_load_params() {
  const auto local_queries_path = _data_path + "/queries/";
  const auto params_path = local_queries_path + "params-" + (_skewed ? "skewed" : "normal");

  // Check if the query parameters have already been generated.
  if (!std::filesystem::exists(params_path)) {
    auto timer = Timer{};

    std::cout << "- Creating query parameters by calling external qgen" << std::flush;

    // Check for the existence of dbgen's query templates (1.sql etc.) at the expected location.
    const auto dbgen_queries_path = _dbgen_path + "/queries/";
    Assert(std::filesystem::exists(dbgen_queries_path),
           std::string{"Query templates not found at "} + dbgen_queries_path);

    // NOLINTBEGIN(concurrency-mt-unsafe): std::system() is not thread-safe. We can ignore this warning, because
    // _load_params is only called in the constructor once.

    // Create local directory and copy query templates if needed. The symlinks are only created if the directory did
    // not exist before, as `ln -s` would fail for links that are already present.
    const auto local_queries_dir_created = std::filesystem::create_directory(local_queries_path);
    Assert(std::filesystem::exists(local_queries_path), "Creating JCC-H queries folder failed.");
    if (local_queries_dir_created) {
      auto cmd = std::stringstream{};
      cmd << "cd " << local_queries_path << " && ln -s " << _dbgen_path << "/queries/*.sql .";
      const auto ret = std::system(cmd.str().c_str());
      Assert(!ret, "Creating symlinks to query templates failed.");
    }

    // Call qgen a couple of times with different PRNG seeds and store the resulting query parameters in queries/params.
    // dbgen doesn't like `-r 0`, so we start at 1. Without a configured number of runs, 100'000 seeds are used.
    for (auto seed = int64_t{1}; seed <= (_config->max_runs > 0 ? _config->max_runs : 100'000); ++seed) {
      auto cmd = std::stringstream{};
      cmd << "cd " << local_queries_path << " && " << _dbgen_path << "/qgen " << (_skewed ? "-k" : "") << " -s "
          << _scale_factor << " -b " << _dbgen_path << "/dists.dss -r " << seed << " -l " << params_path
          << " >/dev/null";
      const auto ret = std::system(cmd.str().c_str());
      Assert(!ret, "Calling qgen failed.");
    }
    // NOLINTEND(concurrency-mt-unsafe)

    std::cout << " (" << timer.lap_formatted() << ")\n";
  }

  // Open the params file. Each line holds one parameterization of a query as tab-separated values:
  //   query_id<TAB>param0<TAB>param1...
  auto file = std::ifstream(params_path);
  Assert(file.is_open(), std::string{"Could not open JCC-H parameters at "} + params_path);

  auto line = std::string{};
  while (std::getline(file, line)) {
    // Blank lines do not carry any parameters. Skip them instead of accessing a non-existing query id below.
    if (line.empty()) {
      continue;
    }

    // Load the parameter into the corresponding entry in _all_params
    auto string_values = split_string_by_delimiter(line, '\t');
    Assert(!string_values.empty(), "Malformed line in JCC-H parameters file.");
    const auto query_id = std::stoi(string_values[0]);
    Assert(query_id >= 1 && query_id <= 22, "Invalid query_id.");

    // The query id only selects the target list; the stored parameterization consists of the remaining values.
    string_values.erase(string_values.begin());
    _all_params[query_id - 1].emplace_back(string_values);
  }
}

bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, BenchmarkSQLExecutor& sql_executor) {
  const auto& this_item_params = _all_params[item_id];

  // Choose a random parameterization from _all_params
  static thread_local auto random_engine = std::minstd_rand{_random_seed++};
  auto params_dist = std::uniform_int_distribution<>{0, static_cast<int>(this_item_params.size() - 1)};
  const auto raw_params_iter = this_item_params.begin() + params_dist(random_engine);

  auto parameters = std::vector<std::string>{};
  auto sql = std::string{};

  // This mirrors TPCHBenchmarkItemRunner::_on_execute_item. Instead of generating random parameters according to the
  // TPC-H specifications, it uses the ones generated by JCC-H's qgen.
  switch (item_id) {
    // Writing `1-1` to make people aware that this is zero-indexed while TPC-H/JCC-H query names are not.
    case 1 - 1: {
      // In some cases, we still need to do the date calculations that SQLite (used for verification) does not
      // support yet. When parsing a date, we expect the generator to provide sound date strings and omit checks when
      // dereferencing the optionals.
      const auto date = date_interval(boost::gregorian::date{1998, 12, 01}, -std::stoi(raw_params_iter->at(0)),
                                      DatetimeComponent::Day);
      parameters.emplace_back("'" + date_to_string(date) + "'");
      break;
    }

    case 2 - 1: {
      parameters.emplace_back(raw_params_iter->at(0));
      parameters.emplace_back("'%" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(2) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(2) + "'");
      break;
    }

    case 3 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      break;
    }

    case 4 - 1: {
      const auto begin_date = string_to_timestamp(raw_params_iter->at(0))->date();
      const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month);

      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + date_to_string(end_date) + "'");
      break;
    }

    case 5 - 1: {
      const auto begin_date = string_to_timestamp(raw_params_iter->at(1))->date();
      const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + date_to_string(end_date) + "'");
      break;
    }

    case 6 - 1: {
      const auto begin_date = string_to_timestamp(raw_params_iter->at(0))->date();
      const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + date_to_string(end_date) + "'");
      parameters.emplace_back(raw_params_iter->at(1));
      parameters.emplace_back(raw_params_iter->at(1));
      parameters.emplace_back(raw_params_iter->at(2));
      break;
    }

    case 7 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(2) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(3) + "'");
      break;
    }

    case 8 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(2) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(3) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(4) + "'");
      break;
    }

    case 9 - 1: {
      static auto warned_performance = false;
      if (!warned_performance) {
        std::cerr << "\nWarning: JCC-H Query 9 needs optimization. Consider skipping it using -q\n\n";
        warned_performance = true;
      }

      parameters.emplace_back("'%" + raw_params_iter->at(0) + "%'");
      break;
    }

    case 10 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      break;
    }

    case 11 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back(raw_params_iter->at(1));
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      break;
    }

    case 12 - 1: {
      const auto begin_date = string_to_timestamp(raw_params_iter->at(2))->date();
      const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(2) + "'");
      parameters.emplace_back("'" + date_to_string(end_date) + "'");
      break;
    }

    case 13 - 1: {
      parameters.emplace_back("'%" + raw_params_iter->at(0) + '%' + raw_params_iter->at(1) + "%'");
      break;
    }

    case 14 - 1: {
      const auto begin_date = string_to_timestamp(raw_params_iter->at(0))->date();
      const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Month);

      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + date_to_string(end_date) + "'");
      break;
    }

    case 15 - 1: {
      auto query_15 = std::string{tpch_queries.at(15)};

      const auto begin_date = string_to_timestamp(raw_params_iter->at(0))->date();
      const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month);

      // Hack: We cannot use prepared statements in TPC-H 15. Thus, we need to build the SQL string by hand.
      // By manually replacing the `?` from tpch_queries.cpp, we can keep all queries in a readable form there.
      // This is ugly, but at least we can assert that nobody tampered with the string over there.
      static constexpr auto BEGIN_DATE_OFFSET = 156;
      static constexpr auto END_DATE_OFFSET = 192;
      DebugAssert((std::string_view{&query_15[BEGIN_DATE_OFFSET], 10} == "1996-01-01" &&
                   std::string_view{&query_15[END_DATE_OFFSET], 10} == "1996-04-01"),
                  "TPC-H 15 string has been modified");
      query_15.replace(BEGIN_DATE_OFFSET, 10, raw_params_iter->at(0));
      query_15.replace(END_DATE_OFFSET, 10, date_to_string(end_date));

      const auto view_id = std::atomic_fetch_add(&_q15_view_id, size_t{1});
      boost::replace_all(query_15, std::string("revenue_view"), std::string("revenue") + std::to_string(view_id));

      // Not using _substitute_placeholders here
      sql = query_15;
      break;
    }

    case 16 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      for (auto index = size_t{0}; index < 8; ++index) {
        parameters.emplace_back(raw_params_iter->at(2 + index));
      }
      break;
    }

    case 17 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      break;
    }

    case 18 - 1: {
      static auto warned_compliance = false;
      if (!warned_compliance) {
        std::cerr << "\nWarning: JCC-H Query 18 as used by Hyrise slightly diverges from the specification.\n";
        std::cerr << "         See jcch_benchmark_item_runner.cpp for details.\n\n";
        warned_compliance = true;
      }

      // JCC-H has a second parameter to this query:
      //   https://github.com/ldbc/dbgen.JCC-H/commit/d42a7ebc2617ec31de55b00425c23ab7885beeeb#diff-c448b6246f882ef1a5fd8e7ded77b8134addba8443ce2b43425e563045895fc4
      // We do not use this parameter as it would bring a structural change to the SQL query template, which is also
      // used for TPC-H.
      parameters.emplace_back(raw_params_iter->at(0));
      break;
    }

    case 19 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      parameters.emplace_back(raw_params_iter->at(3));
      parameters.emplace_back(raw_params_iter->at(3));
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back(raw_params_iter->at(4));
      parameters.emplace_back(raw_params_iter->at(4));
      parameters.emplace_back("'" + raw_params_iter->at(2) + "'");
      parameters.emplace_back(raw_params_iter->at(5));
      parameters.emplace_back(raw_params_iter->at(5));

      break;
    }

    case 20 - 1: {
      const auto begin_date = string_to_timestamp(raw_params_iter->at(1))->date();
      const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

      parameters.emplace_back("'" + raw_params_iter->at(0) + "%'");
      parameters.emplace_back("'" + raw_params_iter->at(1) + "'");
      parameters.emplace_back("'" + date_to_string(end_date) + "'");
      parameters.emplace_back("'" + raw_params_iter->at(2) + "'");
      break;
    }

    case 21 - 1: {
      parameters.emplace_back("'" + raw_params_iter->at(0) + "'");
      break;
    }

    case 22 - 1: {
      // We need the same country code twice - have a look at the query
      for (auto index = size_t{0}; index < 7; ++index) {
        parameters.emplace_back("'" + raw_params_iter->at(index) + "'");
      }

      for (auto index = size_t{0}; index < 7; ++index) {
        parameters.emplace_back("'" + raw_params_iter->at(index) + "'");
      }
      break;
    }

    default:
      Fail("There are only 22 JCC-H queries.");
  }

  if (sql.empty()) {
    sql = _substitute_placeholders(item_id, parameters);
  }

  const auto [status, table] = sql_executor.execute(sql, nullptr);
  Assert(status == SQLPipelineStatus::Success, "JCC-H items should not fail.");
  return true;
}

}  // namespace hyrise
