////////////////////////////////////////////////////////////////////////////////
/// DISCLAIMER
///
/// Copyright 2016 by EMC Corporation, All Rights Reserved
///
/// Licensed under the Apache License, Version 2.0 (the "License");
/// you may not use this file except in compliance with the License.
/// You may obtain a copy of the License at
///
///     http://www.apache.org/licenses/LICENSE-2.0
///
/// Unless required by applicable law or agreed to in writing, software
/// distributed under the License is distributed on an "AS IS" BASIS,
/// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
/// See the License for the specific language governing permissions and
/// limitations under the License.
///
/// Copyright holder is EMC Corporation
///
/// @author Andrey Abramov
/// @author Vasiliy Nabatchikov
////////////////////////////////////////////////////////////////////////////////

#include "index/field_data.hpp"

#include <algorithm>
#include <set>

#include "analysis/analyzer.hpp"
#include "analysis/token_attributes.hpp"
#include "analysis/token_streams.hpp"
#include "formats/formats.hpp"
#include "index/buffered_column_iterator.hpp"
#include "index/comparer.hpp"
#include "index/field_meta.hpp"
#include "index/norm.hpp"
#include "shared.hpp"
#include "store/directory.hpp"
#include "store/store_utils.hpp"
#include "utils/assert.hpp"
#include "utils/bit_utils.hpp"
#include "utils/bytes_utils.hpp"
#include "utils/io_utils.hpp"
#include "utils/log.hpp"
#include "utils/lz4compression.hpp"
#include "utils/memory.hpp"
#include "utils/object_pool.hpp"
#include "utils/timer_utils.hpp"
#include "utils/type_limits.hpp"

namespace {

using namespace irs;

// Shared empty pool: the iterators in this file bind their pool readers to it
// on construction, before a real stream is assigned via their reset methods.
const byte_block_pool EMPTY_POOL;

// Adds the key of every entry in 'features' to the 'accum' set.
void accumulate_features(feature_set_t& accum, const feature_map_t& features) {
  std::for_each(features.begin(), features.end(),
                [&accum](const auto& entry) { accum.emplace(entry.first); });
}

// Appends an offset pair to 'out': the start offset is stored as a delta to
// the previously written start offset of the posting ('p.offs'), followed by
// the length of the token. Marks OFFS in 'features' and remembers the new
// start offset in the posting.
template<typename Stream>
void write_offset(posting& p, Stream& out, IndexFeatures& features,
                  const uint32_t base, const offset& offs) {
  const uint32_t first = base + offs.start;
  const uint32_t last = base + offs.end;

  // start offsets must not go backwards within a posting
  IRS_ASSERT(first >= p.offs);

  const uint32_t start_delta = first - p.offs;
  const uint32_t length = last - first;

  irs::vwrite<uint32_t>(out, start_delta);
  irs::vwrite<uint32_t>(out, length);

  features |= IndexFeatures::OFFS;
  p.offs = first;
}

// Appends a position value to 'out'. The value is packed together with a flag
// telling whether a payload follows; a non-empty payload is written as its
// size followed by its bytes and sets PAY in 'features'.
template<typename Stream>
void write_prox(Stream& out, uint32_t prox, const irs::payload* pay,
                IndexFeatures& features) {
  const bool has_payload = pay && !pay->value.empty();

  irs::vwrite<uint32_t>(out, shift_pack_32(prox, has_payload));

  if (!has_payload) {
    return;
  }

  const auto& bytes = pay->value;
  irs::vwrite<size_t>(out, bytes.size());
  out.write(bytes.data(), bytes.size());

  // saw payloads
  features |= IndexFeatures::PAY;
}

// Serializes a cookie: the low byte (offset) is stored verbatim, the next
// 32 bits (slice offset) are stored as a variable length integer.
template<typename Inserter>
IRS_FORCE_INLINE void write_cookie(Inserter& out, uint64_t cookie) {
  const auto offset = static_cast<byte_type>(cookie & 0xFF);
  const auto slice_offset = static_cast<uint32_t>((cookie >> 8) & 0xFFFFFFFF);

  *out = offset;
  irs::vwrite<uint32_t>(out, slice_offset);
}

// Packs a slice offset and an offset (which must fit into a single byte)
// into one cookie: the slice offset occupies the bits above the low byte.
IRS_FORCE_INLINE uint64_t cookie(size_t slice_offset, size_t offset) noexcept {
  IRS_ASSERT(offset <= std::numeric_limits<uint8_t>::max());

  const uint64_t high = static_cast<uint64_t>(slice_offset) << 8;
  const byte_type low = static_cast<byte_type>(offset);

  return high | low;
}

// Reads a cookie previously stored by 'write_cookie': one raw byte holding
// the offset followed by a variable length encoded slice offset.
template<typename Reader>
IRS_FORCE_INLINE uint64_t read_cookie(Reader& in) {
  const size_t low = *in;
  ++in;

  const size_t high = irs::vread<uint32_t>(in);

  return cookie(high, low);
}

// Builds a cookie describing the current write position of 'stream'.
IRS_FORCE_INLINE uint64_t
cookie(const byte_block_pool::sliced_greedy_inserter& stream) noexcept {
  // slices never span over the buffers, so the distance from the beginning
  // of the slice is used as the in-slice offset
  const auto base = stream.slice_offset();
  const auto offset_in_slice = stream.pool_offset() - base;

  return cookie(base, offset_in_slice);
}

// Creates a greedy reader over 'pool' positioned as described by 'cookie'.
IRS_FORCE_INLINE byte_block_pool::sliced_greedy_reader greedy_reader(
  const byte_block_pool& pool, uint64_t cookie) noexcept {
  const auto slice_offset = static_cast<size_t>((cookie >> 8) & 0xFFFFFFFF);
  const auto offset = static_cast<size_t>(cookie & 0xFF);

  return byte_block_pool::sliced_greedy_reader(pool, slice_offset, offset);
}

// Creates a greedy inserter on top of 'writer' positioned as described by
// 'cookie'.
IRS_FORCE_INLINE byte_block_pool::sliced_greedy_inserter greedy_writer(
  byte_block_pool::inserter& writer, uint64_t cookie) noexcept {
  const auto slice_offset = static_cast<size_t>((cookie >> 8) & 0xFFFFFFFF);
  const auto offset = static_cast<size_t>(cookie & 0xFF);

  return byte_block_pool::sliced_greedy_inserter(writer, slice_offset, offset);
}

////////////////////////////////////////////////////////////////////////////////
/// @class pos_iterator
///////////////////////////////////////////////////////////////////////////////
// Decodes the positions of a single term within a single document from a
// proximity stream written by 'write_prox'/'write_offset'.
//
// Usage: Reset(features, freq) binds the iterator to a field, Reset(prox)
// binds it to the proximity stream of the current document. The number of
// positions to decode is taken from the bound frequency attribute.
template<typename Reader>
class pos_iterator final : public irs::position {
 public:
  pos_iterator() : prox_in_(EMPTY_POOL) {}

  // Drops the per-document state: number of consumed positions, current
  // position value, offset and exposed payload.
  void Clear() noexcept {
    pos_ = 0;
    value_ = pos_limits::invalid();
    offs_.clear();
    pay_.value = {};
  }

  // reset field
  // Binds the frequency attribute and exposes offset/payload attributes only
  // if the corresponding feature is present in 'features'.
  void Reset(IndexFeatures features, const frequency& freq) {
    IRS_ASSERT(IndexFeatures::NONE != (features & IndexFeatures::FREQ));

    freq_ = &freq;

    std::get<attribute_ptr<offset>>(attrs_) =
      IndexFeatures::NONE != (features & IndexFeatures::OFFS) ? &offs_
                                                              : nullptr;

    std::get<attribute_ptr<payload>>(attrs_) =
      IndexFeatures::NONE != (features & IndexFeatures::PAY) ? &pay_ : nullptr;
  }

  // reset value
  // Rebinds the iterator to a new proximity stream and clears the state.
  void Reset(const Reader& prox) {
    Clear();
    prox_in_ = prox;
  }

  attribute* get_mutable(irs::type_info::type_id id) noexcept final {
    return irs::get_mutable(attrs_, id);
  }

  // Moves to the next position. Returns false (and sets the value to EOF)
  // once as many positions as the bound frequency were consumed.
  bool next() final {
    IRS_ASSERT(freq_);

    if (pos_ == freq_->value) {
      value_ = irs::pos_limits::eof();

      return false;
    }

    uint32_t pos;

    if (shift_unpack_32(irs::vread<uint32_t>(prox_in_), pos)) {
      // position is followed by a payload: size + bytes
      const size_t size = irs::vread<size_t>(prox_in_);
      payload_value_.resize(size);
      prox_in_.read(payload_value_.data(), size);
      pay_.value = payload_value_;
    } else {
      // nothing was stored for this position, make sure the payload of the
      // previous position isn't exposed again
      pay_.value = {};
    }

    // positions are stored as deltas to the previous position
    value_ += pos;
    IRS_ASSERT(pos_limits::valid(value_));

    if (std::get<attribute_ptr<offset>>(attrs_).ptr) {
      // start offset is a delta to the previous start, end is stored as length
      offs_.start += irs::vread<uint32_t>(prox_in_);
      offs_.end = offs_.start + irs::vread<uint32_t>(prox_in_);
    }

    ++pos_;

    return true;
  }

 private:
  using attributes = std::tuple<attribute_ptr<offset>, attribute_ptr<payload>>;

  Reader prox_in_;
  bstring payload_value_;    // storage for the payload of the current position
  const frequency* freq_{};  // number of term positions in a document
  payload pay_;
  offset offs_;
  attributes attrs_;
  uint32_t pos_{};  // current position
};

}  // namespace

namespace irs {
namespace detail {

////////////////////////////////////////////////////////////////////////////////
/// @class doc_iterator
////////////////////////////////////////////////////////////////////////////////
// Iterates over the documents of a single in-memory posting in the order
// they were written. All documents but the last one are decoded from the
// frequency stream, the last one is taken from the posting itself.
class doc_iterator : public irs::doc_iterator {
 public:
  doc_iterator() noexcept : freq_in_(EMPTY_POOL) {}

  // reset field
  // Exposes frequency/position attributes according to the features
  // requested for 'field'. Cookies are tracked only if positions are
  // requested and the field reports random access to proximity data.
  void reset(const field_data& field) {
    field_ = &field;
    auto& freq = std::get<attribute_ptr<frequency>>(attrs_);
    auto& pos = std::get<attribute_ptr<position>>(attrs_);
    freq = nullptr;
    pos = nullptr;
    has_cookie_ = false;

    const auto features = field.requested_features();
    if (IndexFeatures::NONE != (features & IndexFeatures::FREQ)) {
      freq = &freq_;

      if (IndexFeatures::NONE != (features & IndexFeatures::POS)) {
        pos_.Reset(features, freq_);
        pos = &pos_;
        has_cookie_ = field.prox_random_access();
      }
    }
  }

  // reset term
  // Rewinds the iterator to the beginning of 'posting'. 'prox' may be null,
  // in this case the position iterator isn't rebound.
  void reset(const irs::posting& posting,
             const byte_block_pool::sliced_reader& freq,
             const byte_block_pool::sliced_reader* prox) {
    std::get<document>(attrs_).value = 0;
    freq_.value = 0;
    cookie_ = 0;
    freq_in_ = freq;
    posting_ = &posting;

    const auto& ppos = std::get<attribute_ptr<position>>(attrs_);

    if (ppos.ptr && prox) {
      // reset positions only once,
      // as we need iterator for sequential reads
      pos_.Reset(*prox);
    }
  }

  // Cookie of the proximity data of the current document
  // (stays 0 unless cookies are tracked).
  uint64_t cookie() const noexcept { return cookie_; }

  // NOTE: dereferences 'posting_', which is set to null once the last
  // document has been returned by next().
  size_t cost() const noexcept { return posting_->size; }

  attribute* get_mutable(irs::type_info::type_id type) noexcept final {
    return irs::get_mutable(attrs_, type);
  }

  doc_id_t seek(doc_id_t doc) final {
    irs::seek(*this, doc);
    return value();
  }

  doc_id_t value() const noexcept final {
    return std::get<document>(attrs_).value;
  }

  bool next() final {
    auto& doc = std::get<document>(attrs_);

    if (freq_in_.eof()) {
      if (!posting_) {
        // last document was already returned,
        // the document value is left unchanged here
        return false;
      }

      // the last document isn't stored in the stream,
      // its state is still kept in the posting
      doc.value = posting_->doc;
      freq_.value = posting_->freq;

      if (has_cookie_) {
        // read last cookie
        cookie_ = *field_->int_writer_->parent().seek(posting_->int_start + 3);
      }

      // mark iterator as exhausted
      posting_ = nullptr;
    } else {
      if (std::get<attribute_ptr<frequency>>(attrs_).ptr) {
        uint64_t delta;

        // lowest bit of the stored code denotes frequency == 1,
        // otherwise the frequency follows the code
        if (shift_unpack_64(irs::vread<uint64_t>(freq_in_), delta)) {
          freq_.value = 1U;
        } else {
          freq_.value = irs::vread<uint32_t>(freq_in_);
        }

        IRS_ASSERT(delta < doc_limits::eof());
        doc.value += doc_id_t(delta);

        if (has_cookie_) {
          // cookies are stored as deltas as well
          cookie_ += read_cookie(freq_in_);
        }
      } else {
        // no frequencies: plain document deltas
        doc.value += irs::vread<uint32_t>(freq_in_);
      }

      IRS_ASSERT(doc.value != posting_->doc);
    }

    // positions of the previous document are no longer valid
    pos_.Clear();

    return true;
  }

 private:
  using attributes =
    std::tuple<document, attribute_ptr<frequency>, attribute_ptr<position>>;

  const field_data* field_{};
  uint64_t cookie_{};
  frequency freq_;
  pos_iterator<byte_block_pool::sliced_reader> pos_;
  byte_block_pool::sliced_reader freq_in_;
  const posting* posting_{};
  attributes attrs_;
  bool has_cookie_{false};  // FIXME remove
};

////////////////////////////////////////////////////////////////////////////////
/// @class sorting_doc_iterator
////////////////////////////////////////////////////////////////////////////////
class sorting_doc_iterator : public irs::doc_iterator {
 public:
  // reset field
  void reset(const field_data& field) {
    IRS_ASSERT(field.prox_random_access());
    byte_pool_ = &field.byte_writer_->parent();

    auto& pfreq = std::get<attribute_ptr<frequency>>(attrs_);
    auto& ppos = std::get<attribute_ptr<position>>(attrs_);
    pfreq = nullptr;
    ppos = nullptr;

    const auto features = field.requested_features();
    if (IndexFeatures::NONE != (features & IndexFeatures::FREQ)) {
      pfreq = &freq_;

      if (IndexFeatures::NONE != (features & IndexFeatures::POS)) {
        pos_.Reset(features, freq_);
        ppos = &pos_;
      }
    }
  }

  // reset iterator,
  // docmap == null -> segment is already sorted
  void reset(detail::doc_iterator& it, const DocMap* docmap) {
    const frequency no_frequency;
    const frequency* freq = &no_frequency;

    const auto* freq_attr = irs::get<frequency>(it);
    if (freq_attr) {
      freq = freq_attr;
    }

    docs_.reserve(it.cost());
    docs_.clear();

    if (!docmap) {
      reset_already_sorted(it, *freq);
    } else if (irs::UseDenseSort(it.cost(),
                                 docmap->size() - 1)) {  // -1 for first element
      reset_dense(it, *freq, *docmap);
    } else {
      reset_sparse(it, *freq, *docmap);
    }

    std::get<document>(attrs_).value = irs::doc_limits::invalid();
    freq_.value = 0;
    it_ = docs_.begin();
  }

  attribute* get_mutable(irs::type_info::type_id type) noexcept final {
    return irs::get_mutable(attrs_, type);
  }

  doc_id_t seek(doc_id_t doc) noexcept final {
    irs::seek(*this, doc);
    return value();
  }

  doc_id_t value() const noexcept final {
    return std::get<document>(attrs_).value;
  }

  bool next() noexcept final {
    // cppcheck-suppress shadowFunction
    auto& value = std::get<document>(attrs_);

    while (it_ != docs_.end()) {
      if (doc_limits::eof(it_->doc)) {
        // skip invalid docs
        ++it_;
        continue;
      }

      auto& doc = *it_;
      value.value = doc.doc;
      freq_.value = doc.freq;

      if (doc.cookie) {  // we have proximity data
        pos_.Reset(greedy_reader(*byte_pool_, doc.cookie));
      }

      ++it_;
      return true;
    }

    value.value = doc_limits::eof();
    freq_.value = 0;
    return false;
  }

 private:
  using attributes =
    std::tuple<document, attribute_ptr<frequency>, attribute_ptr<position>>;

  struct doc_entry {
    doc_entry() = default;
    doc_entry(doc_id_t doc, uint32_t freq, uint64_t cookie) noexcept
      : doc(doc), freq(freq), cookie(cookie) {}

    doc_id_t doc{doc_limits::eof()};  // doc_id
    uint32_t freq;                    // freq
    uint64_t cookie;                  // prox_cookie
  };

  void reset_dense(detail::doc_iterator& it, const frequency& freq,
                   std::span<const doc_id_t> docmap) {
    IRS_ASSERT(!docmap.empty());
    IRS_ASSERT(irs::UseDenseSort(it.cost(),
                                 docmap.size() - 1));  // -1 for first element

    docs_.resize(docmap.size() - 1);  // -1 for first element

    while (it.next()) {
      IRS_ASSERT(it.value() - doc_limits::min() < docmap.size());
      const auto new_doc = docmap[it.value()];

      if (doc_limits::eof(new_doc)) {
        // skip invalid documents
        continue;
      }

      auto& doc = docs_[new_doc - doc_limits::min()];
      doc.doc = new_doc;
      doc.freq = freq.value;
      doc.cookie = it.cookie();
    }
  }

  void reset_sparse(detail::doc_iterator& it, const frequency& freq,
                    std::span<const doc_id_t> docmap) {
    IRS_ASSERT(!docmap.empty());
    IRS_ASSERT(!irs::UseDenseSort(it.cost(),
                                  docmap.size() - 1));  // -1 for first element

    while (it.next()) {
      IRS_ASSERT(it.value() - doc_limits::min() < docmap.size());
      const auto new_doc = docmap[it.value()];

      if (doc_limits::eof(new_doc)) {
        // skip invalid documents
        continue;
      }

      docs_.emplace_back(new_doc, freq.value, it.cookie());
    }

    std::sort(docs_.begin(), docs_.end(),
              [](const doc_entry& lhs, const doc_entry& rhs) noexcept {
                return lhs.doc < rhs.doc;
              });
  }

  void reset_already_sorted(detail::doc_iterator& it, const frequency& freq) {
    while (it.next()) {
      docs_.emplace_back(it.value(), freq.value, it.cookie());
    }
  }

  const byte_block_pool* byte_pool_{};
  std::vector<doc_entry>::const_iterator it_;
  std::vector<doc_entry> docs_;
  pos_iterator<byte_block_pool::sliced_greedy_reader> pos_;
  frequency freq_;
  attributes attrs_;
};

////////////////////////////////////////////////////////////////////////////////
/// @class term_iterator
////////////////////////////////////////////////////////////////////////////////
class term_iterator : public irs::term_iterator {
 public:
  explicit term_iterator(fields_data::postings_ref_t& postings,
                         const DocMap* docmap) noexcept
    : postings_(&postings), doc_map_(docmap) {}

  void reset(const field_data& field, bytes_view& min, bytes_view& max) {
    field_ = &field;

    doc_itr_.reset(field);
    if (field.prox_random_access()) {
      sorting_doc_itr_.reset(field);
    }

    // reset state
    field_->terms_.get_sorted_postings(*postings_);
    next_ = it_ = postings_->begin();
    end_ = postings_->end();

    max = min = {};
    if (it_ != end_) {
      min = (*it_)->term;
      max = (*(end_ - 1))->term;
    }
  }

  bytes_view value() const noexcept final {
    IRS_ASSERT(it_ != end_);
    return (*it_)->term;
  }

  attribute* get_mutable(irs::type_info::type_id) noexcept final {
    return nullptr;
  }

  void read() noexcept final {
    // Does nothing now
  }

  irs::doc_iterator::ptr postings(IndexFeatures /*features*/) const final {
    REGISTER_TIMER_DETAILED();
    IRS_ASSERT(it_ != end_);

    return (this->*POSTINGS[size_t(field_->prox_random_access())])(**it_);
  }

  bool next() final {
    if (next_ == end_) {
      return false;
    }

    it_ = next_;
    ++next_;
    return true;
  }

  const field_meta& meta() const noexcept { return field_->meta(); }

 private:
  typedef irs::doc_iterator::ptr (term_iterator::*postings_f)(
    const posting&) const;

  static const postings_f POSTINGS[2];

  irs::doc_iterator::ptr postings(const posting& posting) const {
    IRS_ASSERT(!doc_map_);

    // where the term data starts
    auto ptr = field_->int_writer_->parent().seek(posting.int_start);
    const auto freq_end = *ptr;
    ++ptr;
    const auto prox_end = *ptr;
    ++ptr;
    const auto freq_begin = *ptr;
    ++ptr;
    const auto prox_begin = *ptr;

    auto& pool = field_->byte_writer_->parent();
    const byte_block_pool::sliced_reader freq(pool, freq_begin,
                                              freq_end);  // term's frequencies
    const byte_block_pool::sliced_reader prox(
      pool, prox_begin,
      prox_end);  // term's proximity // TODO: create on demand!!!

    doc_itr_.reset(posting, freq, &prox);
    return memory::to_managed<irs::doc_iterator>(doc_itr_);
  }

  irs::doc_iterator::ptr sort_postings(const posting& posting) const {
    // where the term data starts
    auto ptr = field_->int_writer_->parent().seek(posting.int_start);
    const auto freq_end = *ptr;
    ++ptr;
    const auto freq_begin = *ptr;

    auto& pool = field_->byte_writer_->parent();
    const byte_block_pool::sliced_reader freq(pool, freq_begin,
                                              freq_end);  // term's frequencies

    doc_itr_.reset(posting, freq, nullptr);
    sorting_doc_itr_.reset(doc_itr_, doc_map_);
    return memory::to_managed<irs::doc_iterator>(sorting_doc_itr_);
  }

  fields_data::postings_ref_t* postings_{};
  fields_data::postings_ref_t::const_iterator end_;
  fields_data::postings_ref_t::const_iterator next_;
  fields_data::postings_ref_t::const_iterator it_;
  const field_data* field_{};
  const DocMap* doc_map_{};
  mutable detail::doc_iterator doc_itr_;
  mutable detail::sorting_doc_iterator sorting_doc_itr_;
};

const term_iterator::postings_f term_iterator::POSTINGS[2]{
  &term_iterator::postings, &term_iterator::sort_postings};

////////////////////////////////////////////////////////////////////////////////
/// @class term_reader
////////////////////////////////////////////////////////////////////////////////
// Term reader over a single in-memory field. It owns one term iterator which
// is handed out (without transferring ownership) by iterator().
class term_reader final : public irs::basic_term_reader,
                          private util::noncopyable {
 public:
  explicit term_reader(fields_data::postings_ref_t& postings,
                       const DocMap* docmap) noexcept
    : it_(postings, docmap) {}

  // Rebinds the reader to 'field' and refreshes the cached min/max terms.
  void reset(const field_data& field) {
    it_.reset(field, min_, max_);
  }

  // smallest term of the field
  irs::bytes_view(min)() const noexcept final {
    return min_;
  }

  // largest term of the field
  irs::bytes_view(max)() const noexcept final {
    return max_;
  }

  const irs::field_meta& meta() const noexcept final {
    return it_.meta();
  }

  irs::term_iterator::ptr iterator() const noexcept final {
    return memory::to_managed<irs::term_iterator>(it_);
  }

  // the reader exposes no attributes
  attribute* get_mutable(irs::type_info::type_id) noexcept final {
    return nullptr;
  }

 private:
  mutable detail::term_iterator it_;
  irs::bytes_view min_{};
  irs::bytes_view max_{};
};

}  // namespace detail

// Creates an iterator over the buffered index and data of the column.
doc_iterator::ptr cached_column::iterator(ColumnHint hint) const {
  // kPrevDoc isn't supported atm
  IRS_ASSERT((hint & ColumnHint::kPrevDoc) == ColumnHint::kNormal);

  // FIXME(gnusi): can avoid allocation with the help of managed_ptr
  return memory::make_managed<BufferedColumnIterator>(stream_.Index(),
                                                      stream_.Data());
}

// Creates the inverter state of a single field.
//
// The optional OFFS/PAY features are removed from the stored meta here; the
// write helpers set them again once the corresponding data was actually
// written. For every requested feature a column identifier slot is registered
// in the meta; if the feature has a writer, its output goes either into a
// cached column (random access or explicitly cached feature) or directly into
// the columnstore.
field_data::field_data(
  std::string_view name, const features_t& features,
  const FeatureInfoProvider& feature_columns,
  std::deque<cached_column, ManagedTypedAllocator<cached_column>>&
    cached_columns,
  const feature_set_t& cached_features, columnstore_writer& columns,
  byte_block_pool::inserter& byte_writer, int_block_pool::inserter& int_writer,
  IndexFeatures index_features, bool random_access)
  // Unset optional features
  : meta_{name, index_features & (~(IndexFeatures::OFFS | IndexFeatures::PAY))},
    terms_{*byte_writer},
    byte_writer_{&byte_writer},
    int_writer_{&int_writer},
    // pick the set of term processing functions matching the access mode
    proc_table_{kTermProcessingTables[size_t(random_access)]},
    requested_features_{index_features},
    last_doc_{doc_limits::invalid()} {
  for (const type_info::type_id feature : features) {
    IRS_ASSERT(feature_columns);
    auto [feature_column_info, feature_writer_factory] =
      feature_columns(feature);

    auto feature_writer =
      feature_writer_factory ? (*feature_writer_factory)({}) : nullptr;
    // the feature is always registered, its column stays invalid
    // unless a column gets assigned below
    auto* id = &meta_.features[feature];
    *id = field_limits::invalid();
    if (!feature_writer) {
      continue;
    }
    // the finalizer refers to the writer by raw pointer,
    // the writer itself is moved into 'features_' below
    columnstore_writer::column_finalizer_f finalizer =
      [writer = feature_writer.get()](bstring& out) {
        writer->finish(out);
        return std::string_view{};
      };

    // sorted index case or the feature is required for wand
    if (random_access || cached_features.contains(feature)) {
      // the cached column receives the pointer to the identifier slot
      auto& column = cached_columns.emplace_back(
        id, feature_column_info, std::move(finalizer),
        cached_columns.get_allocator().ResourceManager());
      features_.emplace_back(std::move(feature_writer), column.Stream());
    } else {
      // column identifier is known right away
      auto [column, out] =
        columns.push_column(feature_column_info, std::move(finalizer));
      features_.emplace_back(std::move(feature_writer), out);
      *id = column;
    }
  }
}

// Prepares the per-document state for 'doc_id'. Calling it again with the
// identifier of the document being processed keeps the state untouched.
void field_data::reset(doc_id_t doc_id) {
  IRS_ASSERT(doc_limits::valid(doc_id));

  if (doc_id != last_doc_) {
    // switch to the new document
    last_doc_ = doc_id;
    seen_ = false;
    stats_ = {};

    // positions
    pos_ = pos_limits::invalid();
    last_pos_ = 0;

    // offsets
    offs_ = 0;
    last_start_offs_ = 0;
  }
}

// Initializes the posting of a term seen for the first time in this field
// (sequential access mode).
//
// Allocates a frequency and a proximity slice in the byte pool and stores
// four values in the int pool starting at 'p.int_start':
//   +0 freq stream end, +1 prox stream end,
//   +2 freq stream start, +3 prox stream start.
// The document itself isn't written to the frequency stream here, it's kept
// in the posting ('doc', 'doc_code', 'freq'). If positions are requested,
// the first position (and offset, if 'offs' is given) is written right away.
void field_data::new_term(posting& p, doc_id_t did, const payload* pay,
                          const offset* offs) {
  // where pointers to data starts
  p.int_start = int_writer_->pool_offset();

  const auto freq_start =
    byte_writer_->alloc_slice();  // pointer to freq stream
  const auto prox_start =
    byte_writer_->alloc_slice();  // pointer to prox stream
  *int_writer_ = freq_start;      // freq stream end
  *int_writer_ = prox_start;      // prox stream end
  *int_writer_ = freq_start;      // freq stream start
  *int_writer_ = prox_start;      // prox stream start

  p.doc = did;
  if (IndexFeatures::NONE == (requested_features_ & IndexFeatures::FREQ)) {
    // no frequencies: the code is the plain document identifier
    p.doc_code = did;
  } else {
    // lowest bit is reserved for the 'frequency == 1' flag
    p.doc_code = uint64_t(did) << 1;
    p.freq = 1;

    if (IndexFeatures::NONE != (requested_features_ & IndexFeatures::POS)) {
      // continue writing at the current end of the prox stream (slot +1)
      auto& prox_stream_end = *int_writer_->parent().seek(p.int_start + 1);
      byte_block_pool::sliced_inserter prox_out(*byte_writer_, prox_stream_end);

      write_prox(prox_out, pos_, pay, meta_.index_features);

      if (offs) {
        write_offset(p, prox_out, meta_.index_features, offs_, *offs);
      }

      // remember the new end of the prox stream
      prox_stream_end = prox_out.pool_offset();
      p.pos = pos_;
    }
  }

  stats_.max_term_freq = std::max(1U, stats_.max_term_freq);
  ++stats_.num_unique;
}

// Registers another occurrence of an already known term
// (sequential access mode).
//
// Three cases are handled:
//  - frequencies aren't requested: when a new document arrives, the pending
//    code of the previous document is flushed to the frequency stream and the
//    delta to the new document becomes the pending code;
//  - frequencies are requested and a new document arrives: the pending code
//    (and frequency, unless it equals 1) of the previous document is flushed,
//    the posting is switched to the new document and its first position is
//    written;
//  - the term was already seen in the current document: the frequency is
//    incremented and the position is written as a delta to the previous one.
void field_data::add_term(posting& p, doc_id_t did, const payload* pay,
                          const offset* offs) {
  if (IndexFeatures::NONE == (requested_features_ & IndexFeatures::FREQ)) {
    if (p.doc != did) {
      IRS_ASSERT(did > p.doc);

      // flush the previous document, slot +0 holds the freq stream end
      auto& doc_stream_end = *int_writer_->parent().seek(p.int_start);
      byte_block_pool::sliced_inserter doc_out(*byte_writer_, doc_stream_end);
      irs::vwrite<uint32_t>(doc_out, doc_id_t(p.doc_code));
      doc_stream_end = doc_out.pool_offset();

      p.doc_code = did - p.doc;
      p.doc = did;
      ++stats_.num_unique;
    }
  } else if (p.doc != did) {
    IRS_ASSERT(did > p.doc);

    auto& doc_stream_end = *int_writer_->parent().seek(p.int_start);
    byte_block_pool::sliced_inserter doc_out(*byte_writer_, doc_stream_end);

    if (1U == p.freq) {
      // frequency of 1 is encoded in the lowest bit of the code
      irs::vwrite<uint64_t>(doc_out, p.doc_code | UINT64_C(1));
    } else {
      irs::vwrite<uint64_t>(doc_out, p.doc_code);
      irs::vwrite<uint32_t>(doc_out, p.freq);
    }

    // switch the posting to the new document
    p.doc_code = uint64_t(did - p.doc) << 1;
    p.freq = 1;

    p.doc = did;
    stats_.max_term_freq = std::max(1U, stats_.max_term_freq);
    ++stats_.num_unique;

    if (IndexFeatures::NONE != (requested_features_ & IndexFeatures::POS)) {
      // slot +1 holds the prox stream end
      auto& prox_stream_end = *int_writer_->parent().seek(p.int_start + 1);
      byte_block_pool::sliced_inserter prox_out(*byte_writer_, prox_stream_end);

      // first position in a document is written as is
      write_prox(prox_out, pos_, pay, meta_.index_features);

      if (offs) {
        p.offs = 0;  // reset base offset
        write_offset(p, prox_out, meta_.index_features, offs_, *offs);
      }

      prox_stream_end = prox_out.pool_offset();
      p.pos = pos_;
    }

    doc_stream_end = doc_out.pool_offset();
  } else {  // exists in current doc
    stats_.max_term_freq = std::max(++p.freq, stats_.max_term_freq);
    if (IndexFeatures::NONE != (requested_features_ & IndexFeatures::POS)) {
      auto& prox_stream_end = *int_writer_->parent().seek(p.int_start + 1);
      byte_block_pool::sliced_inserter prox_out(*byte_writer_, prox_stream_end);

      // subsequent positions are written as deltas
      write_prox(prox_out, pos_ - p.pos, pay, meta_.index_features);

      if (offs) {
        write_offset(p, prox_out, meta_.index_features, offs_, *offs);
      }

      prox_stream_end = prox_out.pool_offset();
      p.pos = pos_;
    }
  }
}

// Initializes the posting of a term seen for the first time in this field
// (random access mode).
//
// Allocates a regular slice for frequencies and a greedy slice for proximity
// data and stores five values in the int pool starting at 'p.int_start':
//   +0 freq stream end, +1 freq stream start,
//   +2 prox end cookie, +3 prox start cookie, +4 last prox start cookie.
// If positions are requested, the first position (and offset, if 'offs' is
// given) is written right away and the end cookie is updated.
void field_data::new_term_random_access(posting& p, doc_id_t did,
                                        const payload* pay,
                                        const offset* offs) {
  // where pointers to data starts
  p.int_start = int_writer_->pool_offset();

  const auto freq_start =
    byte_writer_->alloc_slice();  // pointer to freq stream
  const auto prox_start =
    byte_writer_->alloc_greedy_slice();  // pointer to prox stream
  *int_writer_ = freq_start;             // freq stream end
  *int_writer_ = freq_start;             // freq stream start

  // proximity data starts at offset 1 within the greedy slice
  const auto cookie = ::cookie(prox_start, 1);
  *int_writer_ = cookie;  // end cookie
  *int_writer_ = cookie;  // start cookie
  *int_writer_ = 0;       // last start cookie

  p.doc = did;
  if (IndexFeatures::NONE == (requested_features_ & IndexFeatures::FREQ)) {
    // no frequencies: the code is the plain document identifier
    p.doc_code = did;
  } else {
    // lowest bit is reserved for the 'frequency == 1' flag
    p.doc_code = uint64_t(did) << 1;
    p.freq = 1;

    if (IndexFeatures::NONE != (requested_features_ & IndexFeatures::POS)) {
      byte_block_pool::sliced_greedy_inserter prox_out(*byte_writer_,
                                                       prox_start, 1);

      write_prox(prox_out, pos_, pay, meta_.index_features);

      if (offs) {
        write_offset(p, prox_out, meta_.index_features, offs_, *offs);
      }

      // slot +2 holds the end cookie
      auto& end_cookie = *int_writer_->parent().seek(p.int_start + 2);
      end_cookie = ::cookie(prox_out);  // prox stream end cookie

      p.pos = pos_;
    }
  }

  stats_.max_term_freq = std::max(1U, stats_.max_term_freq);
  ++stats_.num_unique;
}

// Registers another occurrence of an already known term
// (random access mode).
//
// Follows the same three cases as the sequential variant, with these
// differences: the number of flushed documents is tracked in 'p.size', and
// when positions are requested, every flushed document is followed in the
// frequency stream by the delta between its prox start cookie and the start
// cookie of the document flushed before it.
void field_data::add_term_random_access(posting& p, doc_id_t did,
                                        const payload* pay,
                                        const offset* offs) {
  if (IndexFeatures::NONE == (requested_features_ & IndexFeatures::FREQ)) {
    if (p.doc != did) {
      IRS_ASSERT(did > p.doc);

      // flush the previous document, slot +0 holds the freq stream end
      auto& doc_stream_end = *int_writer_->parent().seek(p.int_start);
      byte_block_pool::sliced_inserter doc_out(*byte_writer_, doc_stream_end);
      irs::vwrite<uint32_t>(doc_out, doc_id_t(p.doc_code));
      doc_stream_end = doc_out.pool_offset();

      ++p.size;
      p.doc_code = did - p.doc;
      p.doc = did;
      ++stats_.num_unique;
    }
  } else if (p.doc != did) {
    IRS_ASSERT(did > p.doc);

    auto& doc_stream_end = *int_writer_->parent().seek(p.int_start);
    byte_block_pool::sliced_inserter doc_out(*byte_writer_, doc_stream_end);

    if (1U == p.freq) {
      // frequency of 1 is encoded in the lowest bit of the code
      irs::vwrite<uint64_t>(doc_out, p.doc_code | UINT64_C(1));
    } else {
      irs::vwrite<uint64_t>(doc_out, p.doc_code);
      irs::vwrite<uint32_t>(doc_out, p.freq);
    }

    // switch the posting to the new document
    ++p.size;
    p.doc_code = uint64_t(did - p.doc) << 1;
    p.freq = 1;

    p.doc = did;
    stats_.max_term_freq = std::max(1U, stats_.max_term_freq);
    ++stats_.num_unique;

    if (IndexFeatures::NONE != (requested_features_ & IndexFeatures::POS)) {
      // slots +2, +3, +4: end cookie, start cookie, last start cookie
      auto prox_stream_cookie = int_writer_->parent().seek(p.int_start + 2);

      auto& end_cookie = *prox_stream_cookie;
      ++prox_stream_cookie;
      auto& start_cookie = *prox_stream_cookie;
      ++prox_stream_cookie;
      auto& last_start_cookie = *prox_stream_cookie;

      // cookie of the flushed document goes right after its code/frequency
      write_cookie(doc_out, start_cookie - last_start_cookie);
      // cppcheck-suppress selfAssignment
      last_start_cookie = start_cookie;  // update previous cookie
      // cppcheck-suppress selfAssignment
      start_cookie = end_cookie;  // update start cookie

      // continue writing where the previous document ended
      auto prox_out = greedy_writer(*byte_writer_, end_cookie);

      // first position in a document is written as is
      write_prox(prox_out, pos_, pay, meta_.index_features);

      if (offs) {
        p.offs = 0;  // reset base offset
        write_offset(p, prox_out, meta_.index_features, offs_, *offs);
      }

      end_cookie = cookie(prox_out);
      p.pos = pos_;
    }

    // the end of the freq stream is updated last,
    // as the cookie above is written through 'doc_out' as well
    doc_stream_end = doc_out.pool_offset();
  } else {  // exists in current doc
    stats_.max_term_freq = std::max(++p.freq, stats_.max_term_freq);
    if (IndexFeatures::NONE != (requested_features_ & IndexFeatures::POS)) {
      // update end cookie
      auto& end_cookie = *int_writer_->parent().seek(p.int_start + 2);
      auto prox_out = greedy_writer(*byte_writer_, end_cookie);

      // subsequent positions are written as deltas
      write_prox(prox_out, pos_ - p.pos, pay, meta_.index_features);

      if (offs) {
        write_offset(p, prox_out, meta_.index_features, offs_, *offs);
      }

      end_cookie = cookie(prox_out);
      p.pos = pos_;
    }
  }
}

// Feeds every token produced by 'stream' into this field's postings for
// document 'id'.
//
// Returns false (after logging an error) when:
//  - the stream lacks the 'increment' or 'term_attribute' attribute,
//  - a token position goes backwards or reaches pos_limits::eof(),
//  - a token offset goes backwards or its end precedes its start,
//  - the per-field token counter wraps around to zero.
// Terms rejected by 'terms_.emplace' (reported as too long) are skipped with
// a warning and do not abort the inversion.
bool field_data::invert(token_stream& stream, doc_id_t id) {
  REGISTER_TIMER_DETAILED();
  IRS_ASSERT(id < doc_limits::eof());  // 0-based document id

  const auto* term = get<term_attribute>(stream);
  const auto* inc = get<increment>(stream);

  // both attributes are mandatory, a missing increment is reported first
  if (inc == nullptr) {
    IRS_LOG_ERROR(absl::StrCat("field '", meta_.name,
                               "' missing required token_stream attribute '",
                               type<increment>::name(), "'"));
    return false;
  }

  if (term == nullptr) {
    IRS_LOG_ERROR(absl::StrCat("field '", meta_.name,
                               "' missing required token_stream attribute '",
                               type<term_attribute>::name(), "'"));
    return false;
  }

  // offsets are looked up only on request, payloads only alongside offsets
  const offset* offs = nullptr;
  const payload* pay = nullptr;
  const bool offsets_requested =
    (requested_features_ & IndexFeatures::OFFS) != IndexFeatures::NONE;

  if (offsets_requested) {
    offs = get<offset>(stream);

    if (offs != nullptr) {
      pay = get<payload>(stream);
    }
  }

  reset(id);  // initialize field_data for the supplied doc_id

  while (stream.next()) {
    pos_ += inc->value;

    // positions must never move backwards ...
    if (pos_ < last_pos_) {
      IRS_LOG_ERROR(absl::StrCat("invalid position ", pos_, " < ", last_pos_,
                                 " in field '", meta_.name, "'"));
      return false;
    }

    // ... and must stay below the 'eof' marker
    if (pos_ >= pos_limits::eof()) {
      IRS_LOG_ERROR(absl::StrCat("invalid position ", pos_,
                                 " >= ", pos_limits::eof(), " in field '",
                                 meta_.name, "'"));
      return false;
    }

    // a zero increment means the token shares its position with the previous
    if (inc->value == 0) {
      ++stats_.num_overlap;
    }

    if (offs != nullptr) {
      // token offsets are shifted by the base accumulated in 'offs_'
      const uint32_t token_start = offs_ + offs->start;
      const uint32_t token_end = offs_ + offs->end;

      const bool starts_before_previous = token_start < last_start_offs_;
      const bool ends_before_start = token_end < token_start;

      if (starts_before_previous || ends_before_start) {
        IRS_LOG_ERROR(absl::StrCat("invalid offset start=", token_start,
                                   " end=", token_end, " in field '",
                                   meta_.name, "'"));
        return false;
      }

      last_start_offs_ = token_start;
    }

    auto* p = terms_.emplace(term->value);

    if (!p) {
      // the term is skipped: neither the token counter nor 'last_pos_'
      // is updated for it
      IRS_LOG_WARN(absl::StrCat("skipping too long term of size: ",
                                term->value.size(), " in field: ", meta_.name));
      IRS_LOG_TRACE(
        absl::StrCat("field: ", meta_.name,
                     " contains too long term: ", ViewCast<char>(term->value)));
      continue;
    }

    // slot 1 handles a posting without a valid document yet, slot 0 the rest
    const auto handler = proc_table_[!doc_limits::valid(p->doc)];
    (this->*handler)(*p, id, pay, offs);
    IRS_ASSERT(doc_limits::valid(p->doc));

    // the counter wrapping around to zero means too many tokens
    if (++stats_.len == 0) {
      IRS_LOG_ERROR(absl::StrCat("too many tokens in field: ", meta_.name,
                                 ", document: ", id));
      return false;
    }

    last_pos_ = pos_;
  }

  // advance the base offset by the end offset currently held by the attribute
  if (offs != nullptr) {
    offs_ += offs->end;
  }

  return true;
}

// Builds an empty fields container.
//
// The addresses of 'feature_info', 'cached_columns' and 'cached_features' are
// stored rather than copies of the objects (presumably the caller has to keep
// them alive for as long as this instance is used - TODO confirm).
// 'rm' is passed to the field storage and to both block pools.
// 'comparator' is stored as-is; the inline comment on the parameter indicates
// that it defaults to nullptr in the declaration.
fields_data::fields_data(
  const FeatureInfoProvider& feature_info,
  std::deque<cached_column, ManagedTypedAllocator<cached_column>>&
    cached_columns,
  const feature_set_t& cached_features, IResourceManager& rm,
  const Comparer* comparator /*= nullptr*/)
  : comparator_{comparator},
    feature_info_{&feature_info},
    fields_{{rm}},
    cached_columns_{&cached_columns},
    cached_features_{&cached_features},
    byte_pool_{{rm}},
    // both writers start at the beginning of their respective pools
    byte_writer_{byte_pool_.begin()},
    int_pool_{{rm}},
    int_writer_{int_pool_.begin()} {}

// Returns the field_data registered under 'name', creating it on first use.
//
// A map slot is obtained for 'name'; a slot whose reference is still null has
// no field yet, in which case a new field_data is appended to 'fields_' and
// the slot is pointed at it. If constructing the field throws, the slot is
// removed from the map again and the exception is rethrown.
field_data* fields_data::emplace(const hashed_string_view& name,
                                 IndexFeatures index_features,
                                 const features_t& features,
                                 columnstore_writer& columns) {
  IRS_ASSERT(fields_map_.size() == fields_.size());

  // constructs a slot with a null reference and the precomputed name hash
  const auto make_placeholder = [&name](const fields_map::constructor& ctor) {
    ctor(nullptr, name.hash());
  };

  auto slot = fields_map_.lazy_emplace(name, make_placeholder);

  if (slot->ref) {
    return slot->ref;  // field already exists
  }

  try {
    auto& created = fields_.emplace_back(
      name, features, *feature_info_, *cached_columns_, *cached_features_,
      columns, byte_writer_, int_writer_, index_features,
      (nullptr != comparator_));

    // the slot is reached through a const view, hence the cast
    const_cast<field_data*&>(slot->ref) = &created;
  } catch (...) {
    fields_map_.erase(slot);  // do not leave a slot without a field behind
    throw;
  }

  return slot->ref;
}

// Writes all accumulated fields to 'fw' in ascending order of field name.
//
// Before writing, the union of index features and the set of feature ids of
// all fields are collected and published through 'state'.
// NOTE(review): 'state.features' is set to the address of a local variable of
// this function, so it must not be read through 'state' once flush() returns.
void fields_data::flush(field_writer& fw, flush_state& state) {
  REGISTER_TIMER_DETAILED();

  feature_set_t all_features;
  IndexFeatures all_index_features{IndexFeatures::NONE};

  // collect pointers to the fields together with their combined features
  sorted_fields_.resize(fields_.size());
  auto out = sorted_fields_.begin();
  for (auto& field : fields_) {
    *out++ = &field;

    const auto& field_meta = field.meta();
    all_index_features |= static_cast<IndexFeatures>(field_meta.index_features);
    accumulate_features(all_features, field_meta.features);
  }

  state.index_features = all_index_features;
  state.features = &all_features;

  // order fields by name
  const auto by_name = [](const field_data* lhs,
                          const field_data* rhs) noexcept {
    return lhs->meta().name < rhs->meta().name;
  };
  std::sort(sorted_fields_.begin(), sorted_fields_.end(), by_name);

  detail::term_reader terms(sorted_postings_, state.docmap);

  fw.prepare(state);
  for (auto* field : sorted_fields_) {
    terms.reset(*field);                     // point the reader at the field
    fw.write(terms, terms.meta().features);  // write inverted data
  }

  fw.end();
}

// Forgets all accumulated fields and rewinds both pool writers to the start
// of their pools. Only the writer positions are changed here; no call that
// releases or shrinks the pools themselves is made.
void fields_data::reset() noexcept {
  byte_writer_ = byte_pool_.begin();  // reset position pointer to start of pool
  fields_.clear();      // destroy the per-field data
  fields_map_.clear();  // drop the name lookup entries for those fields
  int_writer_ = int_pool_.begin();  // reset position pointer to start of pool
}

size_t fields_data::memory_active() const noexcept {
  return byte_writer_.pool_offset() +
         int_writer_.pool_offset() * sizeof(int_block_pool::value_type) +
         fields_map_.size() * sizeof(fields_map::value_type) +
         fields_.size() * sizeof(decltype(fields_)::value_type);
}

size_t fields_data::memory_reserved() const noexcept {
  // FIXME(@gnusi): revisit the implementation
  return byte_pool_.size() + int_pool_.size();
}

// Type-info specialization for the file-local pos_iterator template: every
// pos_iterator<Reader> instantiation inherits from type<irs::position> and
// therefore reuses the type info of the base irs::position attribute
// (presumably so that lookups by irs::position find these iterators - TODO
// confirm against the attribute lookup code).
template<typename Reader>
struct type<::pos_iterator<Reader>> : type<irs::position> {};

}  // namespace irs
