# Compression algorithm described here:
# https://github.com/apankrat/notes/tree/master/fast-case-conversion
#
# This file generates the casemap.cpp source used by Natalie.
# To regenerate, run:
#
#     ruby lib/natalie/encoding/casemap_gen.rb > src/encoding/casemap.cpp

require 'open-uri'

# Template for one 256-entry block of case deltas, all zero (no mapping).
# Frozen so the shared template cannot be mutated by accident; every user
# takes a `.dup` (which is unfrozen) before writing into it.
EMPTY_BLOCK = ([0] * 256).freeze

# Computes the signed distance from a code point to its case-mapped
# counterpart.
#
# code   - Integer code point.
# mapped - hexadecimal String of the mapped code point; may be nil or empty
#          when there is no mapping.
#
# Returns mapped (parsed as hex) minus code, or 0 when mapped is nil/empty.
def calc_delta(code, mapped)
  return 0 unless mapped
  return 0 if mapped.empty?

  mapped.to_i(16) - code
end

# Renders a list of numbers as the body of an array initializer.
#
# nums           - Array of numbers to render.
# count_per_line - how many numbers go on each output line.
# num_width      - each number is right-justified to this width.
#
# Returns a String in which every line is indented by four spaces, numbers
# on a line are separated by ", ", and lines are separated by ",\n".
# An empty nums yields just the four-space indent.
def format_array(nums, count_per_line:, num_width:)
  indent = ' ' * 4
  rows = nums.each_slice(count_per_line).map do |row|
    row.map { |value| value.to_s.rjust(num_width) }.join(', ')
  end
  indent + rows.join(",\n#{indent}")
end

# Packs per-block delta tables into one flat map plus a per-block index.
#
# blocks - Hash (or list of [block, deltas] pairs) keyed by block number
#          (the callers in this file use `code >> 8`), each value an Array
#          of deltas for that block.
#
# Returns [map, index]:
#   map   - EMPTY_BLOCK followed by every block that contains at least one
#           non-zero delta, in ascending block-number order.
#   index - index[block] is the offset into map where that block's deltas
#           start. Blocks that are all zero, or absent from the input,
#           get offset 0 and therefore share the leading empty block.
#
# The index holds one slot for every block number from 0 through the highest
# key, so a lookup by block number stays aligned even when the input skips
# blocks (UnicodeData.txt has no entries at all for many blocks).
# NOTE(review): assumes the consumer looks up index[code >> 8] - confirm
# against the C++ side.
def build_map_and_index(blocks)
  blocks = blocks.to_h
  map = EMPTY_BLOCK.dup
  return [map, []] if blocks.empty?

  index = (0..blocks.keys.max).map do |block|
    # fetch (not []) so a Hash default proc is not triggered for gaps.
    deltas = blocks.fetch(block, nil)
    next 0 if deltas.nil? || deltas.all?(&:zero?)

    offset = map.size
    map.concat(deltas)
    offset
  end
  [map, index]
end

# Download UnicodeData.txt once and cache it under /tmp; later runs reuse the
# cached copy (delete the file to force a fresh download).
unless File.exist?('/tmp/UnicodeData.txt')
  File.write(
    '/tmp/UnicodeData.txt',
    # Block form so the download stream is closed as soon as it has been read.
    URI.open('http://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt', &:read)
  )
end

# Parse the cached UnicodeData.txt (one ';'-separated record per line) and
# collect, for every 256-code-point block, the delta from each code point to
# its simple lowercase / uppercase / titlecase mapping.
data = File.read('/tmp/UnicodeData.txt').split(/\n/).map { |l| l.split(';') }
lcase_blocks = Hash.new { |h, k| h[k] = EMPTY_BLOCK.dup }
ucase_blocks = Hash.new { |h, k| h[k] = EMPTY_BLOCK.dup }
tcase_blocks = Hash.new { |h, k| h[k] = EMPTY_BLOCK.dup }
data.each do |fields|
  # Field 0 is the code point; fields 12, 13 and 14 are the simple uppercase,
  # lowercase and titlecase mappings. String#split drops trailing empty
  # fields, so any of the three may come back as nil.
  code, upper, lower, title = fields.values_at(0, 12, 13, 14)
  code = code.to_i(16)
  # UAX #44: an empty titlecase field means "same as the simple uppercase
  # mapping", not "no mapping".
  title = upper if title.nil? || title.empty?
  block = code >> 8
  offset = code & 0xff
  lcase_blocks[block][offset] = calc_delta(code, lower)
  ucase_blocks[block][offset] = calc_delta(code, upper)
  tcase_blocks[block][offset] = calc_delta(code, title)
end

# Flatten each set of blocks into the map/index pair that gets emitted.
lcase_map, lcase_index = build_map_and_index(lcase_blocks)
ucase_map, ucase_index = build_map_and_index(ucase_blocks)
tcase_map, tcase_index = build_map_and_index(tcase_blocks)

# Download SpecialCasing.txt once and cache it under /tmp; later runs reuse
# the cached copy (delete the file to force a fresh download).
unless File.exist?('/tmp/SpecialCasing.txt')
  File.write(
    '/tmp/SpecialCasing.txt',
    # Block form so the download stream is closed as soon as it has been read.
    URI.open('http://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt', &:read)
  )
end

# Parse the part of SpecialCasing.txt that precedes the
# "# Conditional Mappings" heading, skipping comment-only and blank lines.
data = File.read('/tmp/SpecialCasing.txt')
           .split(/# Conditional Mappings\n/)
           .first
           .split(/\n/)
           .reject { |l| l.start_with?('#') || l.strip.empty? }

# Each remaining line is "<code>; <lower>; <title>; <upper>; # comment", where
# every mapping is a space-separated list of hex code points.
special_casing_map = data.map do |line|
  # Non-bang sub: sub! returns nil when there is nothing to strip, which
  # would crash on a data line that carries no trailing comment.
  parts = line.sub(/\s*#.*$/, '').split(/\s*;\s*/)
  code, lower, title, upper = parts.map { |cc| cc.split.map { |c| c.to_i(16) } }
  code = code.first
  # Append a single 0 to any list shorter than the threshold below.
  # NOTE(review): presumably a terminator for the fixed-size arrays in
  # SpecialCasingEntry - confirm against the C++ struct.
  lower << 0 if lower.size < 2
  title << 0 if title.size < 3
  upper << 0 if upper.size < 3
  { code:, lower:, title:, upper: }
end

# Everything below writes the generated C++ source to stdout.

# Fixed preamble: provenance banner, includes and the namespace opening,
# followed by one blank line.
puts <<~'PREAMBLE'
  // This file is auto-generated from http://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
  // See casemap_gen.rb in this repository for instructions regenerating it.
  // DO NOT EDIT THIS FILE BY HAND!

  #include "natalie/encoding_object.hpp"
  #include "natalie/types.hpp"

  namespace Natalie {

PREAMBLE

# One array definition per table, each followed by a blank line. The third
# element is the column width used when right-justifying the numbers.
[
  ['lcase_map', lcase_map, 6],
  ['ucase_map', ucase_map, 6],
  ['tcase_map', tcase_map, 6],
  ['lcase_index', lcase_index, 4],
  ['ucase_index', ucase_index, 4],
  ['tcase_index', tcase_index, 4]
].each do |name, values, width|
  body = format_array(values, count_per_line: 10, num_width: width)
  puts "nat_int_t #{name}[] = {\n#{body}\n};"
  puts
end

# Special-casing table: declared with its size, then filled in by an
# initializer function with one assignment per entry.
entry_count = special_casing_map.size
puts "const int special_casing_map_size = #{entry_count};"
puts "SpecialCasingEntry special_casing_map[#{entry_count}] = { { 0 } };"
puts
puts 'void EncodingObject::init_special_casing_map() {'

# Renders a list of code points as a braced list of hex literals.
hex_list = lambda do |codepoints|
  "{ #{codepoints.map { |c| "0x#{c.to_s(16)}" }.join(', ')} }"
end

special_casing_map.each_with_index do |mapping, index|
  fields = ["0x#{mapping[:code].to_s(16)}"]
  fields += %i[lower title upper].map { |key| hex_list.call(mapping[key]) }
  puts "    special_casing_map[#{index}] = { #{fields.join(', ')} };"
end

# Close the initializer function, then the namespace.
puts '}'
puts
puts '}'
