/**
\author		Alexey Shaydurov aka ASH
\date		26.03.2016 (c)Andrey Korotkov

This file is a part of DGLE project and is distributed
under the terms of the GNU Lesser General Public License.
See "DGLE.h" for more details.
*/

/**
Maps a compile-time flag to an unsigned integer type exposed as the nested \c TYPE:
\c uint32_t when the flag is \c false (primary template), \c uint64_t when it is \c true.
*/
template<bool wide>
struct IDX_BLOCK_TYPE
{
	using TYPE = uint32_t;
};

/** 64-bit variant, selected when the flag is \c true. */
template<>
struct IDX_BLOCK_TYPE<true>
{
	using TYPE = uint64_t;
};

/**
Packed table of 16 indices, each \c bpi bits wide, stored as \c bpi * 2 raw bytes.

The bytes are interpreted as one little-endian integer; index \c i occupies
bits [i * bpi, (i + 1) * bpi) of that integer.

\tparam bpi	bits per index; must be in the range 1..4 so that the whole block fits in 64 bits
*/
template<uint8_t bpi>
struct INDICES
{
	static_assert(bpi >= 1 && bpi <= 4, "INDICES: bits per index must be in the range 1..4");

	/**
	Extracts a single index from the block.

	\param i	position of the index; expected to be in the range 0..15 (not checked)
	\return		the index value, in the range 0 .. (1 << bpi) - 1
	*/
	inline unsigned int operator [](unsigned i) const
	{
		// Assemble the block explicitly from its bytes, least significant first.
		// This avoids reading an inactive union member (undefined behaviour in C++)
		// and makes the result independent of host byte order and bit-field layout.
		uint64_t packed = 0;
		for (uint_fast8_t byte_idx = 0; byte_idx < block_bytes; ++byte_idx)
			packed |= static_cast<uint64_t>(data[byte_idx]) << (byte_idx * 8);
		return static_cast<unsigned int>(packed >> (i * bpi) & mask);
	}
private:
	// 16 indices per block
	static constexpr uint_fast8_t block_bits = bpi * 16, block_bytes = block_bits / 8;
	// lowest bpi bits set
	static constexpr unsigned int mask = ~0u >> (sizeof(unsigned int) * 8 - bpi);
	uint8_t data[block_bytes];
};

struct COLOR24;

/** RGB colour with 16-bit channels; the result type of scaling or summing COLOR24 values. */
struct COLOR48
{
	COLOR48(uint16_t r, uint16_t g, uint16_t b): r(r), g(g), b(b) {}
	/** Divides every channel by \p scalar (integer division) and narrows the result to COLOR24. */
	inline COLOR24 operator /(uint_fast8_t scalar) const;
	uint16_t r, g, b;
};

/** RGB colour with 8-bit channels. */
struct COLOR24
{
	/**
	Expands a packed 16-bit colour: red in bits 11..15, green in bits 5..10, blue in bits 0..4.
	Each channel is rescaled to 0..255 and truncated towards zero. Integer division is used here;
	it yields the same values as dividing in single-precision float and truncating.
	*/
	COLOR24(uint_fast16_t color16):
		r((color16 >> 11 & 0x1f) * 255 / 31),
		g((color16 >> 5 & 0x3f) * 255 / 63),
		b((color16 & 0x1f) * 255 / 31)
	{}
	COLOR24(uint8_t r, uint8_t g, uint8_t b): r(r), g(g), b(b) {}
	/** Widens every channel to 16 bits without changing its value. */
	operator COLOR48() const
	{
		return COLOR48(r, g, b);
	}
	/** Multiplies every channel by \p scalar; the product is stored in 16-bit channels. */
	COLOR48 operator *(uint_fast8_t scalar) const
	{
		return COLOR48(r * scalar, g * scalar, b * scalar);
	}
	uint8_t r, g, b;
};

/** Channel-wise sum of two 16-bit-per-channel colours. */
inline COLOR48 operator +(const COLOR48 &left, const COLOR48 &right)
{
	return COLOR48(left.r + right.r, left.g + right.g, left.b + right.b);
}

inline COLOR24 COLOR48::operator /(uint_fast8_t scalar) const
{
	return COLOR24(r / scalar, g / scalar, b / scalar);
}

void UnpackBC1(uint_fast16_t width, uint_fast16_t height, const uint8_t *src, uint8_t *dst)
{
#pragma pack(push, 1)
	struct BC1_BLOCK
	{
		uint16_t color_0, color_1;
		INDICES<2> indices;
	};
#pragma pack(pop)
	for (uint_fast16_t blockY = 0; blockY < (height + 3) / 4; ++blockY)
		for (uint_fast16_t blockX = 0; blockX < (width + 3) / 4; ++blockX)
		{
			const BC1_BLOCK &cur_block = reinterpret_cast<const BC1_BLOCK *>(src)[blockY * ((width + 3) / 4) + blockX];
			COLOR24 color_table[4] =
			{
				cur_block.color_0,
				cur_block.color_1,
				(color_table[0] * 2 + color_table[1]) / 3,
				(color_table[0] + color_table[1] * 2) / 3
			};
			for (int y = 0; y < 4 && y < height - blockY * 4; ++y)
				for (int x = 0; x < 4 && x < width - blockX * 4; ++x)
				{
					uint8_t (&dst_pixel)[3] = reinterpret_cast<uint8_t (*const&)[3]>(dst)[(blockY * 4 + y) * width + (blockX * 4 + x)];
					unsigned int color_idx = cur_block.indices[y * 4 + x];
					dst_pixel[0] = color_table[color_idx].r;
					dst_pixel[1] = color_table[color_idx].g;
					dst_pixel[2] = color_table[color_idx].b;
				}
		}
}

void UnpackBC3(uint_fast16_t width, uint_fast16_t height, const uint8_t *src, uint8_t *dst)
{
#pragma pack(push, 1)
	struct BC3_BLOCK
	{
		uint8_t alpha_0, alpha_1;
		INDICES<3> alpha_indices;
		uint16_t color_0, color_1;
		INDICES<2> color_indices;
	};
#pragma pack(pop)
	for (uint_fast16_t blockY = 0; blockY < (height + 3) / 4; ++blockY)
		for (uint_fast16_t blockX = 0; blockX < (width + 3) / 4; ++blockX)
		{
			const BC3_BLOCK &cur_block = reinterpret_cast<const BC3_BLOCK *>(src)[blockY * ((width + 3) / 4) + blockX];
			COLOR24 color_table[4] =
			{
				cur_block.color_0,
				cur_block.color_1,
				(color_table[0] * 2 + color_table[1]) / 3,
				(color_table[0] + color_table[1] * 2) / 3
			};
			uint_least8_t alpha_table[8] = {cur_block.alpha_0, cur_block.alpha_1};
			if (alpha_table[0] > alpha_table[1])
			{
				alpha_table[2] = (alpha_table[0] * 6 + alpha_table[1] * 1) / 7;
				alpha_table[3] = (alpha_table[0] * 5 + alpha_table[1] * 2) / 7;
				alpha_table[4] = (alpha_table[0] * 4 + alpha_table[1] * 3) / 7;
				alpha_table[5] = (alpha_table[0] * 3 + alpha_table[1] * 4) / 7;
				alpha_table[6] = (alpha_table[0] * 2 + alpha_table[1] * 5) / 7;
				alpha_table[7] = (alpha_table[0] * 1 + alpha_table[1] * 6) / 7;
			}
			else
			{
				alpha_table[2] = (alpha_table[0] * 4 + alpha_table[1] * 1) / 5;
				alpha_table[3] = (alpha_table[0] * 3 + alpha_table[1] * 2) / 5;
				alpha_table[4] = (alpha_table[0] * 2 + alpha_table[1] * 3) / 5;
				alpha_table[5] = (alpha_table[0] * 1 + alpha_table[1] * 4) / 5;
				alpha_table[6] = 0;
				alpha_table[7] = 255;
			}
			for (int y = 0; y < 4 && y < height - blockY * 4; ++y)
				for (int x = 0; x < 4 && x < width - blockX * 4; ++x)
				{
					uint8_t (&dst_pixel)[4] = reinterpret_cast<uint8_t (*const&)[4]>(dst)[(blockY * 4 + y) * width + (blockX * 4 + x)];
					unsigned int color_idx = cur_block.color_indices[y * 4 + x];
					dst_pixel[0] = color_table[color_idx].r;
					dst_pixel[1] = color_table[color_idx].g;
					dst_pixel[2] = color_table[color_idx].b;
					dst_pixel[3] = alpha_table[cur_block.alpha_indices[y * 4 + x]];
				}
		}
}