mirror of https://github.com/mosra/magnum.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
238 lines
8.0 KiB
238 lines
8.0 KiB
/* |
|
This file is part of Magnum. |
|
|
|
Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, |
|
2020, 2021, 2022, 2023 Vladimír Vondruš <mosra@centrum.cz> |
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a |
|
copy of this software and associated documentation files (the "Software"), |
|
to deal in the Software without restriction, including without limitation |
|
the rights to use, copy, modify, merge, publish, distribute, sublicense, |
|
and/or sell copies of the Software, and to permit persons to whom the |
|
Software is furnished to do so, subject to the following conditions: |
|
|
|
The above copyright notice and this permission notice shall be included |
|
in all copies or substantial portions of the Software. |
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|
DEALINGS IN THE SOFTWARE. |
|
*/ |
|
|
|
#include "ColorBatch.h" |
|
|
|
#include <cstring> |
|
#include <Corrade/Containers/StridedArrayView.h> |
|
|
|
#include "Magnum/Magnum.h" |
|
|
|
namespace Magnum { namespace Math { |
|
|
|
namespace { |
|
|
|
inline void yFlipBc1BlockInPlace(char* const data) {
    /* Flips one 64-bit BC1 block vertically. Only bytes 4 to 7 of `data` are
       touched, the pointer thus has to reference at least 8 bytes. The block
       consists of:

        -   bytes 0-1, the first endpoint color
        -   bytes 2-3, the second endpoint color
        -   bytes 4-7, the 2-bit color indices of the 4x4 texels, stored as

                a b c d
                e f g h
                i j k l
                m n o p

       Four 2-bit indices make a byte, so every texel row occupies exactly one
       byte and reversing the row order is the same as reversing the order of
       the last four bytes. Format reference:
        https://learn.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#bc1 */

    /* Mirror the index bytes around their center, i.e. 4 <-> 7 and 5 <-> 6 */
    for(std::size_t i = 0; i != 2; ++i) {
        char& upperRow = data[4 + i];
        char& lowerRow = data[7 - i];
        const char saved = upperRow;
        upperRow = lowerRow;
        lowerRow = saved;
    }
}
|
|
|
inline void yFlipBc2BlockInPlace(char* data) {
    /* Flips one 128-bit BC2 block vertically. The block consists of:

        -   bytes 0-7, the 4-bit alpha values of the 4x4 texels, in the same
            texel order as the BC1 color indices
        -   bytes 8-9, the first endpoint color
        -   bytes 10-11, the second endpoint color
        -   bytes 12-15, the 2-bit color indices, same order as BC1

       With four bits per texel every alpha row takes two bytes, so the alpha
       part is flipped by exchanging two-byte rows. The second half is laid
       out exactly like a BC1 block and is delegated to the BC1 routine.
       Format reference:
        https://learn.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#bc2 */

    /* Exchange alpha row 0 with row 3 and row 1 with row 2 */
    for(std::size_t row = 0; row != 2; ++row) {
        char* const upperRow = data + 2*row;
        char* const lowerRow = data + 2*(3 - row);
        for(std::size_t i = 0; i != 2; ++i) {
            const char saved = upperRow[i];
            upperRow[i] = lowerRow[i];
            lowerRow[i] = saved;
        }
    }

    /* The color half behaves like a standalone BC1 block */
    yFlipBc1BlockInPlace(data + 8);
}
|
|
|
inline void yFlipBc4BlockInPlace(char* const data) {
    /* Flips one 64-bit BC4 block vertically. Only bytes 2 to 7 of `data` are
       touched, the pointer thus has to reference at least 8 bytes. The block
       consists of:

        -   byte 0, the first endpoint color channel
        -   byte 1, the second endpoint color channel
        -   bytes 2-7, the 3-bit color indices + interpolation factors of the
            4x4 texels, in the same texel order as BC1

       Compared to BC1 a texel row is 12 bits instead of 8, so the rows don't
       fall on byte boundaries and the flip has to be done on the 48-bit
       integer as a whole. Format reference:
        https://learn.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#bc4 */

    /* Assemble the six index bytes into a 48-bit integer with byte 2 being
       the least significant one. Explicit shifts are used instead of
       aliasing the bytes with an integer through a union, as that makes the
       result independent of host endianness and avoids reading an inactive
       union member with two of its bytes never written. The cast through
       unsigned char prevents sign extension where char is signed. */
    unsigned long long indices = 0;
    for(std::size_t i = 0; i != 6; ++i)
        indices |= static_cast<unsigned long long>(static_cast<unsigned char>(data[i + 2])) << 8*i;

    /* Reverse the order of the four 12-bit rows */
    indices =
        (indices & 0xfff000000000ull) >> 36 |
        (indices & 0x000fff000000ull) >> 12 |
        (indices & 0x000000fff000ull) << 12 |
        (indices & 0x000000000fffull) << 36;

    /* Scatter the integer back to the six bytes, least significant first */
    for(std::size_t i = 0; i != 6; ++i)
        data[i + 2] = static_cast<char>((indices >> 8*i) & 0xff);
}
|
|
|
inline void yFlipBc3BlockInPlace(char* const data) {
    /* Flips one 128-bit BC3 block vertically by treating it as two
       independent 64-bit halves: the first eight bytes are flipped like a BC4
       block, the following eight bytes like a BC1 block */
    char* const firstHalf = data;
    char* const secondHalf = data + 8;
    yFlipBc4BlockInPlace(firstHalf);
    yFlipBc1BlockInPlace(secondHalf);
}
|
|
|
inline void yFlipBc5BlockInPlace(char* const data) {
    /* Flips one 128-bit BC5 block vertically. Both 64-bit halves of the block
       are flipped the same way a standalone BC4 block is. */
    for(std::size_t offset = 0; offset != 16; offset += 8)
        yFlipBc4BlockInPlace(data + offset);
}
|
|
|
template<std::size_t blockSize, void(*flipBlock)(char*)> void yFlipBlocksInPlace(const Containers::StridedArrayView4D<char>& blocks
    #ifndef CORRADE_NO_ASSERT
    , const char* messagePrefix
    #endif
) {
    /* Vertically flips a 3D grid of compressed blocks in place. The first
       three view dimensions are iterated as slices, block rows and blocks in
       a row, the fourth dimension holds the bytes of a single block and is
       expected to be contiguous and exactly blockSize bytes large. For every
       slice the order of block rows gets reversed and flipBlock is called on
       every block so its contents get flipped as well. The messagePrefix is
       prepended to assertion messages and exists only if assertions are
       compiled in. */
    const std::size_t* const size = blocks.size().begin();
    CORRADE_ASSERT(size[3] == blockSize,
        messagePrefix << "expected last dimension to be" << blockSize << "bytes but got" << size[3], );
    CORRADE_ASSERT(blocks.isContiguous<3>(),
        messagePrefix << "last dimension is not contiguous", );

    /* The overall structure mostly mirrors Utility::flipInPlace(), minus its
       "leftovers" handling. That function isn't reused directly as it would
       mean two passes over the memory -- one to move whole blocks around and
       another to recalculate the contents of each. */

    CORRADE_INTERNAL_ASSERT(blocks.template isContiguous<3>());

    const std::ptrdiff_t* const stride = blocks.stride().begin();
    char* const first = static_cast<char*>(blocks.data());
    const std::size_t rowCount = size[1];
    const std::size_t pairedRowCount = rowCount/2;

    for(std::size_t slice = 0; slice != size[0]; ++slice) {
        char* const sliceData = first + slice*stride[0];

        /* Pair every row from the first half with its mirror counterpart
           from the second half and exchange them block by block */
        for(std::size_t row = 0; row != pairedRowCount; ++row) {
            char* const upperRow = sliceData + row*stride[1];
            char* const lowerRow = sliceData + (rowCount - row - 1)*stride[1];

            /* Scratch space for a single block used during the exchange */
            alignas(blockSize) char scratch[blockSize];
            for(std::size_t column = 0; column != size[2]; ++column) {
                char* const upper = upperRow + column*stride[2];
                char* const lower = lowerRow + column*stride[2];

                /* Flip the contents of both blocks where they are, then
                   exchange them through the scratch space */
                flipBlock(upper);
                flipBlock(lower);
                std::memcpy(scratch, upper, blockSize);
                std::memcpy(upper, lower, blockSize);
                std::memcpy(lower, scratch, blockSize);
            }
        }

        /* With an odd row count the middle row stays where it is, but the
           contents of its blocks still need to be flipped */
        if(rowCount % 2 != 0) {
            char* const middleRow = sliceData + pairedRowCount*stride[1];
            for(std::size_t column = 0; column != size[2]; ++column)
                flipBlock(middleRow + column*stride[2]);
        }
    }
}
|
|
|
} |
|
|
|
/* Vertically flips a view of 8-byte BC1 blocks in place. The assertion
   message prefix is passed along only if assertions are compiled in. */
void yFlipBc1InPlace(const Containers::StridedArrayView4D<char>& blocks) {
    #ifndef CORRADE_NO_ASSERT
    yFlipBlocksInPlace<8, yFlipBc1BlockInPlace>(blocks, "Math::yFlipBc1InPlace():");
    #else
    yFlipBlocksInPlace<8, yFlipBc1BlockInPlace>(blocks);
    #endif
}
|
|
|
/* Vertically flips a view of 16-byte BC2 blocks in place. The assertion
   message prefix is passed along only if assertions are compiled in. */
void yFlipBc2InPlace(const Containers::StridedArrayView4D<char>& blocks) {
    #ifndef CORRADE_NO_ASSERT
    yFlipBlocksInPlace<16, yFlipBc2BlockInPlace>(blocks, "Math::yFlipBc2InPlace():");
    #else
    yFlipBlocksInPlace<16, yFlipBc2BlockInPlace>(blocks);
    #endif
}
|
|
|
/* Vertically flips a view of 16-byte BC3 blocks in place. The assertion
   message prefix is passed along only if assertions are compiled in. */
void yFlipBc3InPlace(const Containers::StridedArrayView4D<char>& blocks) {
    #ifndef CORRADE_NO_ASSERT
    yFlipBlocksInPlace<16, yFlipBc3BlockInPlace>(blocks, "Math::yFlipBc3InPlace():");
    #else
    yFlipBlocksInPlace<16, yFlipBc3BlockInPlace>(blocks);
    #endif
}
|
|
|
/* Vertically flips a view of 8-byte BC4 blocks in place. The assertion
   message prefix is passed along only if assertions are compiled in. */
void yFlipBc4InPlace(const Containers::StridedArrayView4D<char>& blocks) {
    #ifndef CORRADE_NO_ASSERT
    yFlipBlocksInPlace<8, yFlipBc4BlockInPlace>(blocks, "Math::yFlipBc4InPlace():");
    #else
    yFlipBlocksInPlace<8, yFlipBc4BlockInPlace>(blocks);
    #endif
}
|
|
|
/* Vertically flips a view of 16-byte BC5 blocks in place. The assertion
   message prefix is passed along only if assertions are compiled in. */
void yFlipBc5InPlace(const Containers::StridedArrayView4D<char>& blocks) {
    #ifndef CORRADE_NO_ASSERT
    yFlipBlocksInPlace<16, yFlipBc5BlockInPlace>(blocks, "Math::yFlipBc5InPlace():");
    #else
    yFlipBlocksInPlace<16, yFlipBc5BlockInPlace>(blocks);
    #endif
}
|
|
|
}}
|
|
|