From 16ef1ae0b6982c2ef70b742a2604a2a093bd6f36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= <mosra@centrum.cz>
Date: Sun, 1 Jan 2017 21:34:58 +0100
Subject: [PATCH] Math: functions for half-float (un)packing.

Most of the code is in the actual test where I'm comparing and
benchmarking three different implementations (a
naive/straightforward/ground-truth one, the chosen one and a fast though
cache-spilling table-based one) to ensure the behavior is consistent
across all of them and that the performance is within reasonable bounds.

The Corrade::TestSuite benchmarking stuff needs serious improvements,
though.
---
 src/Magnum/CMakeLists.txt            |   1 +
 src/Magnum/Math/Packing.cpp          | 104 ++++++
 src/Magnum/Math/Packing.h            |  43 +++
 src/Magnum/Math/Test/CMakeLists.txt  |   1 +
 src/Magnum/Math/Test/HalfTest.cpp    | 472 +++++++++++++++++++++++++++
 src/Magnum/Math/Test/PackingTest.cpp |   4 +-
 src/Magnum/PixelFormat.h             |   1 +
 src/Magnum/TextureFormat.h           |   4 +
 8 files changed, 629 insertions(+), 1 deletion(-)
 create mode 100644 src/Magnum/Math/Packing.cpp
 create mode 100644 src/Magnum/Math/Test/HalfTest.cpp

diff --git a/src/Magnum/CMakeLists.txt b/src/Magnum/CMakeLists.txt
index 061f94ebc..ac71a6b53 100644
--- a/src/Magnum/CMakeLists.txt
+++ b/src/Magnum/CMakeLists.txt
@@ -231,6 +231,7 @@ endif()
 set(MagnumMath_SRCS
     Math/Color.cpp
     Math/Functions.cpp
+    Math/Packing.cpp
     Math/instantiation.cpp)
 
 # Objects shared between main and test library
diff --git a/src/Magnum/Math/Packing.cpp b/src/Magnum/Math/Packing.cpp
new file mode 100644
index 000000000..0550c3bf1
--- /dev/null
+++ b/src/Magnum/Math/Packing.cpp
@@ -0,0 +1,104 @@
+/*
+    This file is part of Magnum.
+
+    Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016
+              Vladimír Vondruš <mosra@centrum.cz>
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+*/
+
+#include "Packing.h"
+
+namespace Magnum { namespace Math {
+
+namespace {
+
+union FloatBits {
+    UnsignedInt u;
+    Float f;
+};
+
+}
+
+/* Based on half_to_float_fast4() from https://gist.github.com/rygorous/2144712 */
+Float unpackHalf(const UnsignedShort value) {
+    constexpr const FloatBits Magic{113 << 23}; /* exponent field 113, i.e. 2^-14 */
+    /* Half exponent mask (0x7c00) after the 13-bit shift done below */
+    constexpr const UnsignedInt ShiftedExp = 0x7c00 << 13;
+
+    const UnsignedShort h{value};
+    FloatBits o;
+
+    o.u = (h & 0x7fff) << 13;                   /* exponent/mantissa bits, sign dropped */
+    const UnsignedInt exp = ShiftedExp & o.u;   /* just the exponent */
+    o.u += (127 - 15) << 23;                    /* rebias exponent from 15 to 127 */
+
+    /* Handle exponent special cases */
+    if(exp == ShiftedExp) {                     /* Inf/NaN? */
+        o.u += (128 - 16) << 23;                /* Exponent field becomes 255 */
+    } else if(exp == 0) {                       /* Zero/Denormal */
+        o.u += 1 << 23;                         /* Exponent field becomes 113 */
+        o.f -= Magic.f;                         /* Renormalize: drop the implicit one */
+    }
+
+    o.u |= (h & 0x8000) << 16;                  /* move sign bit to bit 31 */
+    return o.f;
+}
+
+/* Based on float_to_half_fast3() from https://gist.github.com/rygorous/2156668 */
+UnsignedShort packHalf(const Float value) {
+    constexpr const FloatBits FloatInfinity{255 << 23}; /* all float exponent bits set */
+    constexpr const FloatBits HalfInfinity{31 << 23};   /* becomes 0x7c00 after >> 13 */
+    constexpr const FloatBits Magic{15 << 23};          /* exponent field 15, i.e. 2^-112 */
+    constexpr const UnsignedInt SignMask = 0x80000000u;
+    constexpr const UnsignedInt RoundMask = ~0xfffu;    /* clears the low 12 bits */
+
+    FloatBits f;
+    f.f = value;
+    UnsignedShort h;
+
+    const UnsignedInt sign = f.u & SignMask;    /* remember the sign ... */
+    f.u ^= sign;                                /* ... and continue with the absolute value */
+
+    /* Note: all the integer compares in this function can be safely compiled
+       into signed compares since all operands are below 0x80000000. Important
+       if you want fast straight SSE2 code (since there's no unsigned PCMPGTD). */
+
+    /* Inf or NaN (all exponent bits set): NaN->qNaN and Inf->Inf */
+    if(f.u >= FloatInfinity.u) {
+        h = (f.u > FloatInfinity.u) ? 0x7e00 : 0x7c00;
+
+    /* (De)normalized number or zero */
+    } else {
+        f.u &= RoundMask;                       /* drop the low 12 mantissa bits */
+        f.f *= Magic.f;                         /* scale by 2^-112 to rebias the exponent */
+        f.u -= RoundMask;                       /* same as adding 0x1000 modulo 2^32 */
+
+        /* Clamp to infinity if overflowed (the sign is applied at the end) */
+        if (f.u > HalfInfinity.u) f.u = HalfInfinity.u;
+
+        /* Take the bits! */
+        h = f.u >> 13;
+    }
+
+    h |= sign >> 16;                            /* move sign bit from bit 31 to bit 15 */
+    return h;
+}
+
+}}
diff --git a/src/Magnum/Math/Packing.h b/src/Magnum/Math/Packing.h
index 909ce820b..c4641edfc 100644
--- a/src/Magnum/Math/Packing.h
+++ b/src/Magnum/Math/Packing.h
@@ -187,6 +187,49 @@ template<class Integral, class FloatingPoint> CORRADE_DEPRECATED("use pack() ins
 }
 #endif
 
+/**
+@brief Pack 32-bit float value into 16-bit half-float representation
+
+See [Wikipedia](https://en.wikipedia.org/wiki/Half-precision_floating-point_format)
+for more information about half floats. NaNs are converted to NaNs and
+infinities to infinities, though their exact bit pattern is not preserved. Note
+that rounding mode is unspecified in order to save some cycles.
+
+Implementation based on CC0 / public domain code by *Fabian Giesen*,
+https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ .
+@see @ref unpackHalf()
+*/
+MAGNUM_EXPORT UnsignedShort packHalf(Float value);
+
+/** @overload */
+template<std::size_t size> Vector<size, UnsignedShort> packHalf(const Vector<size, Float>& value) {
+    Vector<size, UnsignedShort> out{NoInit};
+    for(std::size_t i = 0; i != size; ++i)
+        out[i] = packHalf(value[i]);
+    return out;
+}
+
+/**
+@brief Unpack 16-bit half-float value into 32-bit float representation
+
+See [Wikipedia](https://en.wikipedia.org/wiki/Half-precision_floating-point_format)
+for more information about half floats. NaNs are converted to NaNs and
+infinities to infinities, though their exact bit pattern is not preserved.
+
+Implementation based on CC0 / public domain code by *Fabian Giesen*,
+https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ .
+@see @ref packHalf()
+*/
+MAGNUM_EXPORT Float unpackHalf(UnsignedShort value);
+
+/** @overload */
+template<std::size_t size> Vector<size, Float> unpackHalf(const Vector<size, UnsignedShort>& value) {
+    Vector<size, Float> out{NoInit};
+    for(std::size_t i = 0; i != size; ++i)
+        out[i] = unpackHalf(value[i]);
+    return out;
+}
+
 }}
 
 #endif
diff --git a/src/Magnum/Math/Test/CMakeLists.txt b/src/Magnum/Math/Test/CMakeLists.txt
index 887c8165c..f1f1be775 100644
--- a/src/Magnum/Math/Test/CMakeLists.txt
+++ b/src/Magnum/Math/Test/CMakeLists.txt
@@ -26,6 +26,7 @@
 corrade_add_test(MathBoolVectorTest BoolVectorTest.cpp LIBRARIES MagnumMathTestLib)
 corrade_add_test(MathConstantsTest ConstantsTest.cpp LIBRARIES MagnumMathTestLib)
 corrade_add_test(MathFunctionsTest FunctionsTest.cpp LIBRARIES MagnumMathTestLib)
+corrade_add_test(MathHalfTest HalfTest.cpp LIBRARIES MagnumMathTestLib)
 corrade_add_test(MathPackingTest PackingTest.cpp LIBRARIES MagnumMathTestLib)
 corrade_add_test(MathTagsTest TagsTest.cpp LIBRARIES MagnumMathTestLib)
 corrade_add_test(MathTypeTraitsTest TypeTraitsTest.cpp LIBRARIES MagnumMathTestLib)
diff --git a/src/Magnum/Math/Test/HalfTest.cpp b/src/Magnum/Math/Test/HalfTest.cpp
new file mode 100644
index 000000000..2c8f88c85
--- /dev/null
+++ b/src/Magnum/Math/Test/HalfTest.cpp
@@ -0,0 +1,472 @@
+/*
+    This file is part of Magnum.
+
+    Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016
+              Vladimír Vondruš <mosra@centrum.cz>
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+*/
+
+#include <Corrade/TestSuite/Tester.h>
+
+#include "Magnum/Math/Packing.h"
+#include "Magnum/Math/Vector3.h"
+
+namespace Magnum { namespace Math { namespace Test {
+
+struct HalfTest: Corrade::TestSuite::Tester {
+    explicit HalfTest();
+
+    void unpack();
+    void pack();
+    void repack();
+
+    void unpack1k();
+    void unpack1kNaive();
+    void unpack1kTable();
+    void pack1k();
+    void pack1kNaive();
+    void pack1kTable();
+
+    private:
+        /* Naive / ground-truth packing helpers */
+        UnsignedShort packNaive(Float value);
+        Float unpackNaive(UnsignedShort value);
+
+        /* Table-based packing helpers */
+        UnsignedInt convertMantissa(UnsignedInt i);
+        UnsignedShort packTable(Float value);
+        Float unpackTable(UnsignedShort value);
+
+        UnsignedInt _mantissaTable[2048];
+        UnsignedInt _exponentTable[64];
+        UnsignedShort _offsetTable[64];
+        UnsignedShort _baseTable[512];
+        UnsignedByte _shiftTable[512];
+};
+
+typedef Math::Constants<Float> Constants;
+
+HalfTest::HalfTest() {
+    addTests({&HalfTest::unpack,
+              &HalfTest::pack});
+
+    addRepeatedTests({&HalfTest::repack}, 65536); /* 2^16 repeats, see repack() */
+
+    addBenchmarks({
+        &HalfTest::unpack1k,
+        &HalfTest::unpack1kNaive,
+        &HalfTest::unpack1kTable,
+        &HalfTest::pack1k,
+        &HalfTest::pack1kNaive,
+        &HalfTest::pack1kTable}, 100);
+
+    /* Calculate tables used by packTable() and unpackTable() */
+    _mantissaTable[0] = 0;                      /* zero */
+    for(std::size_t i = 1; i != 1024; ++i)      /* half denormals */
+        _mantissaTable[i] = convertMantissa(i);
+    for(std::size_t i = 1024; i != 2048; ++i)   /* half normals */
+        _mantissaTable[i] = 0x38000000 + ((i - 1024) << 13);
+
+    _exponentTable[0] = 0;                      /* indexed by sign + exponent bits */
+    for(std::size_t i = 1; i != 31; ++i)
+        _exponentTable[i] = i << 23;
+    _exponentTable[31] = 0x47800000;            /* +Inf/NaN */
+    _exponentTable[32] = 0x80000000;            /* sign bit only */
+    for(std::size_t i = 33; i != 63; ++i)
+        _exponentTable[i] = 0x80000000 + ((i - 32) << 23);
+    _exponentTable[63] = 0xc7800000;            /* -Inf/NaN */
+
+    for(std::size_t i = 0; i != 64; ++i)
+        _offsetTable[i] = 1024;                 /* second half of _mantissaTable */
+    _offsetTable[0] = 0;                        /* zero exponent: first half */
+    _offsetTable[32] = 0;                       /* ditto, negative sign */
+
+    for(std::int_fast32_t i = 0; i != 256; ++i) { /* i is the float exponent field */
+        std::int_fast32_t e = i - 127;          /* unbiased; | 0x100 entries are negative */
+        if(e < -24) {                           /* too small for a half: signed zero */
+            _baseTable[i | 0x000] = 0x0000;
+            _baseTable[i | 0x100] = 0x8000;
+            _shiftTable[i | 0x000] = 24;        /* discards the whole 23-bit mantissa */
+            _shiftTable[i | 0x100] = 24;
+        } else if(e < -14) {                    /* half denormal */
+            _baseTable[i | 0x000] = (0x0400 >> (-e - 14));
+            _baseTable[i | 0x100] = (0x0400 >> (-e - 14)) | 0x8000;
+            _shiftTable[i | 0x000] = -e - 1;
+            _shiftTable[i | 0x100] = -e - 1;
+        } else if(e <= 15) {                    /* half normal */
+            _baseTable[i | 0x000] = ((e + 15) << 10);
+            _baseTable[i | 0x100] = ((e + 15) << 10) | 0x8000;
+            _shiftTable[i | 0x000] = 13;
+            _shiftTable[i | 0x100] = 13;
+        } else if(e < 128) {                    /* too large for a half: signed infinity */
+            _baseTable[i | 0x000] = 0x7c00;
+            _baseTable[i | 0x100] = 0xfc00;
+            _shiftTable[i | 0x000] = 24;        /* discards the whole 23-bit mantissa */
+            _shiftTable[i | 0x100] = 24;
+        } else {                                /* exponent field 255: float Inf/NaN */
+            _baseTable[i | 0x000] = 0x7c00;
+            _baseTable[i | 0x100] = 0xfc00;
+            _shiftTable[i | 0x000] = 13;        /* keeps the top mantissa bits (NaN payload) */
+            _shiftTable[i | 0x100] = 13;
+        }
+    }
+}
+
+namespace {
+
+union FloatBits {
+    UnsignedInt u;
+    Float f;
+    struct {
+        UnsignedInt mantissa:23;
+        UnsignedInt exponent:8;
+        UnsignedInt sign:1;
+    } bits;
+};
+
+union HalfBits {
+    UnsignedShort u;
+    struct {
+        UnsignedShort mantissa:10;
+        UnsignedShort exponent:5;
+        UnsignedShort sign:1;
+    } bits;
+};
+
+}
+
+/* Ground-truth float-to-half conversion. float_to_half_full() from
+   https://gist.github.com/rygorous/2156668, originally from ISPC */
+UnsignedShort HalfTest::packNaive(Float value) {
+    FloatBits f;
+    f.f = value;
+    HalfBits o{};
+
+    /* Signed zero/denormal (which will underflow) */
+    if(f.bits.exponent == 0) {
+        o.bits.exponent = 0;
+
+    /* Inf or NaN (all exponent bits set): NaN->qNaN and Inf->Inf */
+    } else if(f.bits.exponent == 255) {
+        o.bits.exponent = 31;
+        o.bits.mantissa = f.bits.mantissa ? 0x200 : 0;
+
+    /* Normalized number */
+    } else {
+        /* Remove the single-precision bias, then apply the half-precision one */
+        Int newexp = f.bits.exponent - 127 + 15;
+
+        /* Overflow, return signed infinity */
+        if(newexp >= 31) {
+            o.bits.exponent = 31;
+
+        /* Underflow, the result is a half denormal or zero */
+        } else if(newexp <= 0) {
+            /* Mantissa might be non-zero */
+            if((14 - newexp) <= 24) {
+                /* Hidden 1 bit */
+                UnsignedInt mant = f.bits.mantissa | 0x800000;
+                o.bits.mantissa = mant >> (14 - newexp);
+
+                /* Round up if the highest discarded bit is set */
+                if((mant >> (13 - newexp)) & 1) {
+                    /* Round, might overflow into exp bit, but this is OK */
+                    o.u++;
+                }
+            }
+        } else {                                /* Result is a normalized half */
+            o.bits.exponent = newexp;
+            o.bits.mantissa = f.bits.mantissa >> 13;
+
+            /* Round up if the highest discarded bit (bit 12) is set */
+            if(f.bits.mantissa & 0x1000) {
+                /* Round, might overflow to inf, this is OK */
+                o.u++;
+            }
+        }
+    }
+
+    o.bits.sign = f.bits.sign;
+    return o.u;
+}
+
+/* Ground-truth half-to-float conversion. half_to_float_full() from
+   https://gist.github.com/rygorous/2144712, originally from ISPC */
+Float HalfTest::unpackNaive(UnsignedShort value) {
+    HalfBits h{value};
+    FloatBits o{};
+
+    /* (Signed) zero */
+    if(h.bits.exponent == 0 && h.bits.mantissa == 0) {
+        o.bits.sign = h.bits.sign;
+
+    } else {
+        /* Denormal (will convert to normalized) */
+        if(h.bits.exponent == 0) {
+            /* Shift the mantissa left until its implicit one reaches bit 10;
+               e ends up being the number of shifts minus one */
+            Int e = -1;
+            UnsignedInt m = h.bits.mantissa;
+            do {
+                e++;
+                m <<= 1;
+            } while((m & 0x400) == 0);
+
+            o.bits.mantissa = (m & 0x3ff) << 13; /* implicit one masked away */
+            o.bits.exponent = 127 - 15 - e;
+            o.bits.sign = h.bits.sign;
+
+        /* Inf/NaN */
+        } else if(h.bits.exponent == 0x1f) {
+            /* Note: both share a code path -- the shifted mantissa stays zero
+               for Inf and nonzero for NaN */
+            o.bits.mantissa = h.bits.mantissa << 13;
+            o.bits.exponent = 255;
+            o.bits.sign = h.bits.sign;
+
+        /* Normalized number */
+        } else {
+            o.bits.mantissa = h.bits.mantissa << 13;
+            o.bits.exponent = 127 - 15 + h.bits.exponent;
+            o.bits.sign = h.bits.sign;
+        }
+    }
+
+    return o.f;
+}
+
+/* Jeroen van der Zijp -- Fast Half Float Conversions, 2008,
+   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf */
+UnsignedInt HalfTest::convertMantissa(UnsignedInt i) { /* i == 0 would never terminate */
+    UnsignedInt m = i << 13;                    /* mantissa at its float position */
+    UnsignedInt e = 0;
+
+    while(!(m & 0x00800000)) {                  /* normalize until bit 23 is set */
+        e -= 0x00800000;                        /* one exponent step down per shift */
+        m <<= 1;
+    }
+
+    m &= ~0x00800000;                           /* drop the now-implicit bit 23 */
+    e += 0x38800000;                            /* add exponent field value 113 */
+    return m | e;
+}
+
+UnsignedShort HalfTest::packTable(Float value) { /* tables are indexed by sign + exponent (9 bits) */
+    const UnsignedInt v = reinterpret_cast<const UnsignedInt&>(value); /* NOTE(review): type punning via reference cast, may violate strict aliasing */
+    return _baseTable[(v >> 23) & 0x1ff] + ((v & 0x007fffff) >> _shiftTable[(v >> 23) & 0x1ff]);
+}
+
+Float HalfTest::unpackTable(UnsignedShort value) { /* value >> 10 is sign + exponent (6 bits) */
+    UnsignedInt result = _mantissaTable[_offsetTable[value >> 10] + (value & 0x3ff)] + _exponentTable[value >> 10];
+    return reinterpret_cast<Float&>(result); /* NOTE(review): type punning via reference cast, may violate strict aliasing */
+}
+
+void HalfTest::unpack() {
+    CORRADE_COMPARE(Math::unpackHalf(0x0000), 0.0f);
+    CORRADE_COMPARE(Math::unpackHalf(0x3c00), 1.0f);
+    CORRADE_COMPARE(Math::unpackHalf(0x4000), 2.0f);
+    CORRADE_COMPARE(Math::unpackHalf(0x4200), 3.0f);
+
+    CORRADE_COMPARE(unpackNaive(0x0000), 0.0f);
+    CORRADE_COMPARE(unpackNaive(0x3c00), 1.0f);
+    CORRADE_COMPARE(unpackNaive(0x4000), 2.0f);
+    CORRADE_COMPARE(unpackNaive(0x4200), 3.0f);
+
+    CORRADE_COMPARE(unpackTable(0x0000), 0.0f);
+    CORRADE_COMPARE(unpackTable(0x3c00), 1.0f);
+    CORRADE_COMPARE(unpackTable(0x4000), 2.0f);
+    CORRADE_COMPARE(unpackTable(0x4200), 3.0f);
+
+    /* Normals, denormals, specials */
+    CORRADE_COMPARE(Math::unpackHalf(0x8dc2), -0.000351f);
+    CORRADE_COMPARE(Math::unpackHalf(0x57bc), 123.75f);
+    CORRADE_COMPARE(Math::unpackHalf(0xfe00), -Constants::nan());
+    CORRADE_COMPARE(Math::unpackHalf(0x7e00), +Constants::nan());
+    CORRADE_COMPARE(Math::unpackHalf(0xfc00), -Constants::inf());
+    CORRADE_COMPARE(Math::unpackHalf(0x7c00), +Constants::inf());
+
+    CORRADE_COMPARE(unpackNaive(0x8dc2), -0.000351f);
+    CORRADE_COMPARE(unpackNaive(0x57bc), 123.75f);
+    CORRADE_COMPARE(unpackNaive(0xfe00), -Constants::nan());
+    CORRADE_COMPARE(unpackNaive(0x7e00), +Constants::nan());
+    CORRADE_COMPARE(unpackNaive(0xfc00), -Constants::inf());
+    CORRADE_COMPARE(unpackNaive(0x7c00), +Constants::inf());
+
+    CORRADE_COMPARE(unpackTable(0x8dc2), -0.000351f);
+    CORRADE_COMPARE(unpackTable(0x57bc), 123.75f);
+    CORRADE_COMPARE(unpackTable(0xfe00), -Constants::nan());
+    CORRADE_COMPARE(unpackTable(0x7e00), +Constants::nan());
+    CORRADE_COMPARE(unpackTable(0xfc00), -Constants::inf());
+    CORRADE_COMPARE(unpackTable(0x7c00), +Constants::inf());
+
+    /* Vector */
+    CORRADE_COMPARE(Math::unpackHalf(Math::Vector3<UnsignedShort>{0x0000, 0x4200, 0x3c00}),
+        (Math::Vector3<Float>{0.0f, 3.0f, 1.0f}));
+}
+
+void HalfTest::pack() {
+    CORRADE_COMPARE(Math::packHalf(0.0f), 0x0000);
+    CORRADE_COMPARE(Math::packHalf(1.0f), 0x3c00);
+    CORRADE_COMPARE(Math::packHalf(2.0f), 0x4000);
+    CORRADE_COMPARE(Math::packHalf(3.0f), 0x4200);
+
+    CORRADE_COMPARE(packNaive(0.0f), 0x0000);
+    CORRADE_COMPARE(packNaive(1.0f), 0x3c00);
+    CORRADE_COMPARE(packNaive(2.0f), 0x4000);
+    CORRADE_COMPARE(packNaive(3.0f), 0x4200);
+
+    CORRADE_COMPARE(packTable(0.0f), 0x0000);
+    CORRADE_COMPARE(packTable(1.0f), 0x3c00);
+    CORRADE_COMPARE(packTable(2.0f), 0x4000);
+    CORRADE_COMPARE(packTable(3.0f), 0x4200);
+
+    /* Rounding */
+    CORRADE_COMPARE(Math::unpackHalf(Math::packHalf(-1024.01f)), -1024.0f);
+    CORRADE_COMPARE(Math::unpackHalf(Math::packHalf(-1024.50f)), -1025.0f);
+    CORRADE_COMPARE(Math::unpackHalf(Math::packHalf(-1024.99f)), -1025.0f);
+    CORRADE_COMPARE(Math::unpackHalf(Math::packHalf(+1024.01f)), +1024.0f);
+    CORRADE_COMPARE(Math::unpackHalf(Math::packHalf(+1024.50f)), +1025.0f);
+    CORRADE_COMPARE(Math::unpackHalf(Math::packHalf(+1024.99f)), +1025.0f);
+
+    /* Don't care about rounding behavior of the others */
+
+    /* Normals, denormals, specials */
+    CORRADE_COMPARE(Math::packHalf(-0.000351512f), 0x8dc2);
+    CORRADE_COMPARE(Math::packHalf(123.7567f), 0x57bc);
+    /* Emscripten doesn't differentiate NaNs and treats their sign slightly
+       differently on different optimization levels. On MSVC they are somehow
+       flipped around, so I'm testing w/o the sign. */
+    CORRADE_COMPARE(Math::packHalf(-Constants::nan()) & ~0x8000, 0x7e00);
+    CORRADE_COMPARE(Math::packHalf(+Constants::nan()) & ~0x8000, 0x7e00);
+    CORRADE_COMPARE(Math::packHalf(-Constants::inf()), 0xfc00);
+    CORRADE_COMPARE(Math::packHalf(+Constants::inf()), 0x7c00);
+
+    CORRADE_COMPARE(packNaive(-0.000351512f), 0x8dc2);
+    CORRADE_COMPARE(packNaive(123.7567f), 0x57bc);
+    /* Emscripten doesn't differentiate NaNs and treats their sign slightly
+       differently on different optimization levels. On MSVC they are somehow
+       flipped around, so I'm testing w/o the sign. */
+    CORRADE_COMPARE(packNaive(-Constants::nan()) & ~0x8000, 0x7e00);
+    CORRADE_COMPARE(packNaive(+Constants::nan()) & ~0x8000, 0x7e00);
+    CORRADE_COMPARE(packNaive(-Constants::inf()), 0xfc00);
+    CORRADE_COMPARE(packNaive(+Constants::inf()), 0x7c00);
+
+    CORRADE_COMPARE(packTable(-0.000351512f), 0x8dc2);
+    CORRADE_COMPARE(packTable(123.7567f), 0x57bc);
+    /* Emscripten doesn't differentiate NaNs and treats their sign slightly
+       differently on different optimization levels. On MSVC they are somehow
+       flipped around, so I'm testing w/o the sign. */
+    CORRADE_COMPARE(packTable(-Constants::nan()) & ~0x8000, 0x7e00);
+    CORRADE_COMPARE(packTable(+Constants::nan()) & ~0x8000, 0x7e00);
+    CORRADE_COMPARE(packTable(-Constants::inf()), 0xfc00);
+    CORRADE_COMPARE(packTable(+Constants::inf()), 0x7c00);
+
+    /* Vector */
+    CORRADE_COMPARE(Math::packHalf(Math::Vector3<Float>{0.0f, 3.0f, 1.0f}),
+        (Math::Vector3<UnsignedShort>{0x0000, 0x4200, 0x3c00}));
+}
+
+void HalfTest::repack() {
+    UnsignedShort in = testCaseRepeatId();      /* repeat ID is the half bit pattern */
+    Float result = Math::unpackHalf(in);
+    Float resultNaive = unpackNaive(in);
+    Float resultTable = unpackTable(in);
+
+    /* NaNs don't round-trip, but that's okay */
+    if(result != result) {
+        CORRADE_VERIFY(result != result);
+        CORRADE_VERIFY(resultNaive != resultNaive);
+        CORRADE_VERIFY(resultTable != resultTable);
+
+    /* Otherwise verify that all three algos give the same results */
+    } else {
+        CORRADE_COMPARE(result, resultTable);
+        CORRADE_COMPARE(result, resultNaive);
+
+        CORRADE_COMPARE(Math::packHalf(result), in);
+        CORRADE_COMPARE(packTable(result), in);
+        CORRADE_COMPARE(packNaive(result), in);
+    }
+}
+
+void HalfTest::pack1k() {
+    UnsignedInt out = 0;
+    CORRADE_BENCHMARK(100)
+        for(std::uint_fast16_t i = 0; i != 1000; ++i)
+            out += Math::packHalf(Float(i)*65);
+
+    /* To avoid optimizing things out */
+    CORRADE_VERIFY(out);
+}
+
+void HalfTest::pack1kNaive() {
+    UnsignedInt out = 0;
+    CORRADE_BENCHMARK(100)
+        for(std::uint_fast16_t i = 0; i != 1000; ++i)
+            out += packNaive(Float(i)*65);
+
+    /* To avoid optimizing things out */
+    CORRADE_VERIFY(out);
+}
+
+void HalfTest::pack1kTable() {
+    UnsignedInt out = 0;
+    CORRADE_BENCHMARK(100)
+        for(std::uint_fast16_t i = 0; i != 1000; ++i)
+            out += packTable(Float(i)*65);
+
+    /* To avoid optimizing things out */
+    CORRADE_VERIFY(out);
+}
+
+void HalfTest::unpack1k() {
+    Float out = 0.0f;
+    CORRADE_BENCHMARK(100)
+        for(std::uint_fast16_t i = 0; i != 1000; ++i)
+            out += Math::unpackHalf(i*65);
+
+    /* To avoid optimizing things out */
+    CORRADE_VERIFY(out);
+}
+
+void HalfTest::unpack1kNaive() {
+    Float out = 0.0f;
+    CORRADE_BENCHMARK(100)
+        for(std::uint_fast16_t i = 0; i != 1000; ++i)
+            out += unpackNaive(i*65);
+
+    /* To avoid optimizing things out */
+    CORRADE_VERIFY(out);
+}
+
+void HalfTest::unpack1kTable() {
+    Float out = 0.0f;
+    CORRADE_BENCHMARK(100)
+        for(std::uint_fast16_t i = 0; i != 1000; ++i)
+            out += unpackTable(i*65);
+
+    /* To avoid optimizing things out */
+    CORRADE_VERIFY(out);
+}
+
+}}}
+
+CORRADE_TEST_MAIN(Magnum::Math::Test::HalfTest)
+
diff --git a/src/Magnum/Math/Test/PackingTest.cpp b/src/Magnum/Math/Test/PackingTest.cpp
index 0e5e2a33b..30494b443 100644
--- a/src/Magnum/Math/Test/PackingTest.cpp
+++ b/src/Magnum/Math/Test/PackingTest.cpp
@@ -43,6 +43,9 @@ struct PackingTest: Corrade::TestSuite::Tester {
     void reunpackUnsinged();
     void reunpackSinged();
     void unpackTypeDeduction();
+
+    /* Half (un)pack functions are tested and benchmarked in HalfTest.cpp,
+       because that involves comparison and benchmarking against ground truth */
 };
 
 typedef Math::Vector3<Float> Vector3;
@@ -279,4 +282,3 @@ void PackingTest::unpackTypeDeduction() {
 }}}
 
 CORRADE_TEST_MAIN(Magnum::Math::Test::PackingTest)
-
diff --git a/src/Magnum/PixelFormat.h b/src/Magnum/PixelFormat.h
index 1c891fb96..3a1d72bb1 100644
--- a/src/Magnum/PixelFormat.h
+++ b/src/Magnum/PixelFormat.h
@@ -375,6 +375,7 @@ enum class PixelType: GLenum {
 
     /**
      * Each component half float.
+     * @see @ref Math::packHalf(), @ref Math::unpackHalf()
      * @requires_gl30 Extension @extension{ARB,half_float_pixel}
      * @requires_gles30 For texture data only, extension
      *      @es_extension2{OES,texture_half_float,OES_texture_float} in OpenGL
diff --git a/src/Magnum/TextureFormat.h b/src/Magnum/TextureFormat.h
index b0c9f9929..90c0acb95 100644
--- a/src/Magnum/TextureFormat.h
+++ b/src/Magnum/TextureFormat.h
@@ -513,6 +513,7 @@ enum class TextureFormat: GLenum {
 
     /**
      * Red component, half float.
+     * @see @ref Math::packHalf(), @ref Math::unpackHalf()
      * @requires_gl30 Extension @extension{ARB,texture_rg} and @extension{ARB,texture_float}
      * @requires_gles30 Only normalized integral formats are available in
      *      OpenGL ES 2.0.
@@ -523,6 +524,7 @@ enum class TextureFormat: GLenum {
 
     /**
      * Red and green component, each half float.
+     * @see @ref Math::packHalf(), @ref Math::unpackHalf()
      * @requires_gl30 Extension @extension{ARB,texture_rg} and @extension{ARB,texture_float}
      * @requires_gles30 Only normalized integral formats are available in
      *      OpenGL ES 2.0.
@@ -533,6 +535,7 @@ enum class TextureFormat: GLenum {
 
     /**
      * RGB, each component half float.
+     * @see @ref Math::packHalf(), @ref Math::unpackHalf()
      * @requires_gl30 Extension @extension{ARB,texture_float}
      * @requires_gles30 Only normalized integral formats are available in
      *      OpenGL ES 2.0.
@@ -543,6 +546,7 @@ enum class TextureFormat: GLenum {
 
     /**
      * RGBA, each component half float.
+     * @see @ref Math::packHalf(), @ref Math::unpackHalf()
      * @requires_gl30 Extension @extension{ARB,texture_float}
      * @requires_gles30 Only normalized integral formats are available in
      *      OpenGL ES 2.0.