diff --git a/doc/changelog.dox b/doc/changelog.dox
index 73776610b..eec01f8ac 100644
--- a/doc/changelog.dox
+++ b/doc/changelog.dox
@@ -262,6 +262,9 @@ See also:
     @ref Math::Color4::fromLinearRgbaInt() as counterparts to
     @ref Math::Color3::fromSrgbInt(), @ref Math::Color4::fromSrgbInt() and
     @ref Math::Color4::fromSrgbAlphaInt() that don't perform a sRGB conversion
+-   Added @ref Math::Color4::premultiplied() and
+    @relativeref{Math::Color4,unpremultiplied()} for converting colors to and
+    from premultiplied alpha representation
 -   New @ref Math::DualComplex::from(const Complex<T>&, const Vector2<T>&) and
     @ref Math::DualQuaternion::from(const Quaternion<T>&, const Vector3<T>&)
     functions mirroring @ref Math::Matrix3::from(const Matrix2x2<T>&, const Vector2<T>&)
diff --git a/src/Magnum/Math/Color.h b/src/Magnum/Math/Color.h
index 3ae9d0a75..083948f67 100644
--- a/src/Magnum/Math/Color.h
+++ b/src/Magnum/Math/Color.h
@@ -248,6 +248,43 @@ template<class T, typename std::enable_if<IsIntegral<T>::value, int>::type = 0>
     return toXyz<typename Color3<T>::FloatingPointType>(unpack<Color3<typename Color3<T>::FloatingPointType>>(rgb));
 }
 
+/* Alpha (un)premultiplication */
+template<class T, typename std::enable_if<IsFloatingPoint<T>::value, int>::type = 0> constexpr Color4<T> premultiplied(const Color4<T>& color) {
+    return {color.rgb()*color.a(), color.a()}; /* RGB scaled by alpha, alpha kept as-is */
+}
+template<class T, typename std::enable_if<IsIntegral<T>::value, int>::type = 0> constexpr Color4<T> premultiplied(const Color4<T>& color) {
+    /* Each RGB channel is scaled by alpha/bitMax in floating point. The + 0.5
+       makes the truncating cast back to T round to nearest; round() isn't used
+       so it stays constexpr. See premultipliedRoundtrip() for pack() parity. */
+    return {
+        T(typename Color4<T>::FloatingPointType(color.r())*color.a()/bitMax<T>() + typename Color4<T>::FloatingPointType(0.5)),
+        T(typename Color4<T>::FloatingPointType(color.g())*color.a()/bitMax<T>() + typename Color4<T>::FloatingPointType(0.5)),
+        T(typename Color4<T>::FloatingPointType(color.b())*color.a()/bitMax<T>() + typename Color4<T>::FloatingPointType(0.5)),
+        color.a() /* alpha is passed through unchanged */
+    };
+}
+
+template<class T, typename std::enable_if<IsFloatingPoint<T>::value, int>::type = 0> constexpr Color4<T> unpremultiplied(const Color4<T>& color) {
+    /* Divides RGB by alpha. Zero alpha would divide by zero, so the RGB is
+       zeroed instead --- keeping it would only add variation to the output. */
+    return {color.a() == T(0) ? Color3<T>{} : color.rgb()/color.a(), color.a()};
+}
+template<class T, typename std::enable_if<IsIntegral<T>::value, int>::type = 0> constexpr Color4<T> unpremultiplied(const Color4<T>& color) {
+    /* Zero alpha gives a zero color, as in the floating-point variant. Each
+       RGB channel is additionally clamped to alpha so the quotient doesn't go
+       over 1, which would overflow the packed type. The + 0.5 rounds to
+       nearest instead of truncating; round() isn't used so this stays
+       constexpr. Unlike premultiplied(), this does *not* match pack()/unpack()
+       behavior, as the direct calculation is statistically more precise. See
+       the unpremultipliedRoundtrip() test for details. */
+    return color.a() == T(0) ? Color4<T>{} : Color4<T>{
+        T(typename Color4<T>::FloatingPointType(min(color.r(), color.a()))*bitMax<T>()/color.a() + typename Color4<T>::FloatingPointType(0.5)),
+        T(typename Color4<T>::FloatingPointType(min(color.g(), color.a()))*bitMax<T>()/color.a() + typename Color4<T>::FloatingPointType(0.5)),
+        T(typename Color4<T>::FloatingPointType(min(color.b(), color.a()))*bitMax<T>()/color.a() + typename Color4<T>::FloatingPointType(0.5)),
+        color.a() /* alpha is passed through unchanged */
+    };
+}
+
 /* Value for full channel (1.0f for floats, 255 for unsigned byte) */
 #if !defined(CORRADE_MSVC2017_COMPATIBILITY) || defined(CORRADE_MSVC2015_COMPATIBILITY)
 /* MSVC 2017 since 15.8 crashes with the following at a constructor line that
@@ -1204,6 +1241,48 @@ class Color4: public Vector4<T> {
             return Implementation::toXyz<T>(rgb());
         }
 
+        /**
+         * @brief Color with premultiplied alpha
+         * @m_since_latest
+         *
+         * If the input RGB channels don't exceed full channel value, the
+         * resulting RGB channels are less than or equal to alpha. Note that
+         * premultiplication isn't a reversible operation --- if alpha is zero,
+         * RGB channels become zero as well and @ref unpremultiplied() won't be
+         * able to recover the original values back. @f[
+         *  \boldsymbol{c_\mathrm{premult}} = (\boldsymbol{c_{rgb}} c_a, c_a)
+         * @f]
+         */
+        constexpr Color4<T> premultiplied() const {
+            return Implementation::premultiplied(*this);
+        }
+
+        /**
+         * @brief Color with unpremultiplied alpha
+         * @m_since_latest
+         *
+         * Assuming the input has premultiplied alpha, such as coming from
+         * @ref premultiplied(), returns an unpremultiplied color. Note that
+         * premultiplication isn't a reversible operation --- if alpha is zero,
+         * the RGB channels will be set to zero as well. @f[
+         *      \boldsymbol{c} = \begin{cases}
+         *          \boldsymbol{0}, & {c_\mathrm{premult}}_a = 0 \\
+         *          (\cfrac{\boldsymbol{{c_\mathrm{premult}}_{rgb}}}{{c_\mathrm{premult}}_a}, {c_\mathrm{premult}}_a), & {c_\mathrm{premult}}_a > 0
+         *      \end{cases}
+         * @f]
+         *
+         * Additionally, with packed types such as @ref Color4ub, RGB channels
+         * are clamped to alpha to avoid overflow: @f[
+         *      \boldsymbol{c} = \begin{cases}
+         *          \boldsymbol{0}, & {c_\mathrm{premult}}_a = 0 \\
+         *          (\cfrac{\min(\boldsymbol{{c_\mathrm{premult}}_{rgb}}, {c_\mathrm{premult}}_a)}{{c_\mathrm{premult}}_a}, {c_\mathrm{premult}}_a), & {c_\mathrm{premult}}_a > 0
+         *      \end{cases}
+         * @f]
+         */
+        constexpr Color4<T> unpremultiplied() const {
+            return Implementation::unpremultiplied(*this);
+        }
+
         /* Overloads to remove WTF-factor from return types */
         #ifndef DOXYGEN_GENERATING_OUTPUT
         Color3<T>& xyz() { return Color3<T>::from(Vector4<T>::data()); }
diff --git a/src/Magnum/Math/Test/ColorTest.cpp b/src/Magnum/Math/Test/ColorTest.cpp
index c0f12effd..67fdff8f7 100644
--- a/src/Magnum/Math/Test/ColorTest.cpp
+++ b/src/Magnum/Math/Test/ColorTest.cpp
@@ -128,6 +128,11 @@ struct ColorTest: TestSuite::Tester {
     void fromXyzDefaultAlpha();
     void xyY();
 
+    void premultiplied();
+    template<class T> void premultipliedRoundtrip();
+    void unpremultiplied();
+    template<class T> void unpremultipliedRoundtrip();
+
     void multiplyDivideIntegral();
 
     void strictWeakOrdering();
@@ -173,6 +178,15 @@ using Magnum::Deg;
 
 using namespace Literals;
 
+const struct {
+    Int r, g, b; /* premultiplied RGB channels, in tenths of the alpha value */
+} UnpremultipliedRoundtripData[]{
+    {10, 8, 2}, /* 1.0, 0.8, 0.2 -- same as in premultipliedRoundtrip() */
+    {4, 5, 0},
+    {9, 6, 7},
+    /** @todo for 1 and 3 it results in less precision, what to do? */
+};
+
 #if defined(CORRADE_TARGET_UNIX) || (defined(CORRADE_TARGET_WINDOWS) && !defined(CORRADE_TARGET_WINDOWS_RT)) || defined(CORRADE_TARGET_EMSCRIPTEN)
 const struct {
     const char* name;
@@ -261,7 +275,17 @@ ColorTest::ColorTest() {
               &ColorTest::fromXyzDefaultAlpha,
               &ColorTest::xyY,
 
-              &ColorTest::multiplyDivideIntegral,
+              &ColorTest::premultiplied,
+              &ColorTest::premultipliedRoundtrip<UnsignedByte>,
+              &ColorTest::premultipliedRoundtrip<UnsignedShort>,
+              &ColorTest::unpremultiplied});
+
+    addInstancedTests<ColorTest>({
+        &ColorTest::unpremultipliedRoundtrip<UnsignedByte>,
+        &ColorTest::unpremultipliedRoundtrip<UnsignedShort>},
+        Containers::arraySize(UnpremultipliedRoundtripData));
+
+    addTests({&ColorTest::multiplyDivideIntegral,
 
               &ColorTest::strictWeakOrdering,
 
@@ -1159,6 +1183,173 @@ void ColorTest::xyY() {
     CORRADE_COMPARE(xyYToXyz(xyY), xyz);
 }
 
+void ColorTest::premultiplied() {
+    /* Usual scenario */
+    CORRADE_COMPARE((Color4{0.6f, 0.8f, 0.4f, 0.25f}).premultiplied(), (Color4{0.15f, 0.2f, 0.1f, 0.25f}));
+    /* Slight imprecision with packed types */
+    CORRADE_COMPARE((Color4us{0x9999, 0xcccc, 0x6666, 0x3fff}).premultiplied(), (Color4us{0x2666, 0x3332, 0x1999, 0x3fff}));
+    /* The space before the dot is load-bearing --- without it the whole
+       `_rgba.premultiplied` gets lexed as a suffix of the literal */
+    CORRADE_COMPARE(0x99cc663f_rgba .premultiplied(), 0x2632193f_rgba);
+
+    /* Zero alpha just zeroes out the rest, no special treatment */
+    CORRADE_COMPARE((Color4{0.6f, 0.8f, 0.4f, 0.0f}).premultiplied(), (Color4{0.0f, 0.0f, 0.0f, 0.0f}));
+    CORRADE_COMPARE((Color4us{0, 0, 0, 0}).premultiplied(), (Color4us{0, 0, 0, 0}));
+    CORRADE_COMPARE(0x00000000_rgba .premultiplied(), 0x00000000_rgba);
+
+    /* RGB channels over 1 aren't treated in any special way */
+    CORRADE_COMPARE((Color4{1.6f, 1.8f, 1.4f, 0.25f}).premultiplied(), (Color4{0.4f, 0.45f, 0.35f, 0.25f}));
+    /* (no way to express this with packed types) */
+
+    constexpr Color4 a{0.6f, 0.8f, 0.4f, 0.25f};
+    constexpr Color4 ap = a.premultiplied(); /* same as above, but constexpr */
+    CORRADE_COMPARE(ap, (Color4{0.15f, 0.2f, 0.1f, 0.25f}));
+
+    constexpr Color4us b{0x9999, 0xcccc, 0x6666, 0x3fff};
+    constexpr Color4us bp = b.premultiplied();
+    CORRADE_COMPARE(bp, (Color4us{0x2666, 0x3332, 0x1999, 0x3fff}));
+
+    constexpr Color4ub c = 0x99cc663f_rgba;
+    constexpr Color4ub cp = c.premultiplied();
+    CORRADE_COMPARE(cp, 0x2632193f_rgba);
+}
+
+template<class T> void ColorTest::premultipliedRoundtrip() {
+    setTestCaseTemplateName(TypeTraits<T>::name());
+
+    /* Unpacking, premultiplying and packing a color should give the same
+       result as premultiplying a packed color directly. The implementation
+       doesn't use pack() etc to be constexpr so verify the two have the same
+       rounding behavior for every alpha value, full alpha included.
+
+       This only holds for premultiplied(), with unpremultiplied() it doesn't,
+       see unpremultipliedRoundtrip() below. */
+
+    for(UnsignedInt i = 0; i <= UnsignedInt(Implementation::bitMax<T>()); ++i) {
+        CORRADE_ITERATION(Debug::hex << i);
+
+        Math::Color4<T> a{
+            Math::pack<T>(1.0f), /* 0xff or 0xffff */
+            Math::pack<T>(0.8f), /* 0x99 or 0x9999 */
+            Math::pack<T>(0.2f), /* 0x33 or 0x3333 */
+            T(i)};
+        CORRADE_COMPARE(a.premultiplied(), Math::pack<Math::Color4<T>>(Math::unpack<Color4>(a).premultiplied()));
+    }
+}
+
+void ColorTest::unpremultiplied() {
+    /* Usual scenario, inverse of what's tested in premultiplied() */
+    CORRADE_COMPARE((Color4{0.15f, 0.2f, 0.1f, 0.25f}).unpremultiplied(), (Color4{0.6f, 0.8f, 0.4f, 0.25f}));
+    /* Slight imprecision with packed types */
+    CORRADE_COMPARE((Color4us{0x2666, 0x3333, 0x1999, 0x3fff}).unpremultiplied(), (Color4us{0x999a, 0xccce, 0x6665, 0x3fff}));
+    /* The space before the dot is needed to end the literal suffix */
+    CORRADE_COMPARE(0x2633193f_rgba .unpremultiplied(), 0x9ace653f_rgba);
+
+    /* With zero alpha the RGB channels get ignored, no matter what they are */
+    CORRADE_COMPARE((Color4{0.6f, 0.8f, 0.4f, 0.0f}).unpremultiplied(), (Color4{0.0f, 0.0f, 0.0f, 0.0f}));
+    CORRADE_COMPARE((Color4us{0x6666, 0xcccc, 0xffff, 0}).unpremultiplied(), (Color4us{0, 0, 0, 0}));
+    CORRADE_COMPARE(0x33ff9900_rgba .unpremultiplied(), 0x00000000_rgba);
+
+    /* RGB channels over alpha aren't treated in any special way for floats
+       (inverse of what's tested in premultiplied()) */
+    CORRADE_COMPARE((Color4{0.4f, 0.45f, 0.35f, 0.25f}).unpremultiplied(), (Color4{1.6f, 1.8f, 1.4f, 0.25f}));
+    /* For packed types they get individually clamped -- i.e., it's not all of
+       them being set to full channel, only those that overflow */
+    CORRADE_COMPARE((Color4us{0x6666, 0x2666, 0x4000, 0x3fff}).unpremultiplied(), (Color4us{0xffff, 0x999a, 0xffff, 0x3fff}));
+    CORRADE_COMPARE(0x2666193f_rgba .unpremultiplied(), 0x9aff653f_rgba);
+
+    constexpr Color4 ap{0.15f, 0.2f, 0.1f, 0.25f};
+    constexpr Color4 apz{0.15f, 0.2f, 0.1f, 0.0f}; /* zero-alpha variant */
+    constexpr Color4 a = ap.unpremultiplied();
+    constexpr Color4 az = apz.unpremultiplied();
+    CORRADE_COMPARE(a, (Color4{0.6f, 0.8f, 0.4f, 0.25f}));
+    CORRADE_COMPARE(az, (Color4{0.0f, 0.0f, 0.0f, 0.0f}));
+
+    /* Second channel overflows here */
+    constexpr Color4us bp{0x2666, 0x6666, 0x1999, 0x3fff};
+    constexpr Color4us bpz{0x2666, 0x6666, 0x1999, 0};
+    constexpr Color4us b = bp.unpremultiplied();
+    constexpr Color4us bz = bpz.unpremultiplied();
+    CORRADE_COMPARE(b, (Color4us{0x999a, 0xffff, 0x6665, 0x3fff}));
+    CORRADE_COMPARE(bz, (Color4us{0, 0, 0, 0}));
+
+    /* First channel overflows here */
+    constexpr Color4ub cp = 0x6633193f_rgba;
+    constexpr Color4ub cpz = 0x66331900_rgba;
+    constexpr Color4ub c = cp.unpremultiplied();
+    constexpr Color4ub cz = cpz.unpremultiplied();
+    CORRADE_COMPARE(c, 0xffce653f_rgba);
+    CORRADE_COMPARE(cz, 0x00000000_rgba);
+}
+
+/* A variant of packed unpremultiplied() that normalizes both the clamped
+   channel and alpha by bitMax first, matching unpack() + pack() rounding. */
+template<class T> constexpr Math::Color4<T> unpremultipliedPackedExact(const Math::Color4<T>& color) {
+    return color.a() == T(0) ? Math::Color4<T>{} : Math::Color4<T>{
+        T(Implementation::bitMax<T>()*((typename Math::Color4<T>::FloatingPointType(min(color.r(), color.a()))/Implementation::bitMax<T>())/(typename Math::Color4<T>::FloatingPointType(color.a())/Implementation::bitMax<T>())) + typename Math::Color4<T>::FloatingPointType(0.5)),
+        T(Implementation::bitMax<T>()*((typename Math::Color4<T>::FloatingPointType(min(color.g(), color.a()))/Implementation::bitMax<T>())/(typename Math::Color4<T>::FloatingPointType(color.a())/Implementation::bitMax<T>())) + typename Math::Color4<T>::FloatingPointType(0.5)),
+        T(Implementation::bitMax<T>()*((typename Math::Color4<T>::FloatingPointType(min(color.b(), color.a()))/Implementation::bitMax<T>())/(typename Math::Color4<T>::FloatingPointType(color.a())/Implementation::bitMax<T>())) + typename Math::Color4<T>::FloatingPointType(0.5)),
+        color.a()
+    };
+}
+
+template<class T> void ColorTest::unpremultipliedRoundtrip() {
+    auto&& data = UnpremultipliedRoundtripData[testCaseInstanceId()];
+    setTestCaseTemplateName(TypeTraits<T>::name());
+    setTestCaseDescription(Utility::format("{}/10, {}/10, {}/10", data.r, data.g, data.b));
+
+    /* Compared to premultipliedRoundtrip(), the sequence of operations with
+       pack()/unpack() causes extra rounding differences and in general is more
+       complex code. The simpler sequence in unpremultiplied() doesn't lead to
+       more precision always, but only in majority of cases, which is what this
+       test tries to show. Every alpha value, full alpha included, is checked. */
+
+    UnsignedInt impreciseCount = 0, preciseCount = 0;
+    for(UnsignedInt i = 0; i <= UnsignedInt(Implementation::bitMax<T>()); ++i) {
+        CORRADE_ITERATION(Debug::hex << i);
+
+        Math::Color4<T> ap{T(i*data.r/10),
+                           T(i*data.g/10),
+                           T(i*data.b/10),
+                           T(i)};
+
+        /* It only matches when we replicate the exact sequence of operations */
+        Math::Color4<T> ae = unpremultipliedPackedExact(ap);
+        CORRADE_COMPARE(ae, Math::pack<Math::Color4<T>>(Math::unpack<Color4>(ap).unpremultiplied()));
+
+        /* The unpremultiplied() implementation is at most off-by-one from
+           that. Casting, not unpacking, to a float type so we can compare with
+           a ±1 delta even the boundary values without overflow. */
+        Math::Color4<T> a = ap.unpremultiplied();
+        CORRADE_COMPARE_WITH(Color4{a},
+            Color4{Math::pack<Math::Color4<T>>(Math::unpack<Color4>(ap).unpremultiplied())},
+            TestSuite::Compare::around(Color4{1.0f, 1.0f}));
+
+        /* If they're different, the unpremultiplied() should be always closer
+           to the ideal than unpack() + pack() */
+        if(ae != a) {
+            const Color3 expected{
+                Float(UnsignedInt(Implementation::bitMax<T>())*data.r/10)/Implementation::bitMax<T>(),
+                Float(UnsignedInt(Implementation::bitMax<T>())*data.g/10)/Implementation::bitMax<T>(),
+                Float(UnsignedInt(Implementation::bitMax<T>())*data.b/10)/Implementation::bitMax<T>()};
+            const Float aDelta = (Math::unpack<Color3>(a.rgb()) - expected).dot();
+            const Float aeDelta = (Math::unpack<Color3>(ae.rgb()) - expected).dot();
+
+            if(aDelta > aeDelta)
+                ++impreciseCount;
+            else if(aDelta < aeDelta)
+                ++preciseCount;
+        }
+    }
+
+    if(impreciseCount > preciseCount)
+        CORRADE_FAIL(impreciseCount << "values out of" << (UnsignedInt(Implementation::bitMax<T>()) + 1) << "were less precise than the pack()/unpack() variant," << preciseCount << "were more precise.");
+    if(impreciseCount)
+        CORRADE_WARN(impreciseCount << "values out of" << (UnsignedInt(Implementation::bitMax<T>()) + 1) << "were less precise than the pack()/unpack() variant," << preciseCount << "were more precise.");
+    else if(preciseCount)
+        CORRADE_INFO(preciseCount << "values out of" << (UnsignedInt(Implementation::bitMax<T>()) + 1) << "were more precise than the pack()/unpack() variant.");
+}
+
 void ColorTest::multiplyDivideIntegral() {
     typedef Math::Color3<Int> Color3i;
     typedef Math::Color4<Int> Color4i;