Math: avoid operator[] calls where possible (and where it isn't too verbose).

In debug builds (where nothing is inlined), matrix multiplication now runs in about 33% of the time it took before, and matrix inversion is roughly twice as fast.
7 years ago · 05b1cefda5
7 changed files with 110 additions and 59 deletions
--- a/src/Magnum/Math/Bezier.h
+++ b/src/Magnum/Math/Bezier.h
@@ -150,7 +150,7 @@ template<UnsignedInt order, UnsignedInt dimensions, class T> class Bezier {
        /** @brief Equality comparison */
        bool operator==(const Bezier<order, dimensions, T>& other) const {
            for(std::size_t i = 0; i != order + 1; ++i)
-                if((*this)[i] != other[i]) return false;
+                if(_data[i] != other._data[i]) return false;
            return true;
        }

--- a/src/Magnum/Math/Matrix.h
+++ b/src/Magnum/Math/Matrix.h
@@ -219,6 +219,8 @@ template<std::size_t size, class T> class Matrix: public RectangularMatrix<size,
        #endif

    private:
+        friend struct Implementation::MatrixDeterminant<size, T>;
+
        /* Implementation for RectangularMatrix<cols, rows, T>::RectangularMatrix(const RectangularMatrix<cols, rows, U>&) */
        template<std::size_t otherSize, std::size_t ...col> constexpr explicit Matrix(Implementation::Sequence<col...>, const RectangularMatrix<otherSize, otherSize, T>& other) noexcept: RectangularMatrix<size, size, T>{Implementation::valueOrIdentityVector<size, col>(other)...} {}
 };
@@ -295,24 +297,30 @@ template<std::size_t size, class T> struct MatrixDeterminant {
    T operator()(const Matrix<size, T>& m);
 };

-template<std::size_t size, class T> T MatrixDeterminant<size, T>::operator()(const Matrix<size, T>& m) {
+template<std::size_t size, class T> inline T MatrixDeterminant<size, T>::operator()(const Matrix<size, T>& m) {
    T out(0);

+    /* Using ._data[] instead of [] to avoid function call indirection on debug
+       builds (saves a lot, yet doesn't obfuscate too much) */
    for(std::size_t col = 0; col != size; ++col)
-        out += ((col & 1) ? -1 : 1)*m[col][0]*m.ij(col, 0).determinant();
+        out += ((col & 1) ? -1 : 1)*m._data[col]._data[0]*m.ij(col, 0).determinant();

    return out;
 }

 template<class T> struct MatrixDeterminant<2, T> {
    constexpr T operator()(const Matrix<2, T>& m) const {
-        return m[0][0]*m[1][1] - m[1][0]*m[0][1];
+        /* Using ._data[] instead of [] to avoid function call indirection
+           on debug builds (saves a lot, yet doesn't obfuscate too much) */
+        return m._data[0]._data[0]*m._data[1]._data[1] - m._data[1]._data[0]*m._data[0]._data[1];
    }
 };

 template<class T> struct MatrixDeterminant<1, T> {
    constexpr T operator()(const Matrix<1, T>& m) const {
-        return m[0][0];
+        /* Using ._data[] instead of [] to avoid function call indirection
+           on debug builds (saves a lot, yet doesn't obfuscate too much) */
+        return m._data[0]._data[0];
    }
 };

@@ -322,14 +330,17 @@ template<std::size_t size, class T> struct StrictWeakOrdering<Matrix<size, T>>:
 #endif

 template<std::size_t size, class T> bool Matrix<size, T>::isOrthogonal() const {
+    /* Using ._data[] instead of [] to avoid function call indirection on debug
+       builds (saves a lot, yet doesn't obfuscate too much) */
+
    /* Normality */
    for(std::size_t i = 0; i != size; ++i)
-        if(!(*this)[i].isNormalized()) return false;
+        if(!RectangularMatrix<size, size, T>::_data[i].isNormalized()) return false;

    /* Orthogonality */
    for(std::size_t i = 0; i != size-1; ++i)
        for(std::size_t j = i+1; j != size; ++j)
-            if(dot((*this)[i], (*this)[j]) > TypeTraits<T>::epsilon())
+            if(dot(RectangularMatrix<size, size, T>::_data[i], RectangularMatrix<size, size, T>::_data[j]) > TypeTraits<T>::epsilon())
                return false;

    return true;
@@ -338,10 +349,13 @@ template<std::size_t size, class T> bool Matrix<size, T>::isOrthogonal() const {
 template<std::size_t size, class T> Matrix<size-1, T> Matrix<size, T>::ij(const std::size_t skipCol, const std::size_t skipRow) const {
    Matrix<size-1, T> out{NoInit};

+    /* Using ._data[] instead of [] to avoid function call indirection on debug
+       builds (saves a lot, yet doesn't obfuscate too much) */
    for(std::size_t col = 0; col != size-1; ++col)
        for(std::size_t row = 0; row != size-1; ++row)
-            out[col][row] = (*this)[col + (col >= skipCol)]
-                                    [row + (row >= skipRow)];
+            out._data[col]._data[row] = RectangularMatrix<size, size, T>::
+                _data[col + (col >= skipCol)]
+               ._data[row + (row >= skipRow)];

    return out;
 }
@@ -351,9 +365,11 @@ template<std::size_t size, class T> Matrix<size, T> Matrix<size, T>::inverted()

    const T _determinant = determinant();

+    /* Using ._data[] instead of [] to avoid function call indirection on debug
+       builds (saves a lot, yet doesn't obfuscate too much) */
    for(std::size_t col = 0; col != size; ++col)
        for(std::size_t row = 0; row != size; ++row)
-            out[col][row] = (((row+col) & 1) ? -1 : 1)*ij(row, col).determinant()/_determinant;
+            out._data[col]._data[row] = (((row+col) & 1) ? -1 : 1)*ij(row, col).determinant()/_determinant;

    return out;
 }
--- a/src/Magnum/Math/RectangularMatrix.h
+++ b/src/Magnum/Math/RectangularMatrix.h
@@ -443,6 +443,11 @@ template<std::size_t cols, std::size_t rows, class T> class RectangularMatrix {
        template<std::size_t ...sequence> constexpr explicit RectangularMatrix(Implementation::Sequence<sequence...>, T value) noexcept: _data{Vector<rows, T>((static_cast<void>(sequence), value))...} {}

    private:
+        /* These two needed to access _data to speed up debug builds,
+           Matrix::ij() needs access to different Matrix sizes */
+        template<std::size_t, class> friend class Matrix;
+        template<std::size_t, class> friend struct Implementation::MatrixDeterminant;
+
        /* Implementation for RectangularMatrix<cols, rows, T>::RectangularMatrix(const RectangularMatrix<cols, rows, U>&) */
        template<class U, std::size_t ...sequence> constexpr explicit RectangularMatrix(Implementation::Sequence<sequence...>, const RectangularMatrix<cols, rows, U>& matrix) noexcept: _data{Vector<rows, T>(matrix[sequence])...} {}

@@ -451,11 +456,11 @@ template<std::size_t cols, std::size_t rows, class T> class RectangularMatrix {
        template<class U, std::size_t ...sequence> constexpr explicit RectangularMatrix(Implementation::Sequence<sequence...>, U) noexcept: _data{Vector<rows, T>((static_cast<void>(sequence), U{typename U::Init{}}))...} {}

        template<std::size_t ...sequence> constexpr RectangularMatrix<cols, rows, T> flippedColsInternal(Implementation::Sequence<sequence...>) const {
-            return {(*this)[cols - 1 - sequence]...};
+            return {_data[cols - 1 - sequence]...};
        }

        template<std::size_t ...sequence> constexpr RectangularMatrix<cols, rows, T> flippedRowsInternal(Implementation::Sequence<sequence...>) const {
-            return {(*this)[sequence].flipped()...};
+            return {_data[sequence].flipped()...};
        }

        template<std::size_t ...sequence> constexpr Vector<DiagonalSize, T> diagonalInternal(Implementation::Sequence<sequence...>) const;
@@ -708,15 +713,19 @@ template<std::size_t cols, std::size_t rows, class T> template<std::size_t ...se
 template<std::size_t cols, std::size_t rows, class T> inline Vector<cols, T> RectangularMatrix<cols, rows, T>::row(std::size_t row) const {
    Vector<cols, T> out;

+    /* Using ._data[] instead of [] to avoid function call indirection
+       on debug builds (saves a lot, yet doesn't obfuscate too much) */
    for(std::size_t i = 0; i != cols; ++i)
-        out[i] = _data[i][row];
+        out[i] = _data[i]._data[row];

    return out;
 }

 template<std::size_t cols, std::size_t rows, class T> inline void RectangularMatrix<cols, rows, T>::setRow(std::size_t row, const Vector<cols, T>& data) {
+    /* Using ._data[] instead of [] to avoid function call indirection
+       on debug builds (saves a lot, yet doesn't obfuscate too much) */
    for(std::size_t i = 0; i != cols; ++i)
-        _data[i][row] = data[i];
+        _data[i]._data[row] = data._data[i];
 }

 template<std::size_t cols, std::size_t rows, class T> inline RectangularMatrix<cols, rows, T> RectangularMatrix<cols, rows, T>::operator-() const {
@@ -731,10 +740,12 @@ template<std::size_t cols, std::size_t rows, class T> inline RectangularMatrix<c
 template<std::size_t cols, std::size_t rows, class T> template<std::size_t size> inline RectangularMatrix<size, rows, T> RectangularMatrix<cols, rows, T>::operator*(const RectangularMatrix<size, cols, T>& other) const {
    RectangularMatrix<size, rows, T> out{ZeroInit};

+    /* Using ._data[] instead of [] to avoid function call indirection
+       on debug builds (saves a lot, yet doesn't obfuscate too much) */
    for(std::size_t col = 0; col != size; ++col)
        for(std::size_t row = 0; row != rows; ++row)
            for(std::size_t pos = 0; pos != cols; ++pos)
-                out[col][row] += _data[pos][row]*other._data[col][pos];
+                out._data[col]._data[row] += _data[pos]._data[row]*other._data[col]._data[pos];

    return out;
 }
@@ -742,9 +753,11 @@ template<std::size_t cols, std::size_t rows, class T> template<std::size_t size>
 template<std::size_t cols, std::size_t rows, class T> inline RectangularMatrix<rows, cols, T> RectangularMatrix<cols, rows, T>::transposed() const {
    RectangularMatrix<rows, cols, T> out{NoInit};

+    /* Using ._data[] instead of [] to avoid function call indirection
+       on debug builds (saves a lot, yet doesn't obfuscate too much) */
    for(std::size_t col = 0; col != cols; ++col)
        for(std::size_t row = 0; row != rows; ++row)
-            out[row][col] = _data[col][row];
+            out._data[row]._data[col] = _data[col]._data[row];

    return out;
 }
@@ -753,7 +766,7 @@ template<std::size_t cols, std::size_t rows, class T> constexpr auto Rectangular

 #ifndef DOXYGEN_GENERATING_OUTPUT
 template<std::size_t cols, std::size_t rows, class T> template<std::size_t ...sequence> constexpr auto RectangularMatrix<cols, rows, T>::diagonalInternal(Implementation::Sequence<sequence...>) const -> Vector<DiagonalSize, T> {
-    return {(*this)[sequence][sequence]...};
+    return {_data[sequence][sequence]...};
 }
 #endif

--- a/src/Magnum/Math/Vector.h
+++ b/src/Magnum/Math/Vector.h
@@ -74,6 +74,9 @@ namespace Implementation {
            return vec == Vector<size, T>{};
        }
    };
+
+    /* Used to make friends to speed up debug builds */
+    template<std::size_t, class> struct MatrixDeterminant;
 }

 /** @relatesalso Vector
@@ -125,8 +128,6 @@ See @ref matrix-vector for brief introduction.
 template<std::size_t size, class T> class Vector {
    static_assert(size != 0, "Vector cannot have zero elements");

-    template<std::size_t, class> friend class Vector;
-
    public:
        typedef T Type;         /**< @brief Underlying data type */

@@ -609,7 +610,22 @@ template<std::size_t size, class T> class Vector {
         */
        std::pair<T, T> minmax() const;

+    #ifndef DOXYGEN_GENERATING_OUTPUT
+    protected:
+    #else
+    private:
+    #endif
+        /* So derived classes can avoid the overhead of operator[] in debug
+           builds */
+        T _data[size];
+
    private:
+        template<std::size_t, class> friend class Vector;
+        /* These three needed to access _data to speed up debug builds */
+        template<std::size_t, std::size_t, class> friend class RectangularMatrix;
+        template<std::size_t, class> friend class Matrix;
+        template<std::size_t, class> friend struct Implementation::MatrixDeterminant;
+
        /* Implementation for Vector<size, T>::Vector(const Vector<size, U>&) */
        template<class U, std::size_t ...sequence> constexpr explicit Vector(Implementation::Sequence<sequence...>, const Vector<size, U>& vector) noexcept: _data{T(vector._data[sequence])...} {}

@@ -621,10 +637,8 @@ template<std::size_t size, class T> class Vector {
        }

        template<std::size_t ...sequence> constexpr Vector<size, T> flippedInternal(Implementation::Sequence<sequence...>) const {
-            return {(*this)[size - 1 - sequence]...};
+            return {_data[size - 1 - sequence]...};
        }
-
-        T _data[size];
 };

 /** @relates Vector
--- a/src/Magnum/Math/Vector2.h
+++ b/src/Magnum/Math/Vector2.h
@@ -144,10 +144,10 @@ template<class T> class Vector2: public Vector<2, T> {
        /** @brief Copy constructor */
        constexpr /*implicit*/ Vector2(const Vector<2, T>& other) noexcept: Vector<2, T>(other) {}

-        T& x() { return (*this)[0]; }                   /**< @brief X component */
-        constexpr T x() const { return (*this)[0]; }    /**< @overload */
-        T& y() { return (*this)[1]; }                   /**< @brief Y component */
-        constexpr T y() const { return (*this)[1]; }    /**< @overload */
+        T& x() { return Vector<2, T>::_data[0]; } /**< @brief X component */
+        constexpr T x() const { return Vector<2, T>::_data[0]; } /**< @overload */
+        T& y() { return Vector<2, T>::_data[1]; } /**< @brief Y component */
+        constexpr T y() const { return Vector<2, T>::_data[1]; } /**< @overload */

        /**
         * @brief Perpendicular vector
--- a/src/Magnum/Math/Vector3.h
+++ b/src/Magnum/Math/Vector3.h
@@ -179,48 +179,48 @@ template<class T> class Vector3: public Vector<3, T> {
         *
         * @see @ref r()
         */
-        T& x() { return (*this)[0]; }
-        constexpr T x() const { return (*this)[0]; }    /**< @overload */
+        T& x() { return Vector<3, T>::_data[0]; }
+        constexpr T x() const { return Vector<3, T>::_data[0]; } /**< @overload */

        /**
         * @brief Y component
         *
         * @see @ref g()
         */
-        T& y() { return (*this)[1]; }
-        constexpr T y() const { return (*this)[1]; }    /**< @overload */
+        T& y() { return Vector<3, T>::_data[1]; }
+        constexpr T y() const { return Vector<3, T>::_data[1]; } /**< @overload */

        /**
         * @brief Z component
         *
         * @see @ref b()
         */
-        T& z() { return (*this)[2]; }
-        constexpr T z() const { return (*this)[2]; }    /**< @overload */
+        T& z() { return Vector<3, T>::_data[2]; }
+        constexpr T z() const { return Vector<3, T>::_data[2]; } /**< @overload */

        /**
         * @brief R component
         *
         * Equivalent to @ref x().
         */
-        T& r() { return x(); }
-        constexpr T r() const { return x(); }           /**< @overload */
+        T& r() { return Vector<3, T>::_data[0]; }
+        constexpr T r() const { return Vector<3, T>::_data[0]; } /**< @overload */

        /**
         * @brief G component
         *
         * Equivalent to @ref y().
         */
-        T& g() { return y(); }
-        constexpr T g() const { return y(); }           /**< @overload */
+        T& g() { return Vector<3, T>::_data[1]; }
+        constexpr T g() const { return Vector<3, T>::_data[1]; } /**< @overload */

        /**
         * @brief B component
         *
         * Equivalent to @ref z().
         */
-        T& b() { return z(); }
-        constexpr T b() const { return z(); }           /**< @overload */
+        T& b() { return Vector<3, T>::_data[2]; }
+        constexpr T b() const { return Vector<3, T>::_data[2]; } /**< @overload */

        /**
         * @brief XY part of the vector
@@ -229,7 +229,9 @@ template<class T> class Vector3: public Vector<3, T> {
         * @see @ref swizzle()
         */
        Vector2<T>& xy() { return Vector2<T>::from(Vector<3, T>::data()); }
-        constexpr const Vector2<T> xy() const { return {x(), y()}; } /**< @overload */
+        constexpr const Vector2<T> xy() const {
+            return {Vector<3, T>::_data[0], Vector<3, T>::_data[1]};
+        } /**< @overload */

        MAGNUM_VECTOR_SUBCLASS_IMPLEMENTATION(3, Vector3)
 };
--- a/src/Magnum/Math/Vector4.h
+++ b/src/Magnum/Math/Vector4.h
@@ -110,64 +110,64 @@ template<class T> class Vector4: public Vector<4, T> {
         *
         * @see @ref r()
         */
-        T& x() { return (*this)[0]; }
-        constexpr T x() const { return (*this)[0]; }    /**< @overload */
+        T& x() { return Vector<4, T>::_data[0]; }
+        constexpr T x() const { return Vector<4, T>::_data[0]; } /**< @overload */

        /**
         * @brief Y component
         *
         * @see @ref g()
         */
-        T& y() { return (*this)[1]; }
-        constexpr T y() const { return (*this)[1]; }    /**< @overload */
+        T& y() { return Vector<4, T>::_data[1]; }
+        constexpr T y() const { return Vector<4, T>::_data[1]; } /**< @overload */

        /**
         * @brief Z component
         *
         * @see @ref b()
         */
-        T& z() { return (*this)[2]; }
-        constexpr T z() const { return (*this)[2]; }    /**< @overload */
+        T& z() { return Vector<4, T>::_data[2]; }
+        constexpr T z() const { return Vector<4, T>::_data[2]; } /**< @overload */

        /**
         * @brief W component
         *
         * @see @ref a()
         */
-        T& w() { return (*this)[3]; }
-        constexpr T w() const { return (*this)[3]; }    /**< @overload */
+        T& w() { return Vector<4, T>::_data[3]; }
+        constexpr T w() const { return Vector<4, T>::_data[3]; } /**< @overload */

        /**
         * @brief R component
         *
         * Equivalent to @ref x().
         */
-        T& r() { return x(); }
-        constexpr T r() const { return x(); }           /**< @overload */
+        T& r() { return Vector<4, T>::_data[0]; }
+        constexpr T r() const { return Vector<4, T>::_data[0]; } /**< @overload */

        /**
         * @brief G component
         *
         * Equivalent to @ref y().
         */
-        T& g() { return y(); }
-        constexpr T g() const { return y(); }           /**< @overload */
+        T& g() { return Vector<4, T>::_data[1]; }
+        constexpr T g() const { return Vector<4, T>::_data[1]; } /**< @overload */

        /**
         * @brief B component
         *
         * Equivalent to @ref z().
         */
-        T& b() { return z(); }
-        constexpr T b() const { return z(); }           /**< @overload */
+        T& b() { return Vector<4, T>::_data[2]; }
+        constexpr T b() const { return Vector<4, T>::_data[2]; } /**< @overload */

        /**
         * @brief A component
         *
         * Equivalent to @ref w().
         */
-        T& a() { return w(); }
-        constexpr T a() const { return w(); }           /**< @overload */
+        T& a() { return Vector<4, T>::_data[3]; }
+        constexpr T a() const { return Vector<4, T>::_data[3]; } /**< @overload */

        /**
         * @brief XYZ part of the vector
@@ -176,7 +176,9 @@ template<class T> class Vector4: public Vector<4, T> {
         * @see @ref swizzle(), @ref rgb()
         */
        Vector3<T>& xyz() { return Vector3<T>::from(Vector<4, T>::data()); }
-        constexpr const Vector3<T> xyz() const { return {x(), y(), z()}; } /**< @overload */
+        constexpr const Vector3<T> xyz() const {
+            return {Vector<4, T>::_data[0], Vector<4, T>::_data[1], Vector<4, T>::_data[2]};
+        } /**< @overload */

        /**
         * @brief RGB part of the vector
@@ -185,8 +187,10 @@ template<class T> class Vector4: public Vector<4, T> {
         * Equivalent to @ref xyz().
         * @see @ref swizzle()
         */
-        Vector3<T>& rgb() { return xyz(); }
-        constexpr const Vector3<T> rgb() const { return xyz(); } /**< @overload */
+        Vector3<T>& rgb() { return Vector3<T>::from(Vector<4, T>::data()); }
+        constexpr const Vector3<T> rgb() const {
+            return {Vector<4, T>::_data[0], Vector<4, T>::_data[1], Vector<4, T>::_data[2]};
+        } /**< @overload */

        /**
         * @brief XY part of the vector
@@ -195,7 +199,9 @@ template<class T> class Vector4: public Vector<4, T> {
         * @see @ref swizzle()
         */
        Vector2<T>& xy() { return Vector2<T>::from(Vector<4, T>::data()); }
-        constexpr const Vector2<T> xy() const { return {x(), y()}; } /**< @overload */
+        constexpr const Vector2<T> xy() const {
+            return {Vector<4, T>::_data[0], Vector<4, T>::_data[1]};
+        } /**< @overload */

        MAGNUM_VECTOR_SUBCLASS_IMPLEMENTATION(4, Vector4)
 };