From 05b1cefda522ac9d8fbe697e2e376f296a6c499a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Wed, 3 Apr 2019 16:06:26 +0200 Subject: [PATCH] Math: avoid operator[] calls where possible (and not too verbose). In debug builds (where nothing is inlined), this makes matrix multiplication run in 33% of the time and matrix inversion roughly twice as fast. --- src/Magnum/Math/Bezier.h | 2 +- src/Magnum/Math/Matrix.h | 34 +++++++++++++++------ src/Magnum/Math/RectangularMatrix.h | 27 ++++++++++++----- src/Magnum/Math/Vector.h | 24 +++++++++++---- src/Magnum/Math/Vector2.h | 8 ++--- src/Magnum/Math/Vector3.h | 28 ++++++++++-------- src/Magnum/Math/Vector4.h | 46 ++++++++++++++++------------- 7 files changed, 110 insertions(+), 59 deletions(-) diff --git a/src/Magnum/Math/Bezier.h b/src/Magnum/Math/Bezier.h index 67c3e1f1f..42a639908 100644 --- a/src/Magnum/Math/Bezier.h +++ b/src/Magnum/Math/Bezier.h @@ -150,7 +150,7 @@ template class Bezier { /** @brief Equality comparison */ bool operator==(const Bezier& other) const { for(std::size_t i = 0; i != order + 1; ++i) - if((*this)[i] != other[i]) return false; + if(_data[i] != other._data[i]) return false; return true; } diff --git a/src/Magnum/Math/Matrix.h b/src/Magnum/Math/Matrix.h index c26e42113..95d60118c 100644 --- a/src/Magnum/Math/Matrix.h +++ b/src/Magnum/Math/Matrix.h @@ -219,6 +219,8 @@ template class Matrix: public RectangularMatrix; + /* Implementation for RectangularMatrix::RectangularMatrix(const RectangularMatrix&) */ template constexpr explicit Matrix(Implementation::Sequence, const RectangularMatrix& other) noexcept: RectangularMatrix{Implementation::valueOrIdentityVector(other)...} {} }; @@ -295,24 +297,30 @@ template struct MatrixDeterminant { T operator()(const Matrix& m); }; -template T MatrixDeterminant::operator()(const Matrix& m) { +template inline T MatrixDeterminant::operator()(const Matrix& m) { T out(0); + /* Using ._data[] instead of [] to avoid function call indirection on debug + builds (saves a lot, yet doesn't obfuscate too much) */ for(std::size_t col = 0; col != size; ++col) - out += ((col & 1) ? -1 : 1)*m[col][0]*m.ij(col, 0).determinant(); + out += ((col & 1) ? -1 : 1)*m._data[col]._data[0]*m.ij(col, 0).determinant(); return out; } template struct MatrixDeterminant<2, T> { constexpr T operator()(const Matrix<2, T>& m) const { - return m[0][0]*m[1][1] - m[1][0]*m[0][1]; + /* Using ._data[] instead of [] to avoid function call indirection + on debug builds (saves a lot, yet doesn't obfuscate too much) */ + return m._data[0]._data[0]*m._data[1]._data[1] - m._data[1]._data[0]*m._data[0]._data[1]; } }; template struct MatrixDeterminant<1, T> { constexpr T operator()(const Matrix<1, T>& m) const { - return m[0][0]; + /* Using ._data[] instead of [] to avoid function call indirection + on debug builds (saves a lot, yet doesn't obfuscate too much) */ + return m._data[0]._data[0]; } }; @@ -322,14 +330,17 @@ template struct StrictWeakOrdering>: #endif template bool Matrix::isOrthogonal() const { + /* Using ._data[] instead of [] to avoid function call indirection on debug + builds (saves a lot, yet doesn't obfuscate too much) */ + /* Normality */ for(std::size_t i = 0; i != size; ++i) - if(!(*this)[i].isNormalized()) return false; + if(!RectangularMatrix::_data[i].isNormalized()) return false; /* Orthogonality */ for(std::size_t i = 0; i != size-1; ++i) for(std::size_t j = i+1; j != size; ++j) - if(dot((*this)[i], (*this)[j]) > TypeTraits::epsilon()) + if(dot(RectangularMatrix::_data[i], RectangularMatrix::_data[j]) > TypeTraits::epsilon()) return false; return true; @@ -338,10 +349,13 @@ template bool Matrix::isOrthogonal() const { template Matrix Matrix::ij(const std::size_t skipCol, const std::size_t skipRow) const { Matrix out{NoInit}; + /* Using ._data[] instead of [] to avoid function call indirection on debug + builds (saves a lot, yet doesn't obfuscate too much) */ for(std::size_t col = 0; col != size-1; ++col) for(std::size_t row = 0; row != size-1; ++row) - out[col][row] = (*this)[col + (col >= skipCol)] - [row + (row >= skipRow)]; + out._data[col]._data[row] = RectangularMatrix:: + _data[col + (col >= skipCol)] + ._data[row + (row >= skipRow)]; return out; } @@ -351,9 +365,11 @@ template Matrix Matrix::inverted() const T _determinant = determinant(); + /* Using ._data[] instead of [] to avoid function call indirection on debug + builds (saves a lot, yet doesn't obfuscate too much) */ for(std::size_t col = 0; col != size; ++col) for(std::size_t row = 0; row != size; ++row) - out[col][row] = (((row+col) & 1) ? -1 : 1)*ij(row, col).determinant()/_determinant; + out._data[col]._data[row] = (((row+col) & 1) ? -1 : 1)*ij(row, col).determinant()/_determinant; return out; } diff --git a/src/Magnum/Math/RectangularMatrix.h b/src/Magnum/Math/RectangularMatrix.h index c8882f64b..5c3f1343f 100644 --- a/src/Magnum/Math/RectangularMatrix.h +++ b/src/Magnum/Math/RectangularMatrix.h @@ -443,6 +443,11 @@ template class RectangularMatrix { template constexpr explicit RectangularMatrix(Implementation::Sequence, T value) noexcept: _data{Vector((static_cast(sequence), value))...} {} private: + /* These two needed to access _data to speed up debug builds, + Matrix::ij() needs access to different Matrix sizes */ + template friend class Matrix; + template friend struct Implementation::MatrixDeterminant; + /* Implementation for RectangularMatrix::RectangularMatrix(const RectangularMatrix&) */ template constexpr explicit RectangularMatrix(Implementation::Sequence, const RectangularMatrix& matrix) noexcept: _data{Vector(matrix[sequence])...} {} @@ -451,11 +456,11 @@ template class RectangularMatrix { template constexpr explicit RectangularMatrix(Implementation::Sequence, U) noexcept: _data{Vector((static_cast(sequence), U{typename U::Init{}}))...} {} template constexpr RectangularMatrix flippedColsInternal(Implementation::Sequence) const { - return {(*this)[cols - 1 - sequence]...}; + return {_data[cols - 1 - sequence]...}; } template constexpr RectangularMatrix flippedRowsInternal(Implementation::Sequence) const { - return {(*this)[sequence].flipped()...}; + return {_data[sequence].flipped()...}; } template constexpr Vector diagonalInternal(Implementation::Sequence) const; @@ -708,15 +713,19 @@ template template inline Vector RectangularMatrix::row(std::size_t row) const { Vector out; + /* Using ._data[] instead of [] to avoid function call indirection + on debug builds (saves a lot, yet doesn't obfuscate too much) */ for(std::size_t i = 0; i != cols; ++i) - out[i] = _data[i][row]; + out[i] = _data[i]._data[row]; return out; } template inline void RectangularMatrix::setRow(std::size_t row, const Vector& data) { + /* Using ._data[] instead of [] to avoid function call indirection + on debug builds (saves a lot, yet doesn't obfuscate too much) */ for(std::size_t i = 0; i != cols; ++i) - _data[i][row] = data[i]; + _data[i]._data[row] = data._data[i]; } template inline RectangularMatrix RectangularMatrix::operator-() const { @@ -731,10 +740,12 @@ template inline RectangularMatrix template inline RectangularMatrix RectangularMatrix::operator*(const RectangularMatrix& other) const { RectangularMatrix out{ZeroInit}; + /* Using ._data[] instead of [] to avoid function call indirection + on debug builds (saves a lot, yet doesn't obfuscate too much) */ for(std::size_t col = 0; col != size; ++col) for(std::size_t row = 0; row != rows; ++row) for(std::size_t pos = 0; pos != cols; ++pos) - out[col][row] += _data[pos][row]*other._data[col][pos]; + out._data[col]._data[row] += _data[pos]._data[row]*other._data[col]._data[pos]; return out; } @@ -742,9 +753,11 @@ template template template inline RectangularMatrix RectangularMatrix::transposed() const { RectangularMatrix out{NoInit}; + /* Using ._data[] instead of [] to avoid function call indirection + on debug builds (saves a lot, yet doesn't obfuscate too much) */ for(std::size_t col = 0; col != cols; ++col) for(std::size_t row = 0; row != rows; ++row) - out[row][col] = _data[col][row]; + out._data[row]._data[col] = _data[col]._data[row]; return out; } @@ -753,7 +766,7 @@ template constexpr auto Rectangular #ifndef DOXYGEN_GENERATING_OUTPUT template template constexpr auto RectangularMatrix::diagonalInternal(Implementation::Sequence) const -> Vector { - return {(*this)[sequence][sequence]...}; + return {_data[sequence][sequence]...}; } #endif diff --git a/src/Magnum/Math/Vector.h b/src/Magnum/Math/Vector.h index 57d3217cc..2015cf31c 100644 --- a/src/Magnum/Math/Vector.h +++ b/src/Magnum/Math/Vector.h @@ -74,6 +74,9 @@ namespace Implementation { return vec == Vector{}; } }; + + /* Used to make friends to speed up debug builds */ + template struct MatrixDeterminant; } /** @relatesalso Vector @@ -125,8 +128,6 @@ See @ref matrix-vector for brief introduction. template class Vector { static_assert(size != 0, "Vector cannot have zero elements"); - template friend class Vector; - public: typedef T Type; /**< @brief Underlying data type */ @@ -609,7 +610,22 @@ template class Vector { */ std::pair minmax() const; + #ifndef DOXYGEN_GENERATING_OUTPUT + protected: + #else + private: + #endif + /* So derived classes can avoid the overhead of operator[] in debug + builds */ + T _data[size]; + private: + template friend class Vector; + /* These three needed to access _data to speed up debug builds */ + template friend class RectangularMatrix; + template friend class Matrix; + template friend struct Implementation::MatrixDeterminant; + /* Implementation for Vector::Vector(const Vector&) */ template constexpr explicit Vector(Implementation::Sequence, const Vector& vector) noexcept: _data{T(vector._data[sequence])...} {} @@ -621,10 +637,8 @@ template class Vector { } template constexpr Vector flippedInternal(Implementation::Sequence) const { - return {(*this)[size - 1 - sequence]...}; + return {_data[size - 1 - sequence]...}; } - - T _data[size]; }; /** @relates Vector diff --git a/src/Magnum/Math/Vector2.h b/src/Magnum/Math/Vector2.h index 5cfe22fce..cf30409f2 100644 --- a/src/Magnum/Math/Vector2.h +++ b/src/Magnum/Math/Vector2.h @@ -144,10 +144,10 @@ template class Vector2: public Vector<2, T> { /** @brief Copy constructor */ constexpr /*implicit*/ Vector2(const Vector<2, T>& other) noexcept: Vector<2, T>(other) {} - T& x() { return (*this)[0]; } /**< @brief X component */ - constexpr T x() const { return (*this)[0]; } /**< @overload */ - T& y() { return (*this)[1]; } /**< @brief Y component */ - constexpr T y() const { return (*this)[1]; } /**< @overload */ + T& x() { return Vector<2, T>::_data[0]; } /**< @brief X component */ + constexpr T x() const { return Vector<2, T>::_data[0]; } /**< @overload */ + T& y() { return Vector<2, T>::_data[1]; } /**< @brief Y component */ + constexpr T y() const { return Vector<2, T>::_data[1]; } /**< @overload */ /** * @brief Perpendicular vector diff --git a/src/Magnum/Math/Vector3.h b/src/Magnum/Math/Vector3.h index e75855c04..88ef8d5ba 100644 --- a/src/Magnum/Math/Vector3.h +++ b/src/Magnum/Math/Vector3.h @@ -179,48 +179,48 @@ template class Vector3: public Vector<3, T> { * * @see @ref r() */ - T& x() { return (*this)[0]; } - constexpr T x() const { return (*this)[0]; } /**< @overload */ + T& x() { return Vector<3, T>::_data[0]; } + constexpr T x() const { return Vector<3, T>::_data[0]; } /**< @overload */ /** * @brief Y component * * @see @ref g() */ - T& y() { return (*this)[1]; } - constexpr T y() const { return (*this)[1]; } /**< @overload */ + T& y() { return Vector<3, T>::_data[1]; } + constexpr T y() const { return Vector<3, T>::_data[1]; } /**< @overload */ /** * @brief Z component * * @see @ref b() */ - T& z() { return (*this)[2]; } - constexpr T z() const { return (*this)[2]; } /**< @overload */ + T& z() { return Vector<3, T>::_data[2]; } + constexpr T z() const { return Vector<3, T>::_data[2]; } /**< @overload */ /** * @brief R component * * Equivalent to @ref x(). */ - T& r() { return x(); } - constexpr T r() const { return x(); } /**< @overload */ + T& r() { return Vector<3, T>::_data[0]; } + constexpr T r() const { return Vector<3, T>::_data[0]; } /**< @overload */ /** * @brief G component * * Equivalent to @ref y(). */ - T& g() { return y(); } - constexpr T g() const { return y(); } /**< @overload */ + T& g() { return Vector<3, T>::_data[1]; } + constexpr T g() const { return Vector<3, T>::_data[1]; } /**< @overload */ /** * @brief B component * * Equivalent to @ref z(). */ - T& b() { return z(); } - constexpr T b() const { return z(); } /**< @overload */ + T& b() { return Vector<3, T>::_data[2]; } + constexpr T b() const { return Vector<3, T>::_data[2]; } /**< @overload */ /** * @brief XY part of the vector @@ -229,7 +229,9 @@ template class Vector3: public Vector<3, T> { * @see @ref swizzle() */ Vector2& xy() { return Vector2::from(Vector<3, T>::data()); } - constexpr const Vector2 xy() const { return {x(), y()}; } /**< @overload */ + constexpr const Vector2 xy() const { + return {Vector<3, T>::_data[0], Vector<3, T>::_data[1]}; + } /**< @overload */ MAGNUM_VECTOR_SUBCLASS_IMPLEMENTATION(3, Vector3) }; diff --git a/src/Magnum/Math/Vector4.h b/src/Magnum/Math/Vector4.h index d18e0015a..b10cb16ff 100644 --- a/src/Magnum/Math/Vector4.h +++ b/src/Magnum/Math/Vector4.h @@ -110,64 +110,64 @@ template class Vector4: public Vector<4, T> { * * @see @ref r() */ - T& x() { return (*this)[0]; } - constexpr T x() const { return (*this)[0]; } /**< @overload */ + T& x() { return Vector<4, T>::_data[0]; } + constexpr T x() const { return Vector<4, T>::_data[0]; } /**< @overload */ /** * @brief Y component * * @see @ref g() */ - T& y() { return (*this)[1]; } - constexpr T y() const { return (*this)[1]; } /**< @overload */ + T& y() { return Vector<4, T>::_data[1]; } + constexpr T y() const { return Vector<4, T>::_data[1]; } /**< @overload */ /** * @brief Z component * * @see @ref b() */ - T& z() { return (*this)[2]; } - constexpr T z() const { return (*this)[2]; } /**< @overload */ + T& z() { return Vector<4, T>::_data[2]; } + constexpr T z() const { return Vector<4, T>::_data[2]; } /**< @overload */ /** * @brief W component * * @see @ref a() */ - T& w() { return (*this)[3]; } - constexpr T w() const { return (*this)[3]; } /**< @overload */ + T& w() { return Vector<4, T>::_data[3]; } + constexpr T w() const { return Vector<4, T>::_data[3]; } /**< @overload */ /** * @brief R component * * Equivalent to @ref x(). */ - T& r() { return x(); } - constexpr T r() const { return x(); } /**< @overload */ + T& r() { return Vector<4, T>::_data[0]; } + constexpr T r() const { return Vector<4, T>::_data[0]; } /**< @overload */ /** * @brief G component * * Equivalent to @ref y(). */ - T& g() { return y(); } - constexpr T g() const { return y(); } /**< @overload */ + T& g() { return Vector<4, T>::_data[1]; } + constexpr T g() const { return Vector<4, T>::_data[1]; } /**< @overload */ /** * @brief B component * * Equivalent to @ref z(). */ - T& b() { return z(); } - constexpr T b() const { return z(); } /**< @overload */ + T& b() { return Vector<4, T>::_data[2]; } + constexpr T b() const { return Vector<4, T>::_data[2]; } /**< @overload */ /** * @brief A component * * Equivalent to @ref w(). */ - T& a() { return w(); } - constexpr T a() const { return w(); } /**< @overload */ + T& a() { return Vector<4, T>::_data[3]; } + constexpr T a() const { return Vector<4, T>::_data[3]; } /**< @overload */ /** * @brief XYZ part of the vector @@ -176,7 +176,9 @@ template class Vector4: public Vector<4, T> { * @see @ref swizzle(), @ref rgb() */ Vector3& xyz() { return Vector3::from(Vector<4, T>::data()); } - constexpr const Vector3 xyz() const { return {x(), y(), z()}; } /**< @overload */ + constexpr const Vector3 xyz() const { + return {Vector<4, T>::_data[0], Vector<4, T>::_data[1], Vector<4, T>::_data[2]}; + } /**< @overload */ /** * @brief RGB part of the vector @@ -185,8 +187,10 @@ template class Vector4: public Vector<4, T> { * Equivalent to @ref xyz(). * @see @ref swizzle() */ - Vector3& rgb() { return xyz(); } - constexpr const Vector3 rgb() const { return xyz(); } /**< @overload */ + Vector3& rgb() { return Vector3::from(Vector<4, T>::data()); } + constexpr const Vector3 rgb() const { + return {Vector<4, T>::_data[0], Vector<4, T>::_data[1], Vector<4, T>::_data[2]}; + } /**< @overload */ /** * @brief XY part of the vector @@ -195,7 +199,9 @@ template class Vector4: public Vector<4, T> { * @see @ref swizzle() */ Vector2& xy() { return Vector2::from(Vector<4, T>::data()); } - constexpr const Vector2 xy() const { return {x(), y()}; } /**< @overload */ + constexpr const Vector2 xy() const { + return {Vector<4, T>::_data[0], Vector<4, T>::_data[1]}; + } /**< @overload */ MAGNUM_VECTOR_SUBCLASS_IMPLEMENTATION(4, Vector4) };