From de3f8858f0433bd2eab1bdcdc5a178dd3fd7ccbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Wed, 23 Oct 2019 12:32:12 +0200 Subject: [PATCH] Math: now that we have a standalone cofactor(), inline its internals. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently this gives a nearly three-times speed up compared to before. Didn't expect that. Starting Magnum::Math::Test::MatrixBenchmark with 16 test cases... INFO Benchmarking a debug build. BENCH [01] 95.33 ± 4.52 ns multiply3()@499x10000 (wall time) BENCH [02] 183.99 ± 9.29 ns multiply4()@499x10000 (wall time) BENCH [03] 110.17 ± 8.50 ns comatrix3()@49x10000 (wall time) BENCH [04] 161.54 ± 10.13 ns invert3()@49x10000 (wall time) BENCH [05] 471.44 ± 19.40 ns invert3GaussJordan()@49x10000 (wall time) BENCH [06] 320.65 ± 13.23 ns invert3Rigid()@49x10000 (wall time) BENCH [07] 206.27 ± 9.80 ns invert3Orthogonal()@49x10000 (wall time) BENCH [08] 321.25 ± 18.82 ns comatrix4()@49x10000 (wall time) BENCH [09] 445.50 ± 15.18 ns invert4()@49x10000 (wall time) BENCH [10] 828.55 ± 16.96 ns invert4GaussJordan()@49x10000 (wall time) BENCH [11] 533.23 ± 21.75 ns invert4Rigid()@49x10000 (wall time) BENCH [12] 345.56 ± 10.16 ns invert4Orthogonal()@49x10000 (wall time) BENCH [13] 63.72 ± 6.85 ns transformVector3()@999x10000 (wall time) BENCH [14] 62.28 ± 4.43 ns transformPoint3()@999x10000 (wall time) BENCH [15] 82.05 ± 7.96 ns transformVector4()@999x10000 (wall time) BENCH [16] 79.32 ± 2.41 ns transformPoint4()@999x10000 (wall time) Finished Magnum::Math::Test::MatrixBenchmark with 0 errors out of 5500 checks. --- src/Magnum/Math/Matrix.h | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/Magnum/Math/Matrix.h b/src/Magnum/Math/Matrix.h index 103a78264..2a247c60e 100644 --- a/src/Magnum/Math/Matrix.h +++ b/src/Magnum/Math/Matrix.h @@ -372,6 +372,10 @@ template struct MatrixDeterminant { return out; } + + T operator()(const Matrix& m, const std::size_t skipCol, const std::size_t skipRow) { + return m.ij(skipCol, skipRow).determinant(); + } }; /* This is not *critically* needed here (the specializations for 2x2 and 1x1 @@ -386,6 +390,19 @@ template struct MatrixDeterminant<3, T> { m._data[0]._data[1]*(m._data[1]._data[0]*m._data[2]._data[2] - m._data[2]._data[0]*m._data[1]._data[2]) + m._data[0]._data[2]*(m._data[1]._data[0]*m._data[2]._data[1] - m._data[2]._data[0]*m._data[1]._data[1]); } + + /* Used internally by cofactor(), basically just an inlined variant of + ij(skipCol, skipRow).determinant() */ + constexpr T operator()(const Matrix<4, T>& m, const std::size_t skipCol, const std::size_t skipRow) const { + #define _col(i) _data[i + (i >= skipCol)] + #define _row(i) _data[i + (i >= skipRow)] + return + m._col(0)._row(0)*((m._col(1)._row(1)*m._col(2)._row(2)) - (m._col(2)._row(1)*m._col(1)._row(2))) - + m._col(0)._row(1)*(m._col(1)._row(0)*m._col(2)._row(2) - m._col(2)._row(0)*m._col(1)._row(2)) + + m._col(0)._row(2)*(m._col(1)._row(0)*m._col(2)._row(1) - m._col(2)._row(0)*m._col(1)._row(1)); + #undef _col + #undef _row + } }; template struct MatrixDeterminant<2, T> { @@ -394,6 +411,16 @@ template struct MatrixDeterminant<2, T> { on debug builds (saves a lot, yet doesn't obfuscate too much) */ return m._data[0]._data[0]*m._data[1]._data[1] - m._data[1]._data[0]*m._data[0]._data[1]; } + + /* Used internally by cofactor(), basically just an inlined variant of + ij(skipCol, skipRow).determinant() */ + constexpr T operator()(const Matrix<3, T>& m, const std::size_t skipCol, const std::size_t skipRow) const { + #define _col(i) _data[i + (i >= skipCol)] + #define _row(i) _data[i + (i >= skipRow)] + return m._col(0)._row(0)*m._col(1)._row(1) - m._col(1)._row(0)*m._col(0)._row(1); + #undef _col + #undef _row + } }; template struct MatrixDeterminant<1, T> { @@ -402,6 +429,12 @@ template struct MatrixDeterminant<1, T> { on debug builds (saves a lot, yet doesn't obfuscate too much) */ return m._data[0]._data[0]; } + + /* Used internally by cofactor(), basically just an inlined variant of + ij(skipCol, skipRow).determinant() */ + constexpr T operator()(const Matrix<2, T>& m, const std::size_t skipCol, const std::size_t skipRow) const { + return m._data[0 + (0 >= skipCol)]._data[0 + (0 >= skipRow)]; + } }; template struct StrictWeakOrdering>: StrictWeakOrdering> {}; @@ -441,7 +474,7 @@ template Matrix Matrix::ij(const } template T Matrix::cofactor(std::size_t col, std::size_t row) const { - return (((row+col) & 1) ? -1 : 1)*ij(col, row).determinant(); + return (((row+col) & 1) ? -1 : 1)*Implementation::MatrixDeterminant()(*this, col, row); } template Matrix Matrix::comatrix() const {