From 13b4bf8461a1f357fca6822a9a8e83ecaf78e0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Wed, 7 Aug 2019 20:25:46 +0200 Subject: [PATCH] Math: use __builtin_sincos for sincos() on GCC. Makes this thing faster on Debug. But I'm not sure whether it's due to fewer function calls or this intrinsic being actually faster, heh. Probably a combination of both. --- doc/changelog.dox | 3 ++ src/Magnum/Math/Functions.h | 31 ++++++++++++++-- src/Magnum/Math/Test/FunctionsTest.cpp | 50 +++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/doc/changelog.dox b/doc/changelog.dox index 73c207a78..206bc975c 100644 --- a/doc/changelog.dox +++ b/doc/changelog.dox @@ -315,6 +315,9 @@ See also: - Changed the way @ref Math::operator<<(Corrade::Utility::Debug&, const BoolVector&) works --- the output now has the same bit order as when constructing it using binary literals +- @ref Math::sincos() now uses the `__builtin_sincos` intrinsic on GCC, + making it potentially faster in case the optimizer doesn't figure out the + same @subsubsection changelog-latest-changes-meshtools MeshTools library diff --git a/src/Magnum/Math/Functions.h b/src/Magnum/Math/Functions.h index 67640fd11..f8605cfe3 100644 --- a/src/Magnum/Math/Functions.h +++ b/src/Magnum/Math/Functions.h @@ -114,17 +114,44 @@ template inline T cos(Unit angle) { return std::cos(T(angle)); template inline T cos(Unit angle) { return cos(Rad(angle)); } #endif +#if defined(__GNUC__) && !defined(__clang__) +namespace Implementation { + /* GCC builtin since 3.4 (https://stackoverflow.com/a/2742861), + unfortunately neither Clang nor MSVC have any alternative which wouldn't + involve inline assembly. 
*/ + inline void sincos(Float rad, Float& sin, Float& cos) { + __builtin_sincosf(rad, &sin, &cos); + } + inline void sincos(Double rad, Double& sin, Double& cos) { + __builtin_sincos(rad, &sin, &cos); + } + inline void sincos(long double rad, long double& sin, long double& cos) { + __builtin_sincosl(rad, &sin, &cos); + } + /* Assuming there's no other floating-point type */ +} +#endif + /** @brief Sine and cosine -On some architectures might be faster than doing both computations separately. +On GCC, this uses the `__builtin_sincos` intrinsic (or its `f` / `l` suffixed +variants), which may be faster than calculating sine and cosine separately. On +other compilers this *might* result in the optimizer picking up the combined +instruction as well. @see @ref sin(), @ref cos(), @ref sincos(const Dual>&) */ #ifdef DOXYGEN_GENERATING_OUTPUT template inline std::pair sincos(Rad angle); #else template inline std::pair sincos(Unit angle) { - return {std::sin(T(angle)) ,std::cos(T(angle))}; + #if defined(__GNUC__) && !defined(__clang__) + std::pair out; + Implementation::sincos(T(angle), out.first, out.second); + return out; + #else + return {std::sin(T(angle)), std::cos(T(angle))}; + #endif } template inline std::pair sincos(Unit angle) { return sincos(Rad(angle)); } #endif diff --git a/src/Magnum/Math/Test/FunctionsTest.cpp b/src/Magnum/Math/Test/FunctionsTest.cpp index c5f01c568..cf2b20cc7 100644 --- a/src/Magnum/Math/Test/FunctionsTest.cpp +++ b/src/Magnum/Math/Test/FunctionsTest.cpp @@ -24,6 +24,7 @@ */ #include +#include #include "Magnum/Math/Functions.h" #include "Magnum/Math/Vector4.h" @@ -67,8 +68,13 @@ struct FunctionsTest: Corrade::TestSuite::Tester { void isInfVector(); void isNan(); void isNanfVector(); + void trigonometric(); void trigonometricWithBase(); + template void sincos(); + + void sinCosSeparateBenchmark(); + void sinCosCombinedBenchmark(); }; using namespace Literals; @@ -118,8 +124,18 @@ FunctionsTest::FunctionsTest() { &FunctionsTest::isInfVector, 
&FunctionsTest::isNan, &FunctionsTest::isNanfVector, + &FunctionsTest::trigonometric, - &FunctionsTest::trigonometricWithBase}); + &FunctionsTest::trigonometricWithBase, + &FunctionsTest::sincos, + &FunctionsTest::sincos, + #ifndef CORRADE_TARGET_EMSCRIPTEN + &FunctionsTest::sincos, + #endif + }); + + addBenchmarks({&FunctionsTest::sinCosSeparateBenchmark, + &FunctionsTest::sinCosCombinedBenchmark}, 100); } void FunctionsTest::powIntegral() { @@ -463,6 +479,38 @@ void FunctionsTest::trigonometricWithBase() { CORRADE_COMPARE(Math::tan(2*Rad(Constants::pi()/8)), 1.0f); } +template void FunctionsTest::sincos() { + setTestCaseTemplateName(TypeTraits::name()); + + /* For GCC's __builtin_sincos this verifies that all specializations are + correct */ + CORRADE_COMPARE(Math::sincos(Math::Deg(T(30.0))).first, T(0.5)); + CORRADE_COMPARE(Math::sincos(Math::Deg(T(30.0))).second, T(0.8660254037844386)); +} + +void FunctionsTest::sinCosSeparateBenchmark() { + Float sin{}, cos{}, a{}; + CORRADE_BENCHMARK(1000) { + sin += Math::sin(Rad(a)); + cos += Math::cos(Rad(a)); + a += 0.1f; + } + + CORRADE_COMPARE_AS(a, 10.0f, Corrade::TestSuite::Compare::Greater); +} + +void FunctionsTest::sinCosCombinedBenchmark() { + Float sin{}, cos{}, a{}; + CORRADE_BENCHMARK(1000) { + auto sincos = Math::sincos(Rad(a)); + sin += sincos.first; + cos += sincos.second; + a += 0.1f; + } + + CORRADE_COMPARE_AS(a, 10.0f, Corrade::TestSuite::Compare::Greater); +} + }}}} CORRADE_TEST_MAIN(Magnum::Math::Test::FunctionsTest)