Browse Source

Math: use __builtin_sincos for sincos() on GCC.

Makes this thing faster in Debug builds. But I'm not sure whether it's due to
fewer function calls or this intrinsic being actually faster, heh.
Probably a combination of both.
pull/364/head
Vladimír Vondruš 7 years ago
parent
commit
13b4bf8461
  1. 3
      doc/changelog.dox
  2. 31
      src/Magnum/Math/Functions.h
  3. 50
      src/Magnum/Math/Test/FunctionsTest.cpp

3
doc/changelog.dox

@ -315,6 +315,9 @@ See also:
- Changed the way @ref Math::operator<<(Corrade::Utility::Debug&, const BoolVector<size>&) - Changed the way @ref Math::operator<<(Corrade::Utility::Debug&, const BoolVector<size>&)
works --- the output now has the same bit order as when constructing it works --- the output now has the same bit order as when constructing it
using binary literals using binary literals
- @ref Math::sincos() now uses the `__builtin_sincos` intrinsic on GCC,
making it potentially faster in case the optimizer doesn't figure out the
same
@subsubsection changelog-latest-changes-meshtools MeshTools library @subsubsection changelog-latest-changes-meshtools MeshTools library

31
src/Magnum/Math/Functions.h

@ -114,17 +114,44 @@ template<class T> inline T cos(Unit<Rad, T> angle) { return std::cos(T(angle));
template<class T> inline T cos(Unit<Deg, T> angle) { return cos(Rad<T>(angle)); } template<class T> inline T cos(Unit<Deg, T> angle) { return cos(Rad<T>(angle)); }
#endif #endif
#if defined(__GNUC__) && !defined(__clang__)
namespace Implementation {
/* GCC builtin since 3.4 (https://stackoverflow.com/a/2742861),
unfortunately either Clang nor MSVC have any alternative which wouldn't
involve inline assembly. */
inline void sincos(Float rad, Float& sin, Float& cos) {
__builtin_sincosf(rad, &sin, &cos);
}
inline void sincos(Double rad, Double& sin, Double& cos) {
__builtin_sincos(rad, &sin, &cos);
}
inline void sincos(long double rad, long double& sin, long double& cos) {
__builtin_sincosl(rad, &sin, &cos);
}
/* Assuming there's no other floating-point type */
}
#endif
/** /**
@brief Sine and cosine @brief Sine and cosine
On some architectures might be faster than doing both computations separately. On GCC, this uses the `__builtin_sincos` intrinsic (or its `f` / `l` suffixed
variants), which may be faster than calculating sine and cosine separately. On
other compilers this *might* result in the optimizer picking up the combined
instruction as well.
@see @ref sin(), @ref cos(), @ref sincos(const Dual<Rad<T>>&) @see @ref sin(), @ref cos(), @ref sincos(const Dual<Rad<T>>&)
*/ */
#ifdef DOXYGEN_GENERATING_OUTPUT #ifdef DOXYGEN_GENERATING_OUTPUT
template<class T> inline std::pair<T, T> sincos(Rad<T> angle); template<class T> inline std::pair<T, T> sincos(Rad<T> angle);
#else #else
template<class T> inline std::pair<T, T> sincos(Unit<Rad, T> angle) { template<class T> inline std::pair<T, T> sincos(Unit<Rad, T> angle) {
return {std::sin(T(angle)) ,std::cos(T(angle))}; #if defined(__GNUC__) && !defined(__clang__)
std::pair<T, T> out;
Implementation::sincos(T(angle), out.first, out.second);
return out;
#else
return {std::sin(T(angle)), std::cos(T(angle))};
#endif
} }
template<class T> inline std::pair<T, T> sincos(Unit<Deg, T> angle) { return sincos(Rad<T>(angle)); } template<class T> inline std::pair<T, T> sincos(Unit<Deg, T> angle) { return sincos(Rad<T>(angle)); }
#endif #endif

50
src/Magnum/Math/Test/FunctionsTest.cpp

@ -24,6 +24,7 @@
*/ */
#include <Corrade/TestSuite/Tester.h> #include <Corrade/TestSuite/Tester.h>
#include <Corrade/TestSuite/Compare/Numeric.h>
#include "Magnum/Math/Functions.h" #include "Magnum/Math/Functions.h"
#include "Magnum/Math/Vector4.h" #include "Magnum/Math/Vector4.h"
@ -67,8 +68,13 @@ struct FunctionsTest: Corrade::TestSuite::Tester {
void isInfVector(); void isInfVector();
void isNan(); void isNan();
void isNanfVector(); void isNanfVector();
void trigonometric(); void trigonometric();
void trigonometricWithBase(); void trigonometricWithBase();
template<class T> void sincos();
void sinCosSeparateBenchmark();
void sinCosCombinedBenchmark();
}; };
using namespace Literals; using namespace Literals;
@ -118,8 +124,18 @@ FunctionsTest::FunctionsTest() {
&FunctionsTest::isInfVector, &FunctionsTest::isInfVector,
&FunctionsTest::isNan, &FunctionsTest::isNan,
&FunctionsTest::isNanfVector, &FunctionsTest::isNanfVector,
&FunctionsTest::trigonometric, &FunctionsTest::trigonometric,
&FunctionsTest::trigonometricWithBase}); &FunctionsTest::trigonometricWithBase,
&FunctionsTest::sincos<Float>,
&FunctionsTest::sincos<Double>,
#ifndef CORRADE_TARGET_EMSCRIPTEN
&FunctionsTest::sincos<long double>,
#endif
});
addBenchmarks({&FunctionsTest::sinCosSeparateBenchmark,
&FunctionsTest::sinCosCombinedBenchmark}, 100);
} }
void FunctionsTest::powIntegral() { void FunctionsTest::powIntegral() {
@ -463,6 +479,38 @@ void FunctionsTest::trigonometricWithBase() {
CORRADE_COMPARE(Math::tan(2*Rad(Constants::pi()/8)), 1.0f); CORRADE_COMPARE(Math::tan(2*Rad(Constants::pi()/8)), 1.0f);
} }
/* Checks Math::sincos() instantiated for the floating-point type T. Uses a
30 degree angle and compares the first element of the returned pair (sine)
against 0.5 and the second (cosine) against 0.8660254037844386. */
template<class T> void FunctionsTest::sincos() {
/* Set the test case template name from TypeTraits<T>::name() */
setTestCaseTemplateName(TypeTraits<T>::name());
/* On GCC, Math::sincos() goes through a separate __builtin_sincos overload
for each floating-point type, so instantiating this test for every such type
verifies that each of those overloads calls the right builtin */
/* NOTE(review): the expected cosine is an unsuffixed (double) literal
converted to T; confirm the comparison tolerance covers that when T is
long double */
CORRADE_COMPARE(Math::sincos(Math::Deg<T>(T(30.0))).first, T(0.5));
CORRADE_COMPARE(Math::sincos(Math::Deg<T>(T(30.0))).second, T(0.8660254037844386));
}
void FunctionsTest::sinCosSeparateBenchmark() {
Float sin{}, cos{}, a{};
CORRADE_BENCHMARK(1000) {
sin += Math::sin(Rad(a));
cos += Math::cos(Rad(a));
a += 0.1f;
}
CORRADE_COMPARE_AS(a, 10.0f, Corrade::TestSuite::Compare::Greater);
}
void FunctionsTest::sinCosCombinedBenchmark() {
Float sin{}, cos{}, a{};
CORRADE_BENCHMARK(1000) {
auto sincos = Math::sincos(Rad(a));
sin += sincos.first;
cos += sincos.second;
a += 0.1f;
}
CORRADE_COMPARE_AS(a, 10.0f, Corrade::TestSuite::Compare::Greater);
}
}}}} }}}}
CORRADE_TEST_MAIN(Magnum::Math::Test::FunctionsTest) CORRADE_TEST_MAIN(Magnum::Math::Test::FunctionsTest)

Loading…
Cancel
Save