From 7549d107dec864b34f727e9d94910498752b23a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= <mosra@centrum.cz>
Date: Mon, 15 Jul 2019 17:24:53 +0200
Subject: [PATCH] python: implement buffer protocol for Vector types as well.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This makes Vector3 to np.array conversion about 20x faster. Yes, *that*
much. Crazy. Timings from the benchmark added in the previous commit, before:

  np.array([])                                                  0.66096 µs
  np.array([1.0, 2.0, 3.0])                                     0.70623 µs
  a = array.array("f", [1.0, 2.0, 3.0]); np.array(a)            0.57877 µs
  a = Vector3(1.0, 2.0, 3.0); np.array(a)                      18.18542 µs

after:

  np.array([])                                                  0.57162 µs
  np.array([1.0, 2.0, 3.0])                                     0.68309 µs
  a = array.array("f", [1.0, 2.0, 3.0]); np.array(a)            0.53958 µs
  a = Vector3(1.0, 2.0, 3.0); np.array(a)                       0.74818 µs

There's still some overhead that could be removed I think, making the
Vector3-to-numpy conversion faster than list-to-numpy.
---
 doc/python/magnum.math.rst                | 20 +++----
 src/python/magnum/math.vector.h           | 64 +++++++++++++++--------
 src/python/magnum/math.vectorfloat.cpp    | 28 +++++++---
 src/python/magnum/math.vectorintegral.cpp | 22 +++++---
 src/python/magnum/test/benchmark_math.py  |  2 +-
 5 files changed, 90 insertions(+), 46 deletions(-)
diff --git a/doc/python/magnum.math.rst b/doc/python/magnum.math.rst
index 72f17d4..1388c94 100644
--- a/doc/python/magnum.math.rst
+++ b/doc/python/magnum.math.rst
@@ -115,10 +115,10 @@
     ============================================
 
     All vector classes are implicitly convertible from a tuple of correct size
-    and type as well as any type implementing the buffer protocol, and these
-    can be also converted back to lists using list comprehensions. This makes
-    them fully compatible with `numpy.array`, so the following expressions are
-    completely valid:
+    and type as well as from/to any type implementing the buffer protocol, and
+    these can be also converted back to lists using list comprehensions. This
+    makes them fully compatible with `numpy.array`, so the following
+    expressions are completely valid:
 
     ..
         >>> import numpy as np
@@ -135,15 +135,15 @@
 
         >>> m = Matrix4.scaling((0.5, 0.5, 1.0))
         >>> np.array(m.diagonal())
-        array([0.5, 0.5, 1. , 1. ])
+        array([0.5, 0.5, 1. , 1. ], dtype=float32)
 
     For matrices it's a bit more complicated, since Magnum is using
     column-major layout while numpy defaults to row-major (but can do
-    column-major as well). Matrices thus implement the buffer protocol for both
-    directions of the conversion to give numpy proper metadata and while they
-    are implicitly convertible from/to types implementing a buffer protocol,
-    they *are not* implicitly convertible from/to plain tuples like vectors
-    are.
+    column-major as well). To ensure proper conversions, the buffer protocol
+    implementation for matrix types handles the layout conversion as well.
+    While matrices are implicitly convertible from/to types implementing a
+    buffer protocol, they *are not* implicitly convertible from/to plain tuples
+    like vectors are.
 
     To simplify the implementation, Magnum matrices are convertible only from
     32-bit and 64-bit floating-point types (:py:`'f'` and :py:`'d'` numpy
diff --git a/src/python/magnum/math.vector.h b/src/python/magnum/math.vector.h
index 84d4ee0..00dbf11 100644
--- a/src/python/magnum/math.vector.h
+++ b/src/python/magnum/math.vector.h
@@ -91,27 +91,24 @@ template<class T, class ...Args> void everyVector(py::class_<T, Args...>& c) {
         }, "Construct a zero vector")
         .def(py::init(), "Default constructor")
 
-        /* Buffer protocol. If not present, implicit conversion from numpy
-           arrays of non-default types somehow doesn't work. On the other hand
-           only the constructor is needed (and thus also no py::buffer_protocol()
-           specified for the class), converting vectors to numpy arrays is
-           doable using the simple iteration iterface. */
-        .def(py::init([](py::buffer buffer) {
-            py::buffer_info info = buffer.request();
-
-            if(info.ndim != 1)
-                throw py::buffer_error{Utility::formatString("expected 1 dimension but got {}", info.ndim)};
-
-            if(info.shape[0] != T::Size)
-                throw py::buffer_error{Utility::formatString("expected {} elements but got {}", T::Size, info.shape[0])};
-
-            if(!isTypeCompatible<typename T::Type>(info.format))
-                throw py::buffer_error{Utility::formatString("unexpected format {} for a {} vector", info.format, py::format_descriptor<typename T::Type>::format())};
-
-            T out{Math::NoInit};
-            initFromBuffer(out, info, std::is_floating_point<typename T::Type>{}, std::is_signed<typename T::Type>{});
-            return out;
-        }), "Construct from a buffer")
+        /* Ideally, only the constructor (in vectorBuffer()) would be needed
+           (and thus also no py::buffer_protocol() specified for the class),
+           but conversion of vectors to lists is extremely slow due to pybind
+           exceptions being somehow extra heavy compared to native python ones,
+           so in order to have acceptable performance we need the buffer
+           protocol on the other side as well. See test/benchmark_math.py for
+           more information. */
+        .def_buffer([](const T& self) -> py::buffer_info {
+            // TODO: ownership?
+            return py::buffer_info{
+                const_cast<typename T::Type*>(self.data()),
+                sizeof(typename T::Type),
+                py::format_descriptor<typename T::Type>::format(),
+                1,
+                {T::Size},
+                {sizeof(typename T::Type)}
+            };
+        })
 
         /* Operators */
         .def(-py::self, "Negated vector")
@@ -131,6 +128,31 @@ template<class T, class ...Args> void everyVector(py::class_<T, Args...>& c) {
         .def(typename T::Type{} / py::self, "Divide a vector with a scalar and invert");
 }
 
+/* Constructor from a buffer. Separate from everyVector() because it needs to
+   be registered after the type conversion constructors */
+template<class T, class ...Args> void vectorBuffer(py::class_<T, Args...>& c) {
+    c
+        /* Buffer protocol. If not present, implicit conversion from numpy
+           arrays of non-default types somehow doesn't work. The exporting
+           part (def_buffer()) is in everyVector(). */
+        .def(py::init([](py::buffer buffer) {
+            py::buffer_info info = buffer.request();
+
+            if(info.ndim != 1)
+                throw py::buffer_error{Utility::formatString("expected 1 dimension but got {}", info.ndim)};
+
+            if(info.shape[0] != T::Size)
+                throw py::buffer_error{Utility::formatString("expected {} elements but got {}", T::Size, info.shape[0])};
+
+            if(!isTypeCompatible<typename T::Type>(info.format))
+                throw py::buffer_error{Utility::formatString("unexpected format {} for a {} vector", info.format, py::format_descriptor<typename T::Type>::format())};
+
+            T out{Math::NoInit};
+            initFromBuffer(out, info, std::is_floating_point<typename T::Type>{}, std::is_signed<typename T::Type>{});
+            return out;
+        }), "Construct from a buffer");
+}
+
 /* Things common for vectors of all sizes and types */
 template<class T> void vector(py::module& m, py::class_<T>& c) {
     /*
diff --git a/src/python/magnum/math.vectorfloat.cpp b/src/python/magnum/math.vectorfloat.cpp
index 3e251ff..de9c6db 100644
--- a/src/python/magnum/math.vectorfloat.cpp
+++ b/src/python/magnum/math.vectorfloat.cpp
@@ -80,21 +80,21 @@ template<class T> void vectorsFloat(py::module& m, py::class_<Math::Vector2<T>>&
 }
 
 void mathVectorFloat(py::module& root, py::module& m) {
-    py::class_<Vector2> vector2{root, "Vector2", "Two-component float vector"};
-    py::class_<Vector3> vector3{root, "Vector3", "Threee-component float vector"};
-    py::class_<Vector4> vector4{root, "Vector4", "Four-component float vector"};
-    py::class_<Vector2d> vector2d{root, "Vector2d", "Two-component double vector"};
-    py::class_<Vector3d> vector3d{root, "Vector3d", "Threee-component double vector"};
-    py::class_<Vector4d> vector4d{root, "Vector4d", "Four-component double vector"};
+    py::class_<Vector2> vector2{root, "Vector2", "Two-component float vector", py::buffer_protocol{}};
+    py::class_<Vector3> vector3{root, "Vector3", "Threee-component float vector", py::buffer_protocol{}};
+    py::class_<Vector4> vector4{root, "Vector4", "Four-component float vector", py::buffer_protocol{}};
+    py::class_<Vector2d> vector2d{root, "Vector2d", "Two-component double vector", py::buffer_protocol{}};
+    py::class_<Vector3d> vector3d{root, "Vector3d", "Threee-component double vector", py::buffer_protocol{}};
+    py::class_<Vector4d> vector4d{root, "Vector4d", "Four-component double vector", py::buffer_protocol{}};
     vectorsFloat<Float>(m, vector2, vector3, vector4);
     vectorsFloat<Double>(m, vector2d, vector3d, vector4d);
 
-    py::class_<Color3, Vector3> color3_{root, "Color3", "Color in linear RGB color space"};
+    py::class_<Color3, Vector3> color3_{root, "Color3", "Color in linear RGB color space", py::buffer_protocol{}};
     everyVector(color3_);
     color(color3_);
     color3(color3_);
 
-    py::class_<Color4, Vector4> color4_{root, "Color4", "Color in linear RGBA color space"};
+    py::class_<Color4, Vector4> color4_{root, "Color4", "Color in linear RGBA color space", py::buffer_protocol{}};
     everyVector(color4_);
     color(color4_);
     color4(color4_);
@@ -110,6 +110,18 @@ void mathVectorFloat(py::module& root, py::module& m) {
     convertible(vector3d);
     convertible(vector4d);
     /* Colors are float-only at the moment, thus no conversions */
+
+    /* This needs to be *after* conversion constructors so the type conversion
+       gets picked before the general buffer constructor (which would then
+       fail) */
+    vectorBuffer(vector2);
+    vectorBuffer(vector3);
+    vectorBuffer(vector4);
+    vectorBuffer(vector2d);
+    vectorBuffer(vector3d);
+    vectorBuffer(vector4d);
+    vectorBuffer(color3_);
+    vectorBuffer(color4_);
 }
 
 }
diff --git a/src/python/magnum/math.vectorintegral.cpp b/src/python/magnum/math.vectorintegral.cpp
index 74dfe55..ac0daf7 100644
--- a/src/python/magnum/math.vectorintegral.cpp
+++ b/src/python/magnum/math.vectorintegral.cpp
@@ -73,12 +73,12 @@ template<class T> void vectorsIntegral(py::module& m, py::class_<Math::Vector2<T
 }
 
 void mathVectorIntegral(py::module& root, py::module& m) {
-    py::class_<Vector2i> vector2i{root, "Vector2i", "Two-component signed integer vector"};
-    py::class_<Vector3i> vector3i{root, "Vector3i", "Threee-component signed integral vector"};
-    py::class_<Vector4i> vector4i{root, "Vector4i", "Four-component signed integral vector"};
-    py::class_<Vector2ui> vector2ui{root, "Vector2ui", "Two-component unsigned integral vector"};
-    py::class_<Vector3ui> vector3ui{root, "Vector3ui", "Threee-component unsigned integral vector"};
-    py::class_<Vector4ui> vector4ui{root, "Vector4ui", "Four-component unsigned integral vector"};
+    py::class_<Vector2i> vector2i{root, "Vector2i", "Two-component signed integer vector", py::buffer_protocol{}};
+    py::class_<Vector3i> vector3i{root, "Vector3i", "Threee-component signed integral vector", py::buffer_protocol{}};
+    py::class_<Vector4i> vector4i{root, "Vector4i", "Four-component signed integral vector", py::buffer_protocol{}};
+    py::class_<Vector2ui> vector2ui{root, "Vector2ui", "Two-component unsigned integral vector", py::buffer_protocol{}};
+    py::class_<Vector3ui> vector3ui{root, "Vector3ui", "Threee-component unsigned integral vector", py::buffer_protocol{}};
+    py::class_<Vector4ui> vector4ui{root, "Vector4ui", "Four-component unsigned integral vector", py::buffer_protocol{}};
     vectorsIntegral<Int>(m, vector2i, vector3i, vector4i);
     vectorsIntegral<UnsignedInt>(m, vector2ui, vector3ui, vector4ui);
 
@@ -90,6 +90,16 @@ void mathVectorIntegral(py::module& root, py::module& m) {
     convertible(vector2ui);
     convertible(vector3ui);
     convertible(vector4ui);
+
+    /* This needs to be *after* conversion constructors so the type conversion
+       gets picked before the general buffer constructor (which would then
+       fail) */
+    vectorBuffer(vector2i);
+    vectorBuffer(vector3i);
+    vectorBuffer(vector4i);
+    vectorBuffer(vector2ui);
+    vectorBuffer(vector3ui);
+    vectorBuffer(vector4ui);
 }
 
 }
diff --git a/src/python/magnum/test/benchmark_math.py b/src/python/magnum/test/benchmark_math.py
index 74f17c2..9b2d94c 100755
--- a/src/python/magnum/test/benchmark_math.py
+++ b/src/python/magnum/test/benchmark_math.py
@@ -71,7 +71,7 @@ print("\n  constructing builtin array:\n")
 timethat('array.array("f", [])')
 timethat('array.array("f", [1.0, 2.0, 3.0])')
 timethat('memoryview(a)', setup='a = array.array("f", [1.0, 2.0, 3.0])')
-#timethat('memoryview(a)', setup='a = Vector3(1.0, 2.0, 3.0)')
+timethat('memoryview(a)', setup='a = Vector3(1.0, 2.0, 3.0)')
 
 print("\n  constructing np.array:\n")