From 87c2bc74fefb43afb5c13ce1ddf592b8665b0042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Tue, 21 Jan 2020 12:19:45 +0100 Subject: [PATCH] MeshTools: implement removeDuplicates() for discrete data as well. --- doc/changelog.dox | 2 + doc/snippets/MagnumMeshTools.cpp | 12 ++ src/Magnum/MeshTools/CMakeLists.txt | 3 +- src/Magnum/MeshTools/RemoveDuplicates.cpp | 114 ++++++++++++++++++ src/Magnum/MeshTools/RemoveDuplicates.h | 67 ++++++++-- src/Magnum/MeshTools/Test/CMakeLists.txt | 2 +- .../MeshTools/Test/RemoveDuplicatesTest.cpp | 98 +++++++++++++-- 7 files changed, 280 insertions(+), 18 deletions(-) create mode 100644 src/Magnum/MeshTools/RemoveDuplicates.cpp diff --git a/doc/changelog.dox b/doc/changelog.dox index 474e095cc..6f6fc89ad 100644 --- a/doc/changelog.dox +++ b/doc/changelog.dox @@ -110,6 +110,8 @@ See also: - Added @ref MeshTools::subdivideInPlace() for allocation-less mesh subdivision +- New @ref MeshTools::removeDuplicatesInPlace() variant that works on + discrete data in addition to floating-point @subsubsection changelog-latest-new-platform Platform libraries diff --git a/doc/snippets/MagnumMeshTools.cpp b/doc/snippets/MagnumMeshTools.cpp index 530c770db..e3e64c468 100644 --- a/doc/snippets/MagnumMeshTools.cpp +++ b/doc/snippets/MagnumMeshTools.cpp @@ -96,6 +96,18 @@ auto data = MeshTools::interleave(positions, weights, 2, vertexColors, 1); /* [interleave2] */ } +{ +/* [removeDuplicates] */ +Containers::ArrayView data; + +std::size_t size; +Containers::Array indices; +std::tie(indices, size) = MeshTools::removeDuplicatesInPlace( + Containers::arrayCast<2, char>(data)); +data = data.prefix(size); +/* [removeDuplicates] */ +} + { /* [removeDuplicates-multiple] */ std::vector positions; diff --git a/src/Magnum/MeshTools/CMakeLists.txt b/src/Magnum/MeshTools/CMakeLists.txt index 59ac7bde8..a1a7c4d47 100644 --- a/src/Magnum/MeshTools/CMakeLists.txt +++ b/src/Magnum/MeshTools/CMakeLists.txt @@ -33,7 +33,8 @@ set(MagnumMeshTools_GracefulAssert_SRCS CompressIndices.cpp Duplicate.cpp FlipNormals.cpp - GenerateNormals.cpp) + GenerateNormals.cpp + RemoveDuplicates.cpp) set(MagnumMeshTools_HEADERS CombineIndexedArrays.h diff --git a/src/Magnum/MeshTools/RemoveDuplicates.cpp b/src/Magnum/MeshTools/RemoveDuplicates.cpp new file mode 100644 index 000000000..6f7a526a6 --- /dev/null +++ b/src/Magnum/MeshTools/RemoveDuplicates.cpp @@ -0,0 +1,114 @@ +/* + This file is part of Magnum. + + Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 + Vladimír Vondruš + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +*/ + +#include +#include +#include + +#include "RemoveDuplicates.h" + +namespace Magnum { namespace MeshTools { + +struct ArrayEqual { + bool operator()(Containers::ArrayView a, Containers::ArrayView b) const { + CORRADE_INTERNAL_ASSERT(a.size() == b.size()); + return std::memcmp(a, b, a.size()) == 0; + } +}; + +struct ArrayHash { + std::size_t operator()(Containers::ArrayView a) const { + return *reinterpret_cast(Utility::MurmurHash2{}(a, a.size()).byteArray()); + } +}; + +std::pair, std::size_t> removeDuplicatesInPlace(const Containers::StridedArrayView2D& data) { + /* Assuming the second dimension is contiguous so we can calculate the + hashes easily */ + CORRADE_ASSERT(data.empty()[0] || data.isContiguous<1>(), + "MeshTools::removeDuplicatesInPlace(): second data view dimension is not contiguous", {}); + + const std::size_t dataSize = data.size()[0]; + /* Table containing index of first occurence for each unique entry. + Reserving more buckets than necessary (i.e. as if each entry was + unique). */ + std::unordered_map, UnsignedInt, ArrayHash, ArrayEqual> table{dataSize}; + + Containers::Array remapping{Containers::NoInit, dataSize}; + + /* Go through all entries */ + for(std::size_t i = 0; i != dataSize; ++i) { + /* Try to insert new entry into the table */ + const Containers::ArrayView entry = data[i].asContiguous(); + const auto result = table.emplace(entry, table.size()); + + /* Add the (either new or already existing) index into the array */ + remapping[i] = result.first->second; + + /* If this is a new combination, copy the data to new (earlier) + position in the array. Data in [table.size()-1, i) are already + present in the [0, table.size()-1) range from previous iterations so + we aren't overwriting anything. */ + if(result.second && i != table.size() - 1) + Utility::copy(entry, data[table.size() - 1].asContiguous()); + } + + CORRADE_INTERNAL_ASSERT(dataSize >= table.size()); + return {std::move(remapping), table.size()}; +} + +namespace { + +template std::size_t removeDuplicatesIndexedInPlaceImplementation(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView2D& data) { + /* Somehow ~IndexType{} doesn't work for < 4byte types, as the result is + int(-1) instead of the type I want */ + CORRADE_ASSERT(data.size()[0] <= IndexType(-1), + "MeshTools::removeDuplicatesIndexedInPlace(): a" << sizeof(IndexType) << Debug::nospace << "-byte index type is too small for" << data.size()[0] << "vertices", {}); + + /* There's no way to avoid the additional allocation, unfortunately --- + iterating over the indices instead of data would not preserve the + original order, which is an useful property. The float version has this + inverted (having the *Indexed() variant as the main implementation) + because the remapping there has to be done once for every dimension. */ + std::pair, std::size_t> result = removeDuplicatesInPlace(data); + for(auto& i: indices) i = result.first[i]; + return result.second; +} + +} + +std::size_t removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView2D& data) { + return removeDuplicatesIndexedInPlaceImplementation(indices, data); +} + +std::size_t removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView2D& data) { + return removeDuplicatesIndexedInPlaceImplementation(indices, data); +} + +std::size_t removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView2D& data) { + return removeDuplicatesIndexedInPlaceImplementation(indices, data); +} + +}} diff --git a/src/Magnum/MeshTools/RemoveDuplicates.h b/src/Magnum/MeshTools/RemoveDuplicates.h index a256bf412..504b992a3 100644 --- a/src/Magnum/MeshTools/RemoveDuplicates.h +++ b/src/Magnum/MeshTools/RemoveDuplicates.h @@ -39,6 +39,7 @@ #include "Magnum/Magnum.h" #include "Magnum/Math/FunctionsBatch.h" +#include "Magnum/MeshTools/visibility.h" namespace Magnum { namespace MeshTools { @@ -51,6 +52,55 @@ namespace Implementation { }; } +/** +@brief Remove duplicate data from given array in-place +@param[in,out] data Data array, duplicate items will be cut away with order + preserved +@return Size of unique prefix in the cleaned up @p data array and the resulting + index array +@m_since_latest + +Removes duplicate data from given array by comparing the second dimension of +each item, the second dimension is expected to be contiguous. A plain bit-exact +matching is used, if you need fuzzy comparison for floating-point data, use +@ref removeDuplicatesInPlace(const Containers::StridedArrayView1D&, typename Vector::Type) +instead. If you want to remove duplicate data from an already indexed array, +use @ref removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D&, const Containers::StridedArrayView2D&) +instead. Usage example: + +@snippet MagnumMeshTools.cpp removeDuplicates + +@see @ref Corrade::Containers::StridedArrayView::isContiguous() +*/ +MAGNUM_MESHTOOLS_EXPORT std::pair, std::size_t> removeDuplicatesInPlace(const Containers::StridedArrayView2D& data); + +/** +@brief Remove duplicates from indexed data in-place +@param[in,out] indices Index array, which will get remapped to list just + unique data +@param[in,out] data Data array, duplicate items will be cut away with order + preserved +@return Size of unique prefix in the cleaned up @p data array +@m_since_latest + +Compared to @ref removeDuplicatesInPlace(const Containers::StridedArrayView2D&) +this variant is more suited for data that are already indexed as it works on +the existing index array instead of allocating a new one. +*/ +MAGNUM_MESHTOOLS_EXPORT std::size_t removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView2D& data); + +/** + * @overload + * @m_since_latest + */ +MAGNUM_MESHTOOLS_EXPORT std::size_t removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView2D& data); + +/** + * @overload + * @m_since_latest + */ +MAGNUM_MESHTOOLS_EXPORT std::size_t removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView2D& data); + /** @brief Remove duplicate floating-point vector data from given array in-place @param[in,out] data Data array, duplicate items will be cut away with order @@ -64,13 +114,14 @@ namespace Implementation { Removes duplicate data from the array by collapsing them into buckets of size @p epsilon. First vector in given bucket is used, other ones are thrown away, no interpolation is done. Note that this function is meant to be used for -floating-point data (or generally with non-zero @p epsilon), for discrete data -the usual sorting method is much more efficient. +floating-point data (or generally with non-zero @p epsilon), for data where +bit-exact matching is sufficient use @ref removeDuplicatesInPlace(const Containers::StridedArrayView2D&) +instead. If you want to remove duplicate data from an already indexed array, use -@ref removeDuplicatesIndexedInPlace() instead. See also -@ref removeDuplicates(std::vector&, typename Vector::Type) for a -variant operating on a STL vector. +@ref removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D&, const Containers::StridedArrayView1D&, typename Vector::Type) instead. +See also @ref removeDuplicates(std::vector&, typename Vector::Type) for +a variant operating on a STL vector. */ template std::pair, std::size_t> removeDuplicatesInPlace(const Containers::StridedArrayView1D& data, typename Vector::Type epsilon = Math::TypeTraits::epsilon()); @@ -104,9 +155,9 @@ template std::vector removeDuplicates(std::vector&, typename Vector::Type) +this variant is more suited for data that are already indexed as it works on +the existing index array instead of allocating a new one. */ template std::size_t removeDuplicatesIndexedInPlace(const Containers::StridedArrayView1D& indices, const Containers::StridedArrayView1D& data, typename Vector::Type epsilon = Math::TypeTraits::epsilon()) { /* Somehow ~IndexType{} doesn't work for < 4byte types, as the result is diff --git a/src/Magnum/MeshTools/Test/CMakeLists.txt b/src/Magnum/MeshTools/Test/CMakeLists.txt index 0e2277a58..8c49708bd 100644 --- a/src/Magnum/MeshTools/Test/CMakeLists.txt +++ b/src/Magnum/MeshTools/Test/CMakeLists.txt @@ -29,7 +29,7 @@ corrade_add_test(MeshToolsDuplicateTest DuplicateTest.cpp LIBRARIES MagnumMeshTo corrade_add_test(MeshToolsFlipNormalsTest FlipNormalsTest.cpp LIBRARIES MagnumMeshToolsTestLib) corrade_add_test(MeshToolsGenerateNormalsTest GenerateNormalsTest.cpp LIBRARIES MagnumMeshToolsTestLib MagnumPrimitives) corrade_add_test(MeshToolsInterleaveTest InterleaveTest.cpp LIBRARIES Magnum) -corrade_add_test(MeshToolsRemoveDuplicatesTest RemoveDuplicatesTest.cpp LIBRARIES Magnum) +corrade_add_test(MeshToolsRemoveDuplicatesTest RemoveDuplicatesTest.cpp LIBRARIES MagnumMeshToolsTestLib) corrade_add_test(MeshToolsSubdivideTest SubdivideTest.cpp LIBRARIES Magnum) corrade_add_test(MeshToolsTipsifyTest TipsifyTest.cpp LIBRARIES MagnumMeshTools) corrade_add_test(MeshToolsTransformTest TransformTest.cpp LIBRARIES MagnumMeshTools) diff --git a/src/Magnum/MeshTools/Test/RemoveDuplicatesTest.cpp b/src/Magnum/MeshTools/Test/RemoveDuplicatesTest.cpp index 3c932751e..1891083f3 100644 --- a/src/Magnum/MeshTools/Test/RemoveDuplicatesTest.cpp +++ b/src/Magnum/MeshTools/Test/RemoveDuplicatesTest.cpp @@ -37,27 +37,109 @@ struct RemoveDuplicatesTest: TestSuite::Tester { explicit RemoveDuplicatesTest(); void removeDuplicatesInPlace(); - void removeDuplicatesStl(); + void removeDuplicatesInPlaceNonContiguous(); template void removeDuplicatesIndexedInPlace(); void removeDuplicatesIndexedInPlaceSmallType(); void removeDuplicatesIndexedInPlaceEmptyIndices(); void removeDuplicatesIndexedInPlaceEmptyIndicesVertices(); + void removeDuplicatesFuzzyInPlace(); + void removeDuplicatesFuzzyStl(); + template void removeDuplicatesFuzzyIndexedInPlace(); + void removeDuplicatesFuzzyIndexedInPlaceSmallType(); + void removeDuplicatesFuzzyIndexedInPlaceEmptyIndices(); + void removeDuplicatesFuzzyIndexedInPlaceEmptyIndicesVertices(); + /* this is additionally regression-tested in PrimitivesIcosphereTest */ }; RemoveDuplicatesTest::RemoveDuplicatesTest() { addTests({&RemoveDuplicatesTest::removeDuplicatesInPlace, - &RemoveDuplicatesTest::removeDuplicatesStl, + &RemoveDuplicatesTest::removeDuplicatesInPlaceNonContiguous, &RemoveDuplicatesTest::removeDuplicatesIndexedInPlace, &RemoveDuplicatesTest::removeDuplicatesIndexedInPlace, &RemoveDuplicatesTest::removeDuplicatesIndexedInPlace, &RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceSmallType, &RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndices, - &RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndicesVertices}); + &RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndicesVertices, + + &RemoveDuplicatesTest::removeDuplicatesFuzzyInPlace, + &RemoveDuplicatesTest::removeDuplicatesFuzzyStl, + &RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlace, + &RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlace, + &RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlace, + &RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlaceSmallType, + &RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlaceEmptyIndices, + &RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlaceEmptyIndicesVertices}); } void RemoveDuplicatesTest::removeDuplicatesInPlace() { + Int data[]{-15, 32, 24, -15, 15, 7541, 24, 32}; + + std::pair, std::size_t> result = + MeshTools::removeDuplicatesInPlace(Containers::arrayCast<2, char>(Containers::arrayView(data))); + CORRADE_COMPARE_AS(Containers::arrayView(result.first), + Containers::arrayView({0, 1, 2, 0, 3, 4, 2, 1}), + TestSuite::Compare::Container); + CORRADE_COMPARE_AS(Containers::arrayView(data).prefix(result.second), + Containers::arrayView({-15, 32, 24, 15, 7541}), + TestSuite::Compare::Container); +} + +void RemoveDuplicatesTest::removeDuplicatesInPlaceNonContiguous() { + Int data[8]{}; + + std::ostringstream out; + Error redirectError{&out}; + MeshTools::removeDuplicatesInPlace(Containers::arrayCast<2, char>(Containers::arrayView(data)).every({1, 2})); + CORRADE_COMPARE(out.str(), "MeshTools::removeDuplicatesInPlace(): second data view dimension is not contiguous\n"); +} + +template void RemoveDuplicatesTest::removeDuplicatesIndexedInPlace() { + setTestCaseTemplateName(Math::TypeTraits::name()); + + T indices[]{3, 2, 0, 1, 7, 6, 4, 2, 5, 0}; + Int data[]{-15, 32, 24, -15, 15, 7541, 24, 32}; + std::size_t count = MeshTools::removeDuplicatesIndexedInPlace(indices, + Containers::arrayCast<2, char>(Containers::arrayView(data))); + + CORRADE_COMPARE_AS(Containers::arrayView(indices), + Containers::arrayView({0, 2, 0, 1, 1, 2, 3, 2, 4, 0}), + TestSuite::Compare::Container); + CORRADE_COMPARE_AS(Containers::arrayView(data).prefix(count), + Containers::arrayView({-15, 32, 24, 15, 7541}), + TestSuite::Compare::Container); +} + +void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceSmallType() { + std::stringstream out; + Error redirectError{&out}; + + UnsignedByte indices[1]; + Vector2i data[256]{}; + MeshTools::removeDuplicatesIndexedInPlace( + Containers::stridedArrayView(indices), + Containers::arrayCast<2, char>(Containers::arrayView(data))); + CORRADE_COMPARE(out.str(), "MeshTools::removeDuplicatesIndexedInPlace(): a 1-byte index type is too small for 256 vertices\n"); +} + +void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndices() { + Int data[]{-15, 32, 24, -15, 15, 7541, 24, 32}; + + std::size_t count = MeshTools::removeDuplicatesIndexedInPlace( + Containers::StridedArrayView1D{}, + Containers::arrayCast<2, char>(Containers::arrayView(data))); + CORRADE_COMPARE_AS(Containers::arrayView(data).prefix(count), + Containers::arrayView({-15, 32, 24, 15, 7541}), + TestSuite::Compare::Container); +} + +void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndicesVertices() { + CORRADE_COMPARE(MeshTools::removeDuplicatesIndexedInPlace( + Containers::StridedArrayView1D{}, {}), 0); +} + +void RemoveDuplicatesTest::removeDuplicatesFuzzyInPlace() { /* Numbers with distance 1 should be merged, numbers with distance 2 should be kept. Testing both even-odd and odd-even sequence to verify that half-epsilon translations are applied properly. */ @@ -77,7 +159,7 @@ void RemoveDuplicatesTest::removeDuplicatesInPlace() { TestSuite::Compare::Container); } -void RemoveDuplicatesTest::removeDuplicatesStl() { +void RemoveDuplicatesTest::removeDuplicatesFuzzyStl() { /* Same but with implicit bloat. HEH HEH */ std::vector data{ {1, 0}, @@ -95,7 +177,7 @@ void RemoveDuplicatesTest::removeDuplicatesStl() { TestSuite::Compare::Container); } -template void RemoveDuplicatesTest::removeDuplicatesIndexedInPlace() { +template void RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlace() { setTestCaseTemplateName(Math::TypeTraits::name()); /* Same as above, but with an explicit index buffer */ @@ -118,7 +200,7 @@ template void RemoveDuplicatesTest::removeDuplicatesIndexedInPlace() { TestSuite::Compare::Container); } -void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceSmallType() { +void RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlaceSmallType() { std::stringstream out; Error redirectError{&out}; @@ -130,7 +212,7 @@ void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceSmallType() { CORRADE_COMPARE(out.str(), "MeshTools::removeDuplicatesIndexedInPlace(): a 1-byte index type is too small for 256 vertices\n"); } -void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndices() { +void RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlaceEmptyIndices() { Vector2i data[]{ {1, 0}, {2, 1}, @@ -146,7 +228,7 @@ void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndices() { TestSuite::Compare::Container); } -void RemoveDuplicatesTest::removeDuplicatesIndexedInPlaceEmptyIndicesVertices() { +void RemoveDuplicatesTest::removeDuplicatesFuzzyIndexedInPlaceEmptyIndicesVertices() { CORRADE_COMPARE((MeshTools::removeDuplicatesIndexedInPlace({}, {}, 2)), 0); }