From e1c9c4d0074eb0537ea5667c70c257466b12e220 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20Vondru=C5=A1?= Date: Tue, 9 Jul 2024 19:52:55 +0200 Subject: [PATCH] Text: utility for mapping input text byte ranges to output glyphs. For text selection and cursor movement. --- doc/changelog.dox | 3 + src/Magnum/Text/Renderer.cpp | 47 ++++++ src/Magnum/Text/Renderer.h | 33 +++++ src/Magnum/Text/Test/RendererTest.cpp | 196 ++++++++++++++++++++++++++ 4 files changed, 279 insertions(+) diff --git a/doc/changelog.dox b/doc/changelog.dox index 89e474210..4afa18cf6 100644 --- a/doc/changelog.dox +++ b/doc/changelog.dox @@ -407,6 +407,9 @@ See also: @ref Text::renderGlyphQuadsInto(), @ref Text::alignRenderedLine(), @ref Text::alignRenderedBlock() and @ref Text::renderGlyphQuadIndicesInto() APIs providing low-level access to the text renderer building blocks +- New @ref Text::glyphRangeForBytes() API for providing byte-to-glyph mapping + for arbitrarily complex shapers using the output from + @ref Text::AbstractShaper::glyphClustersInto() @subsubsection changelog-latest-new-texturetools TextureTools library diff --git a/src/Magnum/Text/Renderer.cpp b/src/Magnum/Text/Renderer.cpp index 2b8154fcd..054256f0d 100644 --- a/src/Magnum/Text/Renderer.cpp +++ b/src/Magnum/Text/Renderer.cpp @@ -298,6 +298,53 @@ void renderGlyphQuadIndicesInto(UnsignedInt glyphOffset, const Containers::Strid renderGlyphQuadIndicesIntoInternal(glyphOffset, indices); } +Containers::Pair glyphRangeForBytes(const Containers::StridedArrayView1D& clusters, const UnsignedInt begin, const UnsignedInt end) { + if(clusters.isEmpty()) + return {}; + + /* Make the begin always less than or equal to end */ + const bool reverseBeginEnd = begin > end; + const UnsignedInt beginForward = reverseBeginEnd ? end : begin; + const UnsignedInt endForward = reverseBeginEnd ? 
begin : end; + + /* Make the cluster array always in an ascending order as well */ + const bool reverseClusters = clusters.front() > clusters.back(); + const Containers::StridedArrayView1D clustersForward = + reverseClusters ? clusters.flipped<0>() : clusters; + + /* The glyph begin is the last glyph that has the cluster ID not larger + than `begin`, or the end */ + UnsignedInt glyphBegin = 0; + while(glyphBegin != clustersForward.size() && clustersForward[glyphBegin] < beginForward && (glyphBegin + 1 == clustersForward.size() || clustersForward[glyphBegin + 1] <= beginForward)) + ++glyphBegin; + + /* If `begin` was pointing in the middle of a cluster, for example of a + ligature, or (wrongly) inside a multi-byte UTF-8 char, go back to find + the cluster begin */ + if(glyphBegin != clustersForward.size()) while(glyphBegin && clustersForward[glyphBegin - 1] == clustersForward[glyphBegin]) + --glyphBegin; + + /* The end is then the first glyph after glyph begin that has the cluster + ID larger or equal to `end`. Unless `begin` was the same as `end`, then + the returned glyph end is same as returned glyph begin. */ + UnsignedInt glyphEnd = glyphBegin; + if(beginForward != endForward) while(glyphEnd != clustersForward.size() && clustersForward[glyphEnd] < endForward) + ++glyphEnd; + + /* If the clusters were in reverse direction, reverse the actual glyph IDs + as well. And this way the begin is greater or equal to end, so they're + swapped too. */ + const Containers::Pair out = reverseClusters ? + Containers::pair(UnsignedInt(clustersForward.size()) - glyphEnd, + UnsignedInt(clustersForward.size()) - glyphBegin) : + Containers::pair(glyphBegin, glyphEnd); + + /* Then, if the begin and end was swapped, swap the output again as well */ + return reverseBeginEnd ? 
+ Containers::pair(out.second(), out.first()) : + out; +} + #ifdef MAGNUM_TARGET_GL namespace { diff --git a/src/Magnum/Text/Renderer.h b/src/Magnum/Text/Renderer.h index b0bd42b6d..1ee715479 100644 --- a/src/Magnum/Text/Renderer.h +++ b/src/Magnum/Text/Renderer.h @@ -303,6 +303,39 @@ for more information. */ MAGNUM_TEXT_EXPORT void renderGlyphQuadIndicesInto(UnsignedInt glyphOffset, const Containers::StridedArrayView1D& indices); +/** +@brief Find a glyph range corresponding to given byte range in the input text +@m_since_latest + +Assuming @p clusters is a view containing cluster IDs returned from +@ref AbstractShaper::glyphClustersInto() and @p begin and @p end are byte +positions in the text passed to @ref AbstractShaper::shape() for which the +cluster IDs were retrieved, returns a range in the glyph array that contains +given range. Assumes that @p clusters are either monotonically non-decreasing or +non-increasing. + +If @p clusters are empty or @p end is less or equal to all @p clusters, returns +@cpp {0, 0} @ce. If @p begin is greater than all @p clusters are, both return +values are set to @p clusters size. In both cases the empty returned range +means there are no glyphs corresponding to given byte. Otherwise, if the input +range is non-empty, the returned range is always at least one glyph. The +returned range always points to cluster boundaries, even if the input is inside +a multi-byte character or ligature or inside a multi-glyph cluster. + +If @p begin is greater than @p end, the first value of the output is also +greater than the second. Otherwise, the first value of the output is always +less than or equal to the second. + +At the moment, the lookup is done with an @f$ \mathcal{O}(n) @f$ complexity, +with @f$ n @f$ being size of the @p clusters view. + +Mapping in the other direction, from glyphs to input bytes, is simply +@cpp clusters[i] @ce. 
See @ref AbstractShaper::glyphClustersInto() for more +information about how the cluster IDs may look like depending on the input and +shaper features used. +*/ +MAGNUM_TEXT_EXPORT Containers::Pair glyphRangeForBytes(const Containers::StridedArrayView1D& clusters, UnsignedInt begin, UnsignedInt end); + #ifdef MAGNUM_TARGET_GL /** @brief Base for text renderers diff --git a/src/Magnum/Text/Test/RendererTest.cpp b/src/Magnum/Text/Test/RendererTest.cpp index 4bedfc266..52f2a85f1 100644 --- a/src/Magnum/Text/Test/RendererTest.cpp +++ b/src/Magnum/Text/Test/RendererTest.cpp @@ -70,6 +70,8 @@ struct RendererTest: TestSuite::Tester { template void glyphQuadIndices(); void glyphQuadIndicesTypeTooSmall(); + void glyphRangeForBytes(); + void renderData(); void multiline(); @@ -119,6 +121,30 @@ const struct { {"middle, integral", Alignment::MiddleLeftIntegral, -15.0f} }; +const struct { + const char* name; + bool ascending; + Containers::Pair(*function)(const Containers::StridedArrayView1D&, UnsignedInt, UnsignedInt); +} GlyphRangeForBytesData[]{ + {"", true, + glyphRangeForBytes}, + {"reverse direction", false, + glyphRangeForBytes}, + {"swapped begin & end", true, + [](const Containers::StridedArrayView1D& clusters, UnsignedInt begin, UnsignedInt end) { + /* If begin > end, the output should be also swapped, so swapping + it back should result in the same thing as with non-swapped + input */ + Containers::Pair out = glyphRangeForBytes(clusters, end, begin); + return Containers::pair(out.second(), out.first()); + }}, + {"swapped begin & end, reverse direction", false, + [](const Containers::StridedArrayView1D& clusters, UnsignedInt begin, UnsignedInt end) { + Containers::Pair out = glyphRangeForBytes(clusters, end, begin); + return Containers::pair(out.second(), out.first()); + }}, +}; + const struct { TestSuite::TestCaseDescriptionSourceLocation name; Alignment alignment; @@ -334,6 +360,9 @@ RendererTest::RendererTest() { &RendererTest::glyphQuadIndices, 
&RendererTest::glyphQuadIndicesTypeTooSmall}); + addInstancedTests({&RendererTest::glyphRangeForBytes}, + Containers::arraySize(GlyphRangeForBytesData)); + addInstancedTests({&RendererTest::renderData}, Containers::arraySize(RenderDataData)); @@ -969,6 +998,173 @@ void RendererTest::glyphQuadIndicesTypeTooSmall() { "Text::renderGlyphQuadIndicesInto(): max index value of 4294967299 cannot fit into a 32-bit type\n"); } +void RendererTest::glyphRangeForBytes() { + auto&& data = GlyphRangeForBytesData[testCaseInstanceId()]; + setTestCaseDescription(data.name); + + /* Offset from the start, some characters decomposed/reordered, some + multi-byte, and then also multi-byte to decomposed */ + UnsignedInt clusterData[]{ + 3, /* 0 9 */ + 4, /* 1 8 */ + 5, /* 2 7 */ + 5, /* 3 6 */ + 5, /* 4 5 */ + 6, /* 5 4 */ + 6, /* 6 3 */ + 9, /* 7 2 */ + 12, /* 8 1 */ + 13 /* 9 0 */ + }; + Containers::StridedArrayView1D clusters = clusterData; + if(!data.ascending) clusters = clusters.flipped<0>(); + + /* With empty clusters it means there are no glyphs, so returning 0 means + both before and after the glyph run */ + CORRADE_COMPARE(data.function(nullptr, 0, 3), Containers::pair(0u, 0u)); + CORRADE_COMPARE(data.function(nullptr, 10, 13), Containers::pair(0u, 0u)); + + /* Bytes before everything return 0, same for an empty range at the + start; if the other direction then it returns the size */ + for(Containers::Pair i: { + Containers::pair(1u, 1u), + Containers::pair(2u, 3u), + Containers::pair(3u, 3u) + }) { + CORRADE_ITERATION(i); + CORRADE_COMPARE(data.function(clusters, i.first(), i.second()), + data.ascending ? Containers::pair(0u, 0u) : + Containers::pair(10u, 10u)); + } + + /* Bytes after everything return the size (or 0 if reverse direction). 
Size + of the last cluster in bytes is unknown so there's no empty range at the + end */ + for(Containers::Pair i: { + Containers::pair(14u, 14u), + Containers::pair(14u, 16u) + }) { + CORRADE_COMPARE(data.function(clusters, i.first(), i.second()), + data.ascending ? Containers::pair(10u, 10u) : + Containers::pair(0u, 0u)); + } + + /* Empty ranges inside, i.e. for a cursor. In reverse direction it means + the cursor is from *the other side* of the same glyph, so +1. In other + words, if you do backspace (which always goes backwards in the byte + stream, but to the left for LTR text and to the right for RTL text), it + deletes the same glyph regardless of direction */ + CORRADE_COMPARE(data.function(clusters, 4, 4), + data.ascending ? Containers::pair(1u, 1u) : + Containers::pair(9u, 9u)); + /* This one maps from one byte to multiple glyphs */ + CORRADE_COMPARE(data.function(clusters, 5, 5), + data.ascending ? Containers::pair(2u, 2u) : + Containers::pair(8u, 8u)); + /* This one maps from multiple bytes to a single glyph, should return the + same for any byte inside that sequence */ + for(UnsignedInt i: {9, 10, 11}) { + CORRADE_ITERATION(i); + CORRADE_COMPARE(data.function(clusters, i, i), + data.ascending ? Containers::pair(7u, 7u) : + Containers::pair(3u, 3u)); + } + /* This one maps from multiple bytes to multiple glyphs, again should + return the same for any byte inside that sequence */ + for(UnsignedInt i: {6, 7, 8}) { + CORRADE_ITERATION(i); + CORRADE_COMPARE(data.function(clusters, i, i), + data.ascending ? Containers::pair(5u, 5u) : + Containers::pair(5u, 5u)); + } + + /* Single byte mapped to a single glyph, i.e. an Insert mode or a + selection. Again, in reverse direction it should cover the same glyph, + just from the other side. */ + CORRADE_COMPARE(data.function(clusters, 3, 4), + data.ascending ? Containers::pair(0u, 1u) : + Containers::pair(9u, 10u)); + CORRADE_COMPARE(data.function(clusters, 4, 5), + data.ascending ? 
Containers::pair(1u, 2u) : + Containers::pair(8u, 9u)); + CORRADE_COMPARE(data.function(clusters, 12, 13), + data.ascending ? Containers::pair(8u, 9u) : + Containers::pair(1u, 2u)); + + /* Multiple bytes mapped to a single glyph, as well as any subranges of + those */ + for(Containers::Pair i: { + Containers::pair(9u, 10u), + Containers::pair(9u, 11u), + Containers::pair(9u, 12u), + Containers::pair(10u, 11u), + Containers::pair(10u, 12u), + Containers::pair(11u, 12u) + }) { + CORRADE_ITERATION(i); + CORRADE_COMPARE(data.function(clusters, i.first(), i.second()), + data.ascending ? Containers::pair(7u, 8u) : + Containers::pair(2u, 3u)); + } + + /* Single byte mapped to multiple glyphs */ + CORRADE_COMPARE(data.function(clusters, 5, 6), + data.ascending ? Containers::pair(2u, 5u) : + Containers::pair(5u, 8u)); + + /* Multiple bytes mapped to multiple glyphs, as well as any subranges of + those */ + for(Containers::Pair i: { + Containers::pair(6u, 7u), + Containers::pair(6u, 8u), + Containers::pair(6u, 9u), + Containers::pair(7u, 8u), + Containers::pair(7u, 9u), + Containers::pair(8u, 9u) + }) { + CORRADE_ITERATION(i); + CORRADE_COMPARE(data.function(clusters, i.first(), i.second()), + data.ascending ? Containers::pair(5u, 7u) : + Containers::pair(3u, 5u)); + } + + /* Larger ranges */ + CORRADE_COMPARE(data.function(clusters, 4, 9), + data.ascending ? Containers::pair(1u, 7u) : + Containers::pair(3u, 9u)); + CORRADE_COMPARE(data.function(clusters, 5, 12), + data.ascending ? Containers::pair(2u, 8u) : + Containers::pair(2u, 8u)); + CORRADE_COMPARE(data.function(clusters, 3, 14), + data.ascending ? Containers::pair(0u, 10u) : + Containers::pair(0u, 10u)); + CORRADE_COMPARE(data.function(clusters, 0, 20), + data.ascending ? 
Containers::pair(0u, 10u) : + Containers::pair(0u, 10u)); + + /* Subsets of multi-byte ranges plus bytes after */ + for(Containers::Pair i: { + Containers::pair(7u, 12u), + Containers::pair(8u, 12u) + }) { + CORRADE_ITERATION(i); + CORRADE_COMPARE(data.function(clusters, i.first(), i.second()), + data.ascending ? Containers::pair(5u, 8u) : + Containers::pair(2u, 5u)); + } + + /* Subsets of multi-byte ranges plus bytes before */ + for(Containers::Pair i: { + Containers::pair(4u, 7u), + Containers::pair(4u, 8u) + }) { + CORRADE_ITERATION(i); + CORRADE_COMPARE(data.function(clusters, i.first(), i.second()), + data.ascending ? Containers::pair(1u, 7u) : + Containers::pair(3u, 9u)); + } +} + void RendererTest::renderData() { auto&& data = RenderDataData[testCaseInstanceId()]; setTestCaseDescription(data.name);