Browse Source

Language client: improved support for both UTF-16 offsets and offsetEncoding set to utf-8

pipelines/353213535
eidheim 5 years ago
parent
commit
be5e36627d
  1. 41
      src/source_language_protocol.cpp
  2. 1
      src/source_language_protocol.hpp
  3. 61
      src/utility.cpp
  4. 5
      src/utility.hpp
  5. 34
      tests/utility_test.cpp

41
src/source_language_protocol.cpp

@ -254,6 +254,8 @@ LanguageProtocol::Capabilities LanguageProtocol::Client::initialize(Source::Lang
capabilities.type_coverage = capabilities_pt->get<bool>("typeCoverageProvider", false);
}
capabilities.use_line_index = result.get<std::string>("offsetEncoding", "") == "utf-8";
write_notification("initialized", "");
}
result_processed.set_value();
@ -709,13 +711,11 @@ void Source::LanguageProtocolView::setup_navigation_and_refactoring() {
});
result_processed.get_future().get();
auto embolden_token = [](std::string &line, int token_start_pos, int token_end_pos) {
Glib::ustring uline = std::move(line);
//markup token as bold
auto embolden_token = [this](std::string &line, int token_start_pos, int token_end_pos) {
// Markup token as bold
size_t pos = 0;
while((pos = uline.find('&', pos)) != Glib::ustring::npos) {
size_t pos2 = uline.find(';', pos + 2);
while((pos = line.find('&', pos)) != std::string::npos) {
size_t pos2 = line.find(';', pos + 2);
if(static_cast<size_t>(token_start_pos) > pos) {
token_start_pos += pos2 - pos;
token_end_pos += pos2 - pos;
@ -726,22 +726,27 @@ void Source::LanguageProtocolView::setup_navigation_and_refactoring() {
break;
pos = pos2 + 1;
}
if(static_cast<size_t>(token_start_pos) > uline.size())
token_start_pos = uline.size();
if(static_cast<size_t>(token_end_pos) > uline.size())
token_end_pos = uline.size();
if(token_start_pos != token_end_pos) {
uline.insert(token_end_pos, "</b>");
uline.insert(token_start_pos, "<b>");
if(!capabilities.use_line_index) {
auto code_units_diff = token_end_pos - token_start_pos;
token_start_pos = utf16_code_units_byte_count(line, token_start_pos);
token_end_pos = token_start_pos + utf16_code_units_byte_count(line, code_units_diff, token_start_pos);
}
if(static_cast<size_t>(token_start_pos) > line.size())
token_start_pos = line.size();
if(static_cast<size_t>(token_end_pos) > line.size())
token_end_pos = line.size();
if(token_start_pos < token_end_pos) {
line.insert(token_end_pos, "</b>");
line.insert(token_start_pos, "<b>");
}
size_t start_pos = 0;
while(start_pos < uline.size() && (uline[start_pos] == ' ' || uline[start_pos] == '\t'))
while(start_pos < line.size() && (line[start_pos] == ' ' || line[start_pos] == '\t'))
++start_pos;
if(start_pos > 0)
uline.erase(0, start_pos);
line = std::move(uline);
line.erase(0, start_pos);
};
std::unordered_map<std::string, std::vector<std::string>> file_lines;
@ -1278,6 +1283,8 @@ void Source::LanguageProtocolView::update_diagnostics(std::vector<LanguageProtoc
}
/// Returns the text iterator for a (line, pos) position received from the language server.
/// When capabilities.use_line_index is set, pos is passed to get_iter_at_line_index(),
/// otherwise to get_iter_at_line_offset().
/// NOTE(review): presumably "index" means a byte index and "offset" a character offset
/// within the line, following the GTK text buffer conventions - confirm.
Gtk::TextIter Source::LanguageProtocolView::get_iter_at_line_pos(int line, int pos) {
  return capabilities.use_line_index ? get_iter_at_line_index(line, pos)
                                     : get_iter_at_line_offset(line, pos);
}

1
src/source_language_protocol.hpp

@ -110,6 +110,7 @@ namespace LanguageProtocol {
bool rename = false;
bool code_action = false;
bool type_coverage = false;
bool use_line_index = false;
};
std::string escape_text(std::string text);

61
src/utility.cpp

@ -9,13 +9,70 @@ ScopeGuard::~ScopeGuard() {
size_t utf8_character_count(const std::string &text, size_t pos, size_t length) noexcept {
size_t count = 0;
auto size = length == std::string::npos ? text.size() : std::min(pos + length, text.size());
for(; pos < size; ++pos) {
if(static_cast<unsigned char>(text[pos]) <= 0b01111111 || static_cast<unsigned char>(text[pos]) >= 0b11000000)
for(; pos < size;) {
if(static_cast<unsigned char>(text[pos]) <= 0b01111111) {
++count;
++pos;
}
else if(static_cast<unsigned char>(text[pos]) >= 0b11111000) // Invalid UTF-8 byte
++pos;
else if(static_cast<unsigned char>(text[pos]) >= 0b11110000) {
++count;
pos += 4;
}
else if(static_cast<unsigned char>(text[pos]) >= 0b11100000) {
++count;
pos += 3;
}
else if(static_cast<unsigned char>(text[pos]) >= 0b11000000) {
++count;
pos += 2;
}
else // // Invalid start of UTF-8 character
++pos;
}
return count;
}
/// Returns the number of bytes of the UTF-8 encoded text, counted from byte index
/// start_pos, that are covered by the given number of UTF-16 code units.
///
/// - Characters encoded with 1-3 UTF-8 bytes count as one UTF-16 code unit.
/// - Characters encoded with 4 UTF-8 bytes count as two UTF-16 code units (surrogate pair).
///   Such a character is always consumed as a whole, also when only one code unit remains.
/// - Invalid lead bytes and stray continuation bytes are skipped without being counted.
///   Continuation bytes following a valid lead byte are not validated.
///
/// The result never exceeds the number of bytes available after start_pos. It is 0 when
/// code_units is 0 or when start_pos is at or past the end of text.
size_t utf16_code_units_byte_count(const std::string &text, size_t code_units, size_t start_pos) {
  if(code_units == 0 || start_pos >= text.size())
    return 0;

  size_t pos = start_pos;
  size_t current_code_units = 0;
  while(pos < text.size() && current_code_units < code_units) {
    const auto byte = static_cast<unsigned char>(text[pos]);
    if(byte <= 0b01111111) { // Single byte (ASCII)
      ++current_code_units;
      ++pos;
    }
    else if(byte >= 0b11111000) // Invalid UTF-8 byte
      ++pos;
    else if(byte >= 0b11110000) { // Lead byte of a 4-byte sequence: two UTF-16 code units
      current_code_units += 2;
      pos += 4;
    }
    else if(byte >= 0b11100000) { // Lead byte of a 3-byte sequence
      ++current_code_units;
      pos += 3;
    }
    else if(byte >= 0b11000000) { // Lead byte of a 2-byte sequence
      ++current_code_units;
      pos += 2;
    }
    else // Invalid start of UTF-8 character (continuation byte)
      ++pos;
  }

  // A truncated multi-byte sequence at the end of text can move pos past the end:
  // clamp so that the returned count only covers bytes that actually exist.
  return std::min(pos, text.size()) - start_pos;
}
bool starts_with(const char *str, const std::string &test) noexcept {
for(size_t i = 0; i < test.size(); ++i) {
if(*str == '\0')

5
src/utility.hpp

@ -8,9 +8,12 @@ public:
~ScopeGuard();
};
/// Returns number of utf8 characters in text argument
/// Returns number of utf8 characters in text
size_t utf8_character_count(const std::string &text, size_t pos = 0, size_t length = std::string::npos) noexcept;
/// Returns the number of bytes in text, counted from byte index start_pos, that are covered by the given number of UTF-16 code units
size_t utf16_code_units_byte_count(const std::string &text, size_t code_units, size_t start_pos = 0);
bool starts_with(const char *str, const std::string &test) noexcept;
bool starts_with(const char *str, const char *test) noexcept;
bool starts_with(const std::string &str, const std::string &test) noexcept;

34
tests/utility_test.cpp

@ -16,6 +16,40 @@ int main() {
g_assert(utf8_character_count("æøå") == 3);
g_assert(utf8_character_count("æøåtest") == 7);
g_assert_cmpuint(utf16_code_units_byte_count("", 0), ==, 0);
g_assert_cmpuint(utf16_code_units_byte_count("", 1), ==, 0);
g_assert_cmpuint(utf16_code_units_byte_count("test", 0), ==, 0);
g_assert_cmpuint(utf16_code_units_byte_count("test", 1), ==, 1);
g_assert_cmpuint(utf16_code_units_byte_count("test", 3), ==, 3);
g_assert_cmpuint(utf16_code_units_byte_count("test", 4), ==, 4);
g_assert_cmpuint(utf16_code_units_byte_count("test", 5), ==, 4);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 0), ==, 0);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 1), ==, 2);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 2), ==, 4);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 3), ==, 6);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 4), ==, 6);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 5), ==, 6);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 0, 2), ==, 0);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 1, 2), ==, 2);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 2, 2), ==, 4);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 3, 2), ==, 4);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 1, 6), ==, 0);
g_assert_cmpuint(utf16_code_units_byte_count("æøå", 0, 6), ==, 0);
g_assert_cmpuint(strlen("🔥"), ==, 4); // Fire emoji
g_assert_cmpuint(utf16_code_units_byte_count("🔥", 0), ==, 0); // Fire emoji
g_assert_cmpuint(utf16_code_units_byte_count("🔥", 2), ==, 4); // Fire emoji
g_assert_cmpuint(utf16_code_units_byte_count("🔥", 3), ==, 4); // Fire emoji
g_assert_cmpuint(utf16_code_units_byte_count("test🔥test", 0), ==, 0); // Fire emoji between test words
g_assert_cmpuint(utf16_code_units_byte_count("test🔥test", 4), ==, 4); // Fire emoji between test words
g_assert_cmpuint(utf16_code_units_byte_count("test🔥test", 6), ==, 8); // Fire emoji between test words
g_assert_cmpuint(utf16_code_units_byte_count("test🔥test", 7), ==, 9); // Fire emoji between test words
g_assert_cmpuint(utf16_code_units_byte_count("test🔥test", 10), ==, 12); // Fire emoji between test words
g_assert_cmpuint(utf16_code_units_byte_count("test🔥test", 11), ==, 12); // Fire emoji between test words
std::string empty;
std::string test("test");
std::string testtest("testtest");

Loading…
Cancel
Save