diff --git a/include/string.hpp b/include/string.hpp
index bfd46183..7817e7bb 100644
--- a/include/string.hpp
+++ b/include/string.hpp
@@ -33,10 +33,14 @@ std::string lowercase(const std::string& s);
 std::string uppercase(const std::string& s);
 /** Removes whitespace from beginning and end of string. */
 std::string trim(const std::string& s);
-/** Truncates and adds "..." to the end of a string, not exceeding `len` characters. */
-std::string ellipsize(const std::string& s, size_t len);
-/** Truncates and adds "..." to the beginning of a string, not exceeding `len` characters. */
-std::string ellipsizePrefix(const std::string& s, size_t len);
+/** Truncates a string to not exceed a number of UTF-8 codepoints. */
+std::string truncate(const std::string& s, size_t maxCodepoints);
+/** Truncates the beginning of a string to not exceed a number of UTF-8 codepoints. */
+std::string truncatePrefix(const std::string& s, size_t maxCodepoints);
+/** Truncates and adds "…" to the end of a string, to not exceed a number of UTF-8 codepoints. */
+std::string ellipsize(const std::string& s, size_t maxCodepoints);
+/** Truncates and adds "…" to the beginning of a string, to not exceed a number of UTF-8 codepoints. */
+std::string ellipsizePrefix(const std::string& s, size_t maxCodepoints);
 /** Returns whether a string starts with the given substring. */
 bool startsWith(const std::string& str, const std::string& prefix);
 /** Returns whether a string ends with the given substring. */
diff --git a/src/string.cpp b/src/string.cpp
index bdd0f437..bbb73138 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -77,19 +77,67 @@ std::string trim(const std::string& s) {
 }
 
 
-std::string ellipsize(const std::string& s, size_t len) {
-	if (s.size() <= len)
+std::string truncate(const std::string& s, size_t maxCodepoints) {
+	if (s.empty() || maxCodepoints == 0)
+		return "";
+
+	size_t pos = 0;
+	for (size_t i = 0; i < maxCodepoints; i++) {
+		// If remaining bytes are less than remaining codepoints, the string can't possibly be truncated.
+		if (s.size() - pos <= maxCodepoints - i)
+			return s;
+
+		pos = UTF8NextCodepoint(s, pos);
+		// Check if at end
+		if (pos >= s.size())
+			return s;
+	}
+
+	return s.substr(0, pos);
+}
+
+
+std::string truncatePrefix(const std::string& s, size_t maxCodepoints) {
+	if (s.empty() || maxCodepoints == 0)
+		return "";
+
+	size_t pos = s.size();
+	for (size_t i = 0; i < maxCodepoints; i++) {
+		// If remaining bytes are less than remaining codepoints, the string can't possibly be truncated.
+		if (pos <= maxCodepoints - i)
+			return s;
+
+		pos = UTF8PrevCodepoint(s, pos);
+		// Check if at beginning
+		if (pos == 0)
+			return s;
+	}
+
+	return s.substr(pos);
+}
+
+
+std::string ellipsize(const std::string& s, size_t maxCodepoints) {
+	if (maxCodepoints == 0)
+		return "";
+	std::string s2 = truncate(s, maxCodepoints);
+	if (s2 == s)
 		return s;
-	else
-		return s.substr(0, len - 3) + "...";
+	// If string was truncated, back up a codepoint and add a Unicode ellipses character
+	size_t pos = UTF8PrevCodepoint(s2, s2.size());
+	return s2.substr(0, pos) + "…";
 }
 
 
-std::string ellipsizePrefix(const std::string& s, size_t len) {
-	if (s.size() <= len)
+std::string ellipsizePrefix(const std::string& s, size_t maxCodepoints) {
+	if (maxCodepoints == 0)
+		return "";
+	std::string s2 = truncatePrefix(s, maxCodepoints);
+	if (s2 == s)
 		return s;
-	else
-		return "..." + s.substr(s.size() - (len - 3));
+	// If string was truncated, move forward a codepoint and prepend a Unicode ellipses character
+	size_t pos = UTF8NextCodepoint(s2, 0);
+	return "…" + s2.substr(pos);
 }
 
 
@@ -386,22 +434,28 @@ size_t UTF8NextCodepoint(const std::string& s8, size_t i) {
 }
 
 
-size_t UTF8PrevCodepoint(const std::string& s8, size_t i) {
-	if (i == 0) return 0;
-	// Check the previous 3 bytes
-	for (size_t j = 1; j <= 3; j++) {
-		i--;
-		if (i == 0) return 0;
-		// Check out of bounds
-		if (i >= s8.size())
-			return s8.size();
+/** Finds the byte index of the front of a codepoint by reversing until a non-continuation byte is found. */
+static size_t UTF8StartCodepoint(const std::string& s8, size_t i) {
+	// Check out of bounds
+	if (i >= s8.size())
+		return s8.size();
+	while (i > 0) {
 		// Check for continuation byte 0b10xxxxxx
-		if ((s8[i] & 0xc0) != 0x80) return i;
+		if ((s8[i] & 0xc0) != 0x80)
+			break;
+		i--;
 	}
 	return i;
 }
 
 
+size_t UTF8PrevCodepoint(const std::string& s8, size_t i) {
+	if (i == 0)
+		return 0;
+	return UTF8StartCodepoint(s8, i - 1);
+}
+
+
 #if defined ARCH_WIN
 std::string UTF16toUTF8(const std::wstring& w) {
 	if (w.empty())