Browse Source

Support UTF-8 strings in string::ellipsize() and ellipsizePrefix(). Add truncatePrefix().

tags/v2.6.1
Andrew Belt 4 months ago
parent
commit
6c86c85d55
2 changed files with 80 additions and 22 deletions
  1. +8
    -4
      include/string.hpp
  2. +72
    -18
      src/string.cpp

+ 8
- 4
include/string.hpp View File

@@ -33,10 +33,14 @@ std::string lowercase(const std::string& s);
std::string uppercase(const std::string& s); std::string uppercase(const std::string& s);
/** Removes whitespace from beginning and end of string. */ /** Removes whitespace from beginning and end of string. */
std::string trim(const std::string& s); std::string trim(const std::string& s);
/** Truncates and adds "..." to the end of a string, not exceeding `len` characters. */
std::string ellipsize(const std::string& s, size_t len);
/** Truncates and adds "..." to the beginning of a string, not exceeding `len` characters. */
std::string ellipsizePrefix(const std::string& s, size_t len);
/** Truncates a string to not exceed a number of UTF-8 codepoints. */
std::string truncate(const std::string& s, size_t maxCodepoints);
/** Truncates the beginning of a string to not exceed a number of UTF-8 codepoints. */
std::string truncatePrefix(const std::string& s, size_t maxCodepoints);
/** Truncates and adds "…" to the end of a string, to not exceed a number of UTF-8 codepoints. */
std::string ellipsize(const std::string& s, size_t maxCodepoints);
/** Truncates and adds "…" to the beginning of a string, to not exceed a number of UTF-8 codepoints. */
std::string ellipsizePrefix(const std::string& s, size_t maxCodepoints);
/** Returns whether a string starts with the given substring. */ /** Returns whether a string starts with the given substring. */
bool startsWith(const std::string& str, const std::string& prefix); bool startsWith(const std::string& str, const std::string& prefix);
/** Returns whether a string ends with the given substring. */ /** Returns whether a string ends with the given substring. */


+ 72
- 18
src/string.cpp View File

@@ -77,19 +77,67 @@ std::string trim(const std::string& s) {
} }




std::string ellipsize(const std::string& s, size_t len) {
if (s.size() <= len)
std::string truncate(const std::string& s, size_t maxCodepoints) {
if (s.empty() || maxCodepoints == 0)
return "";

size_t pos = 0;
for (size_t i = 0; i < maxCodepoints; i++) {
// If remaining bytes are less than remaining codepoints, the string can't possibly be truncated.
if (s.size() - pos <= maxCodepoints - i)
return s;

pos = UTF8NextCodepoint(s, pos);
// Check if at end
if (pos >= s.size())
return s;
}

return s.substr(0, pos);
}


std::string truncatePrefix(const std::string& s, size_t maxCodepoints) {
if (s.empty() || maxCodepoints == 0)
return "";

size_t pos = s.size();
for (size_t i = 0; i < maxCodepoints; i++) {
// If remaining bytes are less than remaining codepoints, the string can't possibly be truncated.
if (pos <= maxCodepoints - i)
return s;

pos = UTF8PrevCodepoint(s, pos);
// Check if at beginning
if (pos == 0)
return s;
}

return s.substr(pos);
}


std::string ellipsize(const std::string& s, size_t maxCodepoints) {
if (maxCodepoints == 0)
return "";
std::string s2 = truncate(s, maxCodepoints);
if (s2 == s)
return s; return s;
else
return s.substr(0, len - 3) + "...";
// If string was truncated, back up a codepoint and add a Unicode ellipses character
size_t pos = UTF8PrevCodepoint(s2, s2.size());
return s2.substr(0, pos) + "…";
} }




std::string ellipsizePrefix(const std::string& s, size_t len) {
if (s.size() <= len)
std::string ellipsizePrefix(const std::string& s, size_t maxCodepoints) {
if (maxCodepoints == 0)
return "";
std::string s2 = truncatePrefix(s, maxCodepoints);
if (s2 == s)
return s; return s;
else
return "..." + s.substr(s.size() - (len - 3));
// If string was truncated, move forward a codepoint and prepend a Unicode ellipses character
size_t pos = UTF8NextCodepoint(s2, 0);
return "…" + s2.substr(pos);
} }




@@ -386,22 +434,28 @@ size_t UTF8NextCodepoint(const std::string& s8, size_t i) {
} }




size_t UTF8PrevCodepoint(const std::string& s8, size_t i) {
if (i == 0) return 0;
// Check the previous 3 bytes
for (size_t j = 1; j <= 3; j++) {
i--;
if (i == 0) return 0;
// Check out of bounds
if (i >= s8.size())
return s8.size();
/** Finds the byte index of the front of a codepoint by reversing until a non-continuation byte is found. */
static size_t UTF8StartCodepoint(const std::string& s8, size_t i) {
// Check out of bounds
if (i >= s8.size())
return s8.size();
while (i > 0) {
// Check for continuation byte 0b10xxxxxx // Check for continuation byte 0b10xxxxxx
if ((s8[i] & 0xc0) != 0x80) return i;
if ((s8[i] & 0xc0) != 0x80)
break;
i--;
} }
return i; return i;
} }




size_t UTF8PrevCodepoint(const std::string& s8, size_t i) {
if (i == 0)
return 0;
return UTF8StartCodepoint(s8, i - 1);
}


#if defined ARCH_WIN #if defined ARCH_WIN
std::string UTF16toUTF8(const std::wstring& w) { std::string UTF16toUTF8(const std::wstring& w) {
if (w.empty()) if (w.empty())


Loading…
Cancel
Save