Audio plugin host https://kx.studio/carla
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

CharacterFunctions.h 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
  1. /*
  2. ==============================================================================
  3. This file is part of the Water library.
  4. Copyright (c) 2016 ROLI Ltd.
  5. Copyright (C) 2017 Filipe Coelho <falktx@falktx.com>
  6. Permission is granted to use this software under the terms of the ISC license
  7. http://www.isc.org/downloads/software-support-policy/isc-license/
  8. Permission to use, copy, modify, and/or distribute this software for any
  9. purpose with or without fee is hereby granted, provided that the above
  10. copyright notice and this permission notice appear in all copies.
  11. THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
  12. TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  13. FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
  14. OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
  15. USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
  16. TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  17. OF THIS SOFTWARE.
  18. ==============================================================================
  19. */
  20. #ifndef WATER_CHARACTERFUNCTIONS_H_INCLUDED
  21. #define WATER_CHARACTERFUNCTIONS_H_INCLUDED
  22. #include "../memory/Memory.h"
  23. #include <algorithm>
  24. #include <limits>
  25. namespace water {
  26. /** A platform-independent 32-bit unicode character type. */
  27. typedef uint32 water_uchar;
  28. //==============================================================================
  29. /**
  30. A collection of functions for manipulating characters and character strings.
  31. Most of these methods are designed for internal use by the String and CharPointer
  32. classes, but some of them may be useful to call directly.
  33. @see String, CharPointer_UTF8, CharPointer_UTF16, CharPointer_UTF32
  34. */
  35. class CharacterFunctions
  36. {
  37. public:
  38. //==============================================================================
    /** Converts a character to upper-case. */
    static water_uchar toUpperCase (water_uchar character) noexcept;
    /** Converts a character to lower-case. */
    static water_uchar toLowerCase (water_uchar character) noexcept;
    /** Checks whether a unicode character is upper-case. */
    static bool isUpperCase (water_uchar character) noexcept;
    /** Checks whether a unicode character is lower-case. */
    static bool isLowerCase (water_uchar character) noexcept;
    /** Checks whether an 8-bit character is whitespace. */
    static bool isWhitespace (char character) noexcept;
    /** Checks whether a unicode character is whitespace. */
    static bool isWhitespace (water_uchar character) noexcept;
    /** Checks whether an 8-bit character is a digit. */
    static bool isDigit (char character) noexcept;
    /** Checks whether a unicode character is a digit. */
    static bool isDigit (water_uchar character) noexcept;
    /** Checks whether an 8-bit character is alphabetic. */
    static bool isLetter (char character) noexcept;
    /** Checks whether a unicode character is alphabetic. */
    static bool isLetter (water_uchar character) noexcept;
    /** Checks whether an 8-bit character is alphabetic or numeric. */
    static bool isLetterOrDigit (char character) noexcept;
    /** Checks whether a unicode character is alphabetic or numeric. */
    static bool isLetterOrDigit (water_uchar character) noexcept;
    /** Checks whether an 8-bit character is a printable character, i.e. alphabetic,
        numeric, a punctuation character or a space.
    */
    static bool isPrintable (char character) noexcept;
    /** Checks whether a unicode character is a printable character, i.e. alphabetic,
        numeric, a punctuation character or a space.
    */
    static bool isPrintable (water_uchar character) noexcept;
    /** Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit. */
    static int getHexDigitValue (water_uchar digit) noexcept;
    /** Converts a byte of Windows 1252 codepage to unicode. */
    static water_uchar getUnicodeCharFromWindows1252Codepage (uint8 windows1252Char) noexcept;
  75. //==============================================================================
  76. /** Parses a character string to read a floating-point number.
  77. Note that this will advance the pointer that is passed in, leaving it at
  78. the end of the number.
  79. */
  80. template <typename CharPointerType>
  81. static double readDoubleValue (CharPointerType& text) noexcept
  82. {
  83. double result[3] = { 0 }, accumulator[2] = { 0 };
  84. int exponentAdjustment[2] = { 0 }, exponentAccumulator[2] = { -1, -1 };
  85. int exponent = 0, decPointIndex = 0, digit = 0;
  86. int lastDigit = 0, numSignificantDigits = 0;
  87. bool isNegative = false, digitsFound = false;
  88. const int maxSignificantDigits = 15 + 2;
  89. text = text.findEndOfWhitespace();
  90. water_uchar c = *text;
  91. switch (c)
  92. {
  93. case '-': isNegative = true; // fall-through..
  94. case '+': c = *++text;
  95. }
  96. switch (c)
  97. {
  98. case 'n':
  99. case 'N':
  100. if ((text[1] == 'a' || text[1] == 'A') && (text[2] == 'n' || text[2] == 'N'))
  101. return std::numeric_limits<double>::quiet_NaN();
  102. break;
  103. case 'i':
  104. case 'I':
  105. if ((text[1] == 'n' || text[1] == 'N') && (text[2] == 'f' || text[2] == 'F'))
  106. return std::numeric_limits<double>::infinity();
  107. break;
  108. }
  109. for (;;)
  110. {
  111. if (text.isDigit())
  112. {
  113. lastDigit = digit;
  114. digit = (int) text.getAndAdvance() - '0';
  115. digitsFound = true;
  116. if (decPointIndex != 0)
  117. exponentAdjustment[1]++;
  118. if (numSignificantDigits == 0 && digit == 0)
  119. continue;
  120. if (++numSignificantDigits > maxSignificantDigits)
  121. {
  122. if (digit > 5)
  123. ++accumulator [decPointIndex];
  124. else if (digit == 5 && (lastDigit & 1) != 0)
  125. ++accumulator [decPointIndex];
  126. if (decPointIndex > 0)
  127. exponentAdjustment[1]--;
  128. else
  129. exponentAdjustment[0]++;
  130. while (text.isDigit())
  131. {
  132. ++text;
  133. if (decPointIndex == 0)
  134. exponentAdjustment[0]++;
  135. }
  136. }
  137. else
  138. {
  139. const double maxAccumulatorValue = (double) ((std::numeric_limits<unsigned int>::max() - 9) / 10);
  140. if (accumulator [decPointIndex] > maxAccumulatorValue)
  141. {
  142. result [decPointIndex] = mulexp10 (result [decPointIndex], exponentAccumulator [decPointIndex])
  143. + accumulator [decPointIndex];
  144. accumulator [decPointIndex] = 0;
  145. exponentAccumulator [decPointIndex] = 0;
  146. }
  147. accumulator [decPointIndex] = accumulator[decPointIndex] * 10 + digit;
  148. exponentAccumulator [decPointIndex]++;
  149. }
  150. }
  151. else if (decPointIndex == 0 && *text == '.')
  152. {
  153. ++text;
  154. decPointIndex = 1;
  155. if (numSignificantDigits > maxSignificantDigits)
  156. {
  157. while (text.isDigit())
  158. ++text;
  159. break;
  160. }
  161. }
  162. else
  163. {
  164. break;
  165. }
  166. }
  167. result[0] = mulexp10 (result[0], exponentAccumulator[0]) + accumulator[0];
  168. if (decPointIndex != 0)
  169. result[1] = mulexp10 (result[1], exponentAccumulator[1]) + accumulator[1];
  170. c = *text;
  171. if ((c == 'e' || c == 'E') && digitsFound)
  172. {
  173. bool negativeExponent = false;
  174. switch (*++text)
  175. {
  176. case '-': negativeExponent = true; // fall-through..
  177. case '+': ++text;
  178. }
  179. while (text.isDigit())
  180. exponent = (exponent * 10) + ((int) text.getAndAdvance() - '0');
  181. if (negativeExponent)
  182. exponent = -exponent;
  183. }
  184. double r = mulexp10 (result[0], exponent + exponentAdjustment[0]);
  185. if (decPointIndex != 0)
  186. r += mulexp10 (result[1], exponent - exponentAdjustment[1]);
  187. return isNegative ? -r : r;
  188. }
  189. /** Parses a character string, to read a floating-point value. */
  190. template <typename CharPointerType>
  191. static double getDoubleValue (CharPointerType text) noexcept
  192. {
  193. return readDoubleValue (text);
  194. }
  195. //==============================================================================
  196. /** Parses a character string, to read an integer value. */
  197. template <typename IntType, typename CharPointerType>
  198. static IntType getIntValue (const CharPointerType text) noexcept
  199. {
  200. IntType v = 0;
  201. CharPointerType s (text.findEndOfWhitespace());
  202. const bool isNeg = *s == '-';
  203. if (isNeg)
  204. ++s;
  205. for (;;)
  206. {
  207. const water_uchar c = s.getAndAdvance();
  208. if (c >= '0' && c <= '9')
  209. v = v * 10 + (IntType) (c - '0');
  210. else
  211. break;
  212. }
  213. return isNeg ? -v : v;
  214. }
  215. template <typename ResultType>
  216. struct HexParser
  217. {
  218. template <typename CharPointerType>
  219. static ResultType parse (CharPointerType t) noexcept
  220. {
  221. ResultType result = 0;
  222. while (! t.isEmpty())
  223. {
  224. const int hexValue = CharacterFunctions::getHexDigitValue (t.getAndAdvance());
  225. if (hexValue >= 0)
  226. result = (result << 4) | hexValue;
  227. }
  228. return result;
  229. }
  230. };
  231. //==============================================================================
  232. /** Counts the number of characters in a given string, stopping if the count exceeds
  233. a specified limit. */
  234. template <typename CharPointerType>
  235. static size_t lengthUpTo (CharPointerType text, const size_t maxCharsToCount) noexcept
  236. {
  237. size_t len = 0;
  238. while (len < maxCharsToCount && text.getAndAdvance() != 0)
  239. ++len;
  240. return len;
  241. }
  242. /** Counts the number of characters in a given string, stopping if the count exceeds
  243. a specified end-pointer. */
  244. template <typename CharPointerType>
  245. static size_t lengthUpTo (CharPointerType start, const CharPointerType end) noexcept
  246. {
  247. size_t len = 0;
  248. while (start < end && start.getAndAdvance() != 0)
  249. ++len;
  250. return len;
  251. }
  252. /** Copies null-terminated characters from one string to another. */
  253. template <typename DestCharPointerType, typename SrcCharPointerType>
  254. static void copyAll (DestCharPointerType& dest, SrcCharPointerType src) noexcept
  255. {
  256. while (water_uchar c = src.getAndAdvance())
  257. dest.write (c);
  258. dest.writeNull();
  259. }
  260. /** Copies characters from one string to another, up to a null terminator
  261. or a given byte size limit. */
  262. template <typename DestCharPointerType, typename SrcCharPointerType>
  263. static size_t copyWithDestByteLimit (DestCharPointerType& dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept
  264. {
  265. typename DestCharPointerType::CharType const* const startAddress = dest.getAddress();
  266. ssize_t maxBytes = (ssize_t) maxBytesToWrite;
  267. maxBytes -= sizeof (typename DestCharPointerType::CharType); // (allow for a terminating null)
  268. for (;;)
  269. {
  270. const water_uchar c = src.getAndAdvance();
  271. const size_t bytesNeeded = DestCharPointerType::getBytesRequiredFor (c);
  272. maxBytes -= bytesNeeded;
  273. if (c == 0 || maxBytes < 0)
  274. break;
  275. dest.write (c);
  276. }
  277. dest.writeNull();
  278. return (size_t) getAddressDifference (dest.getAddress(), startAddress)
  279. + sizeof (typename DestCharPointerType::CharType);
  280. }
  281. /** Copies characters from one string to another, up to a null terminator
  282. or a given maximum number of characters. */
  283. template <typename DestCharPointerType, typename SrcCharPointerType>
  284. static void copyWithCharLimit (DestCharPointerType& dest, SrcCharPointerType src, int maxChars) noexcept
  285. {
  286. while (--maxChars > 0)
  287. {
  288. const water_uchar c = src.getAndAdvance();
  289. if (c == 0)
  290. break;
  291. dest.write (c);
  292. }
  293. dest.writeNull();
  294. }
  295. /** Compares two characters. */
  296. static inline int compare (water_uchar char1, water_uchar char2) noexcept
  297. {
  298. if (int diff = static_cast<int> (char1) - static_cast<int> (char2))
  299. return diff < 0 ? -1 : 1;
  300. return 0;
  301. }
  302. /** Compares two null-terminated character strings. */
  303. template <typename CharPointerType1, typename CharPointerType2>
  304. static int compare (CharPointerType1 s1, CharPointerType2 s2) noexcept
  305. {
  306. for (;;)
  307. {
  308. const water_uchar c1 = s1.getAndAdvance();
  309. if (int diff = compare (c1, s2.getAndAdvance()))
  310. return diff;
  311. if (c1 == 0)
  312. break;
  313. }
  314. return 0;
  315. }
  316. /** Compares two null-terminated character strings, up to a given number of characters. */
  317. template <typename CharPointerType1, typename CharPointerType2>
  318. static int compareUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
  319. {
  320. while (--maxChars >= 0)
  321. {
  322. const water_uchar c1 = s1.getAndAdvance();
  323. if (int diff = compare (c1, s2.getAndAdvance()))
  324. return diff;
  325. if (c1 == 0)
  326. break;
  327. }
  328. return 0;
  329. }
  330. /** Compares two characters, using a case-independant match. */
  331. static inline int compareIgnoreCase (water_uchar char1, water_uchar char2) noexcept
  332. {
  333. return char1 != char2 ? compare (toUpperCase (char1), toUpperCase (char2)) : 0;
  334. }
  335. /** Compares two null-terminated character strings, using a case-independant match. */
  336. template <typename CharPointerType1, typename CharPointerType2>
  337. static int compareIgnoreCase (CharPointerType1 s1, CharPointerType2 s2) noexcept
  338. {
  339. for (;;)
  340. {
  341. const water_uchar c1 = s1.getAndAdvance();
  342. if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
  343. return diff;
  344. if (c1 == 0)
  345. break;
  346. }
  347. return 0;
  348. }
  349. /** Compares two null-terminated character strings, using a case-independent match. */
  350. template <typename CharPointerType1, typename CharPointerType2>
  351. static int compareIgnoreCaseUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
  352. {
  353. while (--maxChars >= 0)
  354. {
  355. const water_uchar c1 = s1.getAndAdvance();
  356. if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
  357. return diff;
  358. if (c1 == 0)
  359. break;
  360. }
  361. return 0;
  362. }
  363. /** Finds the character index of a given substring in another string.
  364. Returns -1 if the substring is not found.
  365. */
  366. template <typename CharPointerType1, typename CharPointerType2>
  367. static int indexOf (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
  368. {
  369. int index = 0;
  370. const int substringLength = (int) substringToLookFor.length();
  371. for (;;)
  372. {
  373. if (textToSearch.compareUpTo (substringToLookFor, substringLength) == 0)
  374. return index;
  375. if (textToSearch.getAndAdvance() == 0)
  376. return -1;
  377. ++index;
  378. }
  379. }
  380. /** Returns a pointer to the first occurrence of a substring in a string.
  381. If the substring is not found, this will return a pointer to the string's
  382. null terminator.
  383. */
  384. template <typename CharPointerType1, typename CharPointerType2>
  385. static CharPointerType1 find (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
  386. {
  387. const int substringLength = (int) substringToLookFor.length();
  388. while (textToSearch.compareUpTo (substringToLookFor, substringLength) != 0
  389. && ! textToSearch.isEmpty())
  390. ++textToSearch;
  391. return textToSearch;
  392. }
  393. /** Returns a pointer to the first occurrence of a substring in a string.
  394. If the substring is not found, this will return a pointer to the string's
  395. null terminator.
  396. */
  397. template <typename CharPointerType>
  398. static CharPointerType find (CharPointerType textToSearch, const water_uchar charToLookFor) noexcept
  399. {
  400. for (;; ++textToSearch)
  401. {
  402. const water_uchar c = *textToSearch;
  403. if (c == charToLookFor || c == 0)
  404. break;
  405. }
  406. return textToSearch;
  407. }
  408. /** Finds the character index of a given substring in another string, using
  409. a case-independent match.
  410. Returns -1 if the substring is not found.
  411. */
  412. template <typename CharPointerType1, typename CharPointerType2>
  413. static int indexOfIgnoreCase (CharPointerType1 haystack, const CharPointerType2 needle) noexcept
  414. {
  415. int index = 0;
  416. const int needleLength = (int) needle.length();
  417. for (;;)
  418. {
  419. if (haystack.compareIgnoreCaseUpTo (needle, needleLength) == 0)
  420. return index;
  421. if (haystack.getAndAdvance() == 0)
  422. return -1;
  423. ++index;
  424. }
  425. }
  426. /** Finds the character index of a given character in another string.
  427. Returns -1 if the character is not found.
  428. */
  429. template <typename Type>
  430. static int indexOfChar (Type text, const water_uchar charToFind) noexcept
  431. {
  432. int i = 0;
  433. while (! text.isEmpty())
  434. {
  435. if (text.getAndAdvance() == charToFind)
  436. return i;
  437. ++i;
  438. }
  439. return -1;
  440. }
  441. /** Finds the character index of a given character in another string, using
  442. a case-independent match.
  443. Returns -1 if the character is not found.
  444. */
  445. template <typename Type>
  446. static int indexOfCharIgnoreCase (Type text, water_uchar charToFind) noexcept
  447. {
  448. charToFind = CharacterFunctions::toLowerCase (charToFind);
  449. int i = 0;
  450. while (! text.isEmpty())
  451. {
  452. if (text.toLowerCase() == charToFind)
  453. return i;
  454. ++text;
  455. ++i;
  456. }
  457. return -1;
  458. }
  459. /** Returns a pointer to the first non-whitespace character in a string.
  460. If the string contains only whitespace, this will return a pointer
  461. to its null terminator.
  462. */
  463. template <typename Type>
  464. static Type findEndOfWhitespace (Type text) noexcept
  465. {
  466. while (text.isWhitespace())
  467. ++text;
  468. return text;
  469. }
  470. /** Returns a pointer to the first character in the string which is found in
  471. the breakCharacters string.
  472. */
  473. template <typename Type, typename BreakType>
  474. static Type findEndOfToken (Type text, const BreakType breakCharacters, const Type quoteCharacters)
  475. {
  476. water_uchar currentQuoteChar = 0;
  477. while (! text.isEmpty())
  478. {
  479. const water_uchar c = text.getAndAdvance();
  480. if (currentQuoteChar == 0 && breakCharacters.indexOf (c) >= 0)
  481. {
  482. --text;
  483. break;
  484. }
  485. if (quoteCharacters.indexOf (c) >= 0)
  486. {
  487. if (currentQuoteChar == 0)
  488. currentQuoteChar = c;
  489. else if (currentQuoteChar == c)
  490. currentQuoteChar = 0;
  491. }
  492. }
  493. return text;
  494. }
  495. private:
    // Private helper called by readDoubleValue(); its definition is not in the
    // visible part of this header.
    // NOTE(review): presumably returns value scaled by 10^exponent - confirm
    // against the implementation.
    static double mulexp10 (const double value, int exponent) noexcept;
  497. };
  498. }
  499. #endif // WATER_CHARACTERFUNCTIONS_H_INCLUDED