|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621 |
- /*
- ==============================================================================
-
- This file is part of the Water library.
- Copyright (c) 2016 ROLI Ltd.
- Copyright (C) 2017 Filipe Coelho <falktx@falktx.com>
-
- Permission is granted to use this software under the terms of the ISC license
- http://www.isc.org/downloads/software-support-policy/isc-license/
-
- Permission to use, copy, modify, and/or distribute this software for any
- purpose with or without fee is hereby granted, provided that the above
- copyright notice and this permission notice appear in all copies.
-
- THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
- TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
- OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- OF THIS SOFTWARE.
-
- ==============================================================================
- */
-
- #ifndef WATER_CHARACTERFUNCTIONS_H_INCLUDED
- #define WATER_CHARACTERFUNCTIONS_H_INCLUDED
-
- #include "../memory/Memory.h"
-
- #include <algorithm>
- #include <limits>
- #include <sys/types.h>
-
- namespace water {
-
- /** A platform-independent 32-bit unicode character type. */
- typedef uint32 water_uchar;
-
- //==============================================================================
- /**
- A collection of functions for manipulating characters and character strings.
-
- Most of these methods are designed for internal use by the String and CharPointer
- classes, but some of them may be useful to call directly.
-
- @see String, CharPointer_UTF8, CharPointer_UTF16, CharPointer_UTF32
- */
- class CharacterFunctions
- {
- public:
- //==============================================================================
- /** Converts a unicode character to upper-case. */
- static water_uchar toUpperCase (water_uchar character) noexcept;
- /** Converts a unicode character to lower-case. */
- static water_uchar toLowerCase (water_uchar character) noexcept;
-
- /** Checks whether a unicode character is upper-case. */
- static bool isUpperCase (water_uchar character) noexcept;
- /** Checks whether a unicode character is lower-case. */
- static bool isLowerCase (water_uchar character) noexcept;
-
- /** Checks whether a narrow (char) character is whitespace. */
- static bool isWhitespace (char character) noexcept;
- /** Checks whether a unicode character is whitespace. */
- static bool isWhitespace (water_uchar character) noexcept;
-
- /** Checks whether a narrow (char) character is a digit. */
- static bool isDigit (char character) noexcept;
- /** Checks whether a unicode character is a digit. */
- static bool isDigit (water_uchar character) noexcept;
-
- /** Checks whether a narrow (char) character is alphabetic. */
- static bool isLetter (char character) noexcept;
- /** Checks whether a unicode character is alphabetic. */
- static bool isLetter (water_uchar character) noexcept;
-
- /** Checks whether a narrow (char) character is alphabetic or numeric. */
- static bool isLetterOrDigit (char character) noexcept;
- /** Checks whether a unicode character is alphabetic or numeric. */
- static bool isLetterOrDigit (water_uchar character) noexcept;
-
- /** Checks whether a narrow (char) character is a printable character, i.e. alphabetic,
- numeric, a punctuation character or a space.
- */
- static bool isPrintable (char character) noexcept;
-
- /** Checks whether a unicode character is a printable character, i.e. alphabetic,
- numeric, a punctuation character or a space.
- */
- static bool isPrintable (water_uchar character) noexcept;
-
- /** Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit. */
- static int getHexDigitValue (water_uchar digit) noexcept;
-
- /** Converts a byte from the Windows-1252 codepage to the corresponding unicode character. */
- static water_uchar getUnicodeCharFromWindows1252Codepage (uint8 windows1252Char) noexcept;
-
- //==============================================================================
- /** Parses a character string to read a floating-point number.
- Note that this will advance the pointer that is passed in, leaving it at
- the end of the number.
- */
- template <typename CharPointerType>
- static double readDoubleValue (CharPointerType& text) noexcept
- {
- double result[3] = { 0 }, accumulator[2] = { 0 };
- int exponentAdjustment[2] = { 0 }, exponentAccumulator[2] = { -1, -1 };
- int exponent = 0, decPointIndex = 0, digit = 0;
- int lastDigit = 0, numSignificantDigits = 0;
- bool isNegative = false, digitsFound = false;
- const int maxSignificantDigits = 15 + 2;
-
- text = text.findEndOfWhitespace();
- water_uchar c = *text;
-
- switch (c)
- {
- case '-': isNegative = true; // fall-through..
- case '+': c = *++text;
- }
-
- switch (c)
- {
- case 'n':
- case 'N':
- if ((text[1] == 'a' || text[1] == 'A') && (text[2] == 'n' || text[2] == 'N'))
- return std::numeric_limits<double>::quiet_NaN();
- break;
-
- case 'i':
- case 'I':
- if ((text[1] == 'n' || text[1] == 'N') && (text[2] == 'f' || text[2] == 'F'))
- return std::numeric_limits<double>::infinity();
- break;
- }
-
- for (;;)
- {
- if (text.isDigit())
- {
- lastDigit = digit;
- digit = (int) text.getAndAdvance() - '0';
- digitsFound = true;
-
- if (decPointIndex != 0)
- exponentAdjustment[1]++;
-
- if (numSignificantDigits == 0 && digit == 0)
- continue;
-
- if (++numSignificantDigits > maxSignificantDigits)
- {
- if (digit > 5)
- ++accumulator [decPointIndex];
- else if (digit == 5 && (lastDigit & 1) != 0)
- ++accumulator [decPointIndex];
-
- if (decPointIndex > 0)
- exponentAdjustment[1]--;
- else
- exponentAdjustment[0]++;
-
- while (text.isDigit())
- {
- ++text;
- if (decPointIndex == 0)
- exponentAdjustment[0]++;
- }
- }
- else
- {
- const double maxAccumulatorValue = (double) ((std::numeric_limits<unsigned int>::max() - 9) / 10);
- if (accumulator [decPointIndex] > maxAccumulatorValue)
- {
- result [decPointIndex] = mulexp10 (result [decPointIndex], exponentAccumulator [decPointIndex])
- + accumulator [decPointIndex];
- accumulator [decPointIndex] = 0;
- exponentAccumulator [decPointIndex] = 0;
- }
-
- accumulator [decPointIndex] = accumulator[decPointIndex] * 10 + digit;
- exponentAccumulator [decPointIndex]++;
- }
- }
- else if (decPointIndex == 0 && *text == '.')
- {
- ++text;
- decPointIndex = 1;
-
- if (numSignificantDigits > maxSignificantDigits)
- {
- while (text.isDigit())
- ++text;
- break;
- }
- }
- else
- {
- break;
- }
- }
-
- result[0] = mulexp10 (result[0], exponentAccumulator[0]) + accumulator[0];
-
- if (decPointIndex != 0)
- result[1] = mulexp10 (result[1], exponentAccumulator[1]) + accumulator[1];
-
- c = *text;
- if ((c == 'e' || c == 'E') && digitsFound)
- {
- bool negativeExponent = false;
-
- switch (*++text)
- {
- case '-': negativeExponent = true; // fall-through..
- case '+': ++text;
- }
-
- while (text.isDigit())
- exponent = (exponent * 10) + ((int) text.getAndAdvance() - '0');
-
- if (negativeExponent)
- exponent = -exponent;
- }
-
- double r = mulexp10 (result[0], exponent + exponentAdjustment[0]);
- if (decPointIndex != 0)
- r += mulexp10 (result[1], exponent - exponentAdjustment[1]);
-
- return isNegative ? -r : r;
- }
-
/** Reads a floating-point value from the start of a character string.

    The pointer is received by value, so the caller's own pointer is not moved.
*/
template <typename CharPointerType>
static double getDoubleValue (CharPointerType text) noexcept
{
    const double parsedValue = readDoubleValue (text);
    return parsedValue;
}
-
- //==============================================================================
- /** Parses a character string, to read an integer value. */
- template <typename IntType, typename CharPointerType>
- static IntType getIntValue (const CharPointerType text) noexcept
- {
- IntType v = 0;
- CharPointerType s (text.findEndOfWhitespace());
-
- const bool isNeg = *s == '-';
- if (isNeg)
- ++s;
-
- for (;;)
- {
- const water_uchar c = s.getAndAdvance();
-
- if (c >= '0' && c <= '9')
- v = v * 10 + (IntType) (c - '0');
- else
- break;
- }
-
- return isNeg ? -v : v;
- }
-
- template <typename ResultType>
- struct HexParser
- {
- template <typename CharPointerType>
- static ResultType parse (CharPointerType t) noexcept
- {
- ResultType result = 0;
-
- while (! t.isEmpty())
- {
- const int hexValue = CharacterFunctions::getHexDigitValue (t.getAndAdvance());
-
- if (hexValue >= 0)
- result = (result << 4) | hexValue;
- }
-
- return result;
- }
- };
-
- //==============================================================================
/** Counts the characters before the null terminator of a string, giving up
    once the count reaches maxCharsToCount. */
template <typename CharPointerType>
static size_t lengthUpTo (CharPointerType text, const size_t maxCharsToCount) noexcept
{
    size_t numChars = 0;

    for (; numChars < maxCharsToCount; ++numChars)
        if (text.getAndAdvance() == 0)
            break;

    return numChars;
}
-
/** Counts the characters before the null terminator of a string, giving up
    once the read position reaches the given end-pointer. */
template <typename CharPointerType>
static size_t lengthUpTo (CharPointerType start, const CharPointerType end) noexcept
{
    size_t numChars = 0;

    for (;;)
    {
        if (! (start < end))
            break;

        if (start.getAndAdvance() == 0)
            break;

        ++numChars;
    }

    return numChars;
}
-
- /** Copies null-terminated characters from one string to another. */
- template <typename DestCharPointerType, typename SrcCharPointerType>
- static void copyAll (DestCharPointerType& dest, SrcCharPointerType src) noexcept
- {
- while (water_uchar c = src.getAndAdvance())
- dest.write (c);
-
- dest.writeNull();
- }
-
- /** Copies characters from one string to another, up to a null terminator
- or a given byte size limit. */
- template <typename DestCharPointerType, typename SrcCharPointerType>
- static size_t copyWithDestByteLimit (DestCharPointerType& dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept
- {
- typename DestCharPointerType::CharType const* const startAddress = dest.getAddress();
- ssize_t maxBytes = (ssize_t) maxBytesToWrite;
- maxBytes -= sizeof (typename DestCharPointerType::CharType); // (allow for a terminating null)
-
- for (;;)
- {
- const water_uchar c = src.getAndAdvance();
- const size_t bytesNeeded = DestCharPointerType::getBytesRequiredFor (c);
-
- maxBytes -= bytesNeeded;
- if (c == 0 || maxBytes < 0)
- break;
-
- dest.write (c);
- }
-
- dest.writeNull();
-
- return (size_t) getAddressDifference (dest.getAddress(), startAddress)
- + sizeof (typename DestCharPointerType::CharType);
- }
-
- /** Copies characters from one string to another, up to a null terminator
- or a given maximum number of characters. */
- template <typename DestCharPointerType, typename SrcCharPointerType>
- static void copyWithCharLimit (DestCharPointerType& dest, SrcCharPointerType src, int maxChars) noexcept
- {
- while (--maxChars > 0)
- {
- const water_uchar c = src.getAndAdvance();
- if (c == 0)
- break;
-
- dest.write (c);
- }
-
- dest.writeNull();
- }
-
- /** Compares two characters. */
- static inline int compare (water_uchar char1, water_uchar char2) noexcept
- {
- if (int diff = static_cast<int> (char1) - static_cast<int> (char2))
- return diff < 0 ? -1 : 1;
-
- return 0;
- }
-
- /** Compares two null-terminated character strings. */
- template <typename CharPointerType1, typename CharPointerType2>
- static int compare (CharPointerType1 s1, CharPointerType2 s2) noexcept
- {
- for (;;)
- {
- const water_uchar c1 = s1.getAndAdvance();
-
- if (int diff = compare (c1, s2.getAndAdvance()))
- return diff;
-
- if (c1 == 0)
- break;
- }
-
- return 0;
- }
-
- /** Compares two null-terminated character strings, up to a given number of characters. */
- template <typename CharPointerType1, typename CharPointerType2>
- static int compareUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
- {
- while (--maxChars >= 0)
- {
- const water_uchar c1 = s1.getAndAdvance();
-
- if (int diff = compare (c1, s2.getAndAdvance()))
- return diff;
-
- if (c1 == 0)
- break;
- }
-
- return 0;
- }
-
- /** Compares two characters, using a case-independant match. */
- static inline int compareIgnoreCase (water_uchar char1, water_uchar char2) noexcept
- {
- return char1 != char2 ? compare (toUpperCase (char1), toUpperCase (char2)) : 0;
- }
-
- /** Compares two null-terminated character strings, using a case-independant match. */
- template <typename CharPointerType1, typename CharPointerType2>
- static int compareIgnoreCase (CharPointerType1 s1, CharPointerType2 s2) noexcept
- {
- for (;;)
- {
- const water_uchar c1 = s1.getAndAdvance();
-
- if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
- return diff;
-
- if (c1 == 0)
- break;
- }
-
- return 0;
- }
-
- /** Compares two null-terminated character strings, using a case-independent match. */
- template <typename CharPointerType1, typename CharPointerType2>
- static int compareIgnoreCaseUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
- {
- while (--maxChars >= 0)
- {
- const water_uchar c1 = s1.getAndAdvance();
-
- if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
- return diff;
-
- if (c1 == 0)
- break;
- }
-
- return 0;
- }
-
/** Finds the character index at which a substring first occurs in another string.
    Each position of textToSearch is tried in turn until the substring matches
    there or the null terminator has been passed.
    @returns the zero-based index of the match, or -1 if the substring is not found
*/
template <typename CharPointerType1, typename CharPointerType2>
static int indexOf (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
{
    const int needleLength = (int) substringToLookFor.length();

    for (int position = 0; ; ++position)
    {
        if (textToSearch.compareUpTo (substringToLookFor, needleLength) == 0)
            return position;

        if (textToSearch.getAndAdvance() == 0)
            break;
    }

    return -1;
}
-
/** Returns a pointer to the first occurrence of a substring in a string.
    If the substring is not found, this will return a pointer to the string's
    null terminator.
*/
template <typename CharPointerType1, typename CharPointerType2>
static CharPointerType1 find (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
{
    const int needleLength = (int) substringToLookFor.length();

    for (;;)
    {
        if (textToSearch.compareUpTo (substringToLookFor, needleLength) == 0)
            break; // matched at the current position

        if (textToSearch.isEmpty())
            break; // ran out of text

        ++textToSearch;
    }

    return textToSearch;
}
-
- /** Returns a pointer to the first occurrence of a substring in a string.
- If the substring is not found, this will return a pointer to the string's
- null terminator.
- */
- template <typename CharPointerType>
- static CharPointerType find (CharPointerType textToSearch, const water_uchar charToLookFor) noexcept
- {
- for (;; ++textToSearch)
- {
- const water_uchar c = *textToSearch;
-
- if (c == charToLookFor || c == 0)
- break;
- }
-
- return textToSearch;
- }
-
/** Finds the character index at which a substring first occurs in another
    string, using a case-independent match.
    Each position of the haystack is tried in turn until the needle matches
    there or the null terminator has been passed.
    @returns the zero-based index of the match, or -1 if the needle is not found
*/
template <typename CharPointerType1, typename CharPointerType2>
static int indexOfIgnoreCase (CharPointerType1 haystack, const CharPointerType2 needle) noexcept
{
    const int numNeedleChars = (int) needle.length();

    for (int position = 0; ; ++position)
    {
        if (haystack.compareIgnoreCaseUpTo (needle, numNeedleChars) == 0)
            return position;

        if (haystack.getAndAdvance() == 0)
            break;
    }

    return -1;
}
-
- /** Finds the character index of a given character in another string.
- Returns -1 if the character is not found.
- */
- template <typename Type>
- static int indexOfChar (Type text, const water_uchar charToFind) noexcept
- {
- int i = 0;
-
- while (! text.isEmpty())
- {
- if (text.getAndAdvance() == charToFind)
- return i;
-
- ++i;
- }
-
- return -1;
- }
-
- /** Finds the character index of a given character in another string, using
- a case-independent match.
- Returns -1 if the character is not found.
- */
- template <typename Type>
- static int indexOfCharIgnoreCase (Type text, water_uchar charToFind) noexcept
- {
- charToFind = CharacterFunctions::toLowerCase (charToFind);
- int i = 0;
-
- while (! text.isEmpty())
- {
- if (text.toLowerCase() == charToFind)
- return i;
-
- ++text;
- ++i;
- }
-
- return -1;
- }
-
/** Skips over leading whitespace, returning a pointer to the first character
    for which isWhitespace() is false.
    If the string contains only whitespace, this will return a pointer
    to its null terminator.
*/
template <typename Type>
static Type findEndOfWhitespace (Type text) noexcept
{
    for (;;)
    {
        if (! text.isWhitespace())
            return text;

        ++text;
    }
}
-
- /** Returns a pointer to the first character in the string which is found in
- the breakCharacters string.
- */
- template <typename Type, typename BreakType>
- static Type findEndOfToken (Type text, const BreakType breakCharacters, const Type quoteCharacters)
- {
- water_uchar currentQuoteChar = 0;
-
- while (! text.isEmpty())
- {
- const water_uchar c = text.getAndAdvance();
-
- if (currentQuoteChar == 0 && breakCharacters.indexOf (c) >= 0)
- {
- --text;
- break;
- }
-
- if (quoteCharacters.indexOf (c) >= 0)
- {
- if (currentQuoteChar == 0)
- currentQuoteChar = c;
- else if (currentQuoteChar == c)
- currentQuoteChar = 0;
- }
- }
-
- return text;
- }
-
- private:
- static double mulexp10 (const double value, int exponent) noexcept; /**< Helper used by readDoubleValue(); defined outside this header. NOTE(review): presumably returns value * 10^exponent - confirm against the implementation. */
- };
-
- }
-
- #endif // WATER_CHARACTERFUNCTIONS_H_INCLUDED
|