From c89abff8671749c53492be84c9061ee1e4c863f4 Mon Sep 17 00:00:00 2001 From: jules Date: Fri, 6 Sep 2013 11:25:13 +0100 Subject: [PATCH] Refactored some byte-order mark detection code. --- .../juce_core/text/juce_CharPointer_UTF16.h | 24 +++++++++++++++++++ .../juce_core/text/juce_CharPointer_UTF8.h | 15 +++++++++++- modules/juce_core/text/juce_String.cpp | 19 ++++++--------- 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/modules/juce_core/text/juce_CharPointer_UTF16.h b/modules/juce_core/text/juce_CharPointer_UTF16.h index c4de13bc92..e68d0edee3 100644 --- a/modules/juce_core/text/juce_CharPointer_UTF16.h +++ b/modules/juce_core/text/juce_CharPointer_UTF16.h @@ -483,6 +483,30 @@ public: byteOrderMarkLE2 = 0xfe }; + /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian). + The pointer must not be null, and must contain at least two valid bytes. + */ + static bool isByteOrderMarkBigEndian (const void* possibleByteOrder) noexcept + { + jassert (possibleByteOrder != nullptr); + const uint8* const c = static_cast <const uint8*> (possibleByteOrder); + + return c[0] == (uint8) byteOrderMarkBE1 + && c[1] == (uint8) byteOrderMarkBE2; + } + + /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian). + The pointer must not be null, and must contain at least two valid bytes. 
+ */ + static bool isByteOrderMarkLittleEndian (const void* possibleByteOrder) noexcept + { + jassert (possibleByteOrder != nullptr); + const uint8* const c = static_cast <const uint8*> (possibleByteOrder); + + return c[0] == (uint8) byteOrderMarkLE1 + && c[1] == (uint8) byteOrderMarkLE2; + } + private: CharType* data; diff --git a/modules/juce_core/text/juce_CharPointer_UTF8.h b/modules/juce_core/text/juce_CharPointer_UTF8.h index 25d0040770..43117f49c7 100644 --- a/modules/juce_core/text/juce_CharPointer_UTF8.h +++ b/modules/juce_core/text/juce_CharPointer_UTF8.h @@ -550,7 +550,7 @@ public: return CharPointer_UTF8 (reinterpret_cast <Atomic<CharType*>&> (data).exchange (newValue.data)); } - /** These values are the byte-order-mark (BOM) values for a UTF-8 stream. */ + /** These values are the byte-order mark (BOM) values for a UTF-8 stream. */ enum { byteOrderMark1 = 0xef, @@ -558,6 +558,19 @@ public: byteOrderMark3 = 0xbf }; + /** Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM). + The pointer must not be null, and must point to at least 3 valid bytes. 
+ */ + static bool isByteOrderMark (const void* possibleByteOrder) noexcept + { + jassert (possibleByteOrder != nullptr); + const uint8* const c = static_cast <const uint8*> (possibleByteOrder); + + return c[0] == (uint8) byteOrderMark1 + && c[1] == (uint8) byteOrderMark2 + && c[2] == (uint8) byteOrderMark3; + } + private: CharType* data; }; diff --git a/modules/juce_core/text/juce_String.cpp b/modules/juce_core/text/juce_String.cpp index 16909d8d74..b229b1b1c5 100644 --- a/modules/juce_core/text/juce_String.cpp +++ b/modules/juce_core/text/juce_String.cpp @@ -1914,9 +1914,9 @@ int String::getHexValue32() const noexcept { return HexConverter<int> ::st int64 String::getHexValue64() const noexcept { return HexConverter<int64>::stringToHex (text); } //============================================================================== -String String::createStringFromData (const void* const data_, const int size) +String String::createStringFromData (const void* const unknownData, const int size) { - const uint8* const data = static_cast <const uint8*> (data_); + const uint8* const data = static_cast <const uint8*> (unknownData); if (size <= 0 || data == nullptr) return empty; @@ -1924,17 +1924,16 @@ String String::createStringFromData (const void* const data_, const int size) if (size == 1) return charToString ((juce_wchar) data[0]); - if ((data[0] == (uint8) CharPointer_UTF16::byteOrderMarkBE1 && data[1] == (uint8) CharPointer_UTF16::byteOrderMarkBE2) - || (data[0] == (uint8) CharPointer_UTF16::byteOrderMarkLE1 && data[1] == (uint8) CharPointer_UTF16::byteOrderMarkLE2)) + if (CharPointer_UTF16::isByteOrderMarkBigEndian (data) + || CharPointer_UTF16::isByteOrderMarkLittleEndian (data)) { - const bool bigEndian = (data[0] == (uint8) CharPointer_UTF16::byteOrderMarkBE1); const int numChars = size / 2 - 1; StringCreationHelper builder ((size_t) numChars); const uint16* const src = (const uint16*) (data + 2); - if (bigEndian) + if (CharPointer_UTF16::isByteOrderMarkBigEndian (data)) { for (int i = 0; i < numChars; ++i) builder.write 
((juce_wchar) ByteOrder::swapIfLittleEndian (src[i])); @@ -1950,16 +1949,12 @@ String String::createStringFromData (const void* const data_, const int size) } const uint8* start = data; - const uint8* end = data + size; - if (size >= 3 - && data[0] == (uint8) CharPointer_UTF8::byteOrderMark1 - && data[1] == (uint8) CharPointer_UTF8::byteOrderMark2 - && data[2] == (uint8) CharPointer_UTF8::byteOrderMark3) + if (size >= 3 && CharPointer_UTF8::isByteOrderMark (data)) start += 3; return String (CharPointer_UTF8 ((const char*) start), - CharPointer_UTF8 ((const char*) end)); + CharPointer_UTF8 ((const char*) (data + size))); } //==============================================================================