Browse Source

Refactored some byte-order mark detection code.

tags/2021-05-28
jules 12 years ago
parent
commit
c89abff867
3 changed files with 45 additions and 13 deletions
  1. +24
    -0
      modules/juce_core/text/juce_CharPointer_UTF16.h
  2. +14
    -1
      modules/juce_core/text/juce_CharPointer_UTF8.h
  3. +7
    -12
      modules/juce_core/text/juce_String.cpp

+ 24
- 0
modules/juce_core/text/juce_CharPointer_UTF16.h View File

@@ -483,6 +483,30 @@ public:
byteOrderMarkLE2 = 0xfe
};
/** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian).
The pointer must not be null, and must contain at least two valid bytes.
*/
static bool isByteOrderMarkBigEndian (const void* possibleByteOrder) noexcept
{
jassert (possibleByteOrder != nullptr);
const uint8* const c = static_cast<const uint8*> (possibleByteOrder);
return c[0] == (uint8) byteOrderMarkBE1
&& c[1] == (uint8) byteOrderMarkBE2;
}
/** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian).
The pointer must not be null, and must contain at least two valid bytes.
*/
static bool isByteOrderMarkLittleEndian (const void* possibleByteOrder) noexcept
{
jassert (possibleByteOrder != nullptr);
const uint8* const c = static_cast<const uint8*> (possibleByteOrder);
return c[0] == (uint8) byteOrderMarkLE1
&& c[1] == (uint8) byteOrderMarkLE2;
}
private:
CharType* data;


+ 14
- 1
modules/juce_core/text/juce_CharPointer_UTF8.h View File

@@ -550,7 +550,7 @@ public:
return CharPointer_UTF8 (reinterpret_cast <Atomic<CharType*>&> (data).exchange (newValue.data));
}
/** These values are the byte-order-mark (BOM) values for a UTF-8 stream. */
/** These values are the byte-order mark (BOM) values for a UTF-8 stream. */
enum
{
byteOrderMark1 = 0xef,
@@ -558,6 +558,19 @@ public:
byteOrderMark3 = 0xbf
};
/** Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM).
The pointer must not be null, and must point to at least 3 valid bytes.
*/
static bool isByteOrderMark (const void* possibleByteOrder) noexcept
{
jassert (possibleByteOrder != nullptr);
const uint8* const c = static_cast<const uint8*> (possibleByteOrder);
return c[0] == (uint8) byteOrderMark1
&& c[1] == (uint8) byteOrderMark2
&& c[2] == (uint8) byteOrderMark3;
}
private:
CharType* data;
};


+ 7
- 12
modules/juce_core/text/juce_String.cpp View File

@@ -1914,9 +1914,9 @@ int String::getHexValue32() const noexcept { return HexConverter<int> ::st
int64 String::getHexValue64() const noexcept { return HexConverter<int64>::stringToHex (text); }
//==============================================================================
String String::createStringFromData (const void* const data_, const int size)
String String::createStringFromData (const void* const unknownData, const int size)
{
const uint8* const data = static_cast <const uint8*> (data_);
const uint8* const data = static_cast<const uint8*> (unknownData);
if (size <= 0 || data == nullptr)
return empty;
@@ -1924,17 +1924,16 @@ String String::createStringFromData (const void* const data_, const int size)
if (size == 1)
return charToString ((juce_wchar) data[0]);
if ((data[0] == (uint8) CharPointer_UTF16::byteOrderMarkBE1 && data[1] == (uint8) CharPointer_UTF16::byteOrderMarkBE2)
|| (data[0] == (uint8) CharPointer_UTF16::byteOrderMarkLE1 && data[1] == (uint8) CharPointer_UTF16::byteOrderMarkLE2))
if (CharPointer_UTF16::isByteOrderMarkBigEndian (data)
|| CharPointer_UTF16::isByteOrderMarkLittleEndian (data))
{
const bool bigEndian = (data[0] == (uint8) CharPointer_UTF16::byteOrderMarkBE1);
const int numChars = size / 2 - 1;
StringCreationHelper builder ((size_t) numChars);
const uint16* const src = (const uint16*) (data + 2);
if (bigEndian)
if (CharPointer_UTF16::isByteOrderMarkBigEndian (data))
{
for (int i = 0; i < numChars; ++i)
builder.write ((juce_wchar) ByteOrder::swapIfLittleEndian (src[i]));
@@ -1950,16 +1949,12 @@ String String::createStringFromData (const void* const data_, const int size)
}
const uint8* start = data;
const uint8* end = data + size;
if (size >= 3
&& data[0] == (uint8) CharPointer_UTF8::byteOrderMark1
&& data[1] == (uint8) CharPointer_UTF8::byteOrderMark2
&& data[2] == (uint8) CharPointer_UTF8::byteOrderMark3)
if (size >= 3 && CharPointer_UTF8::isByteOrderMark (data))
start += 3;
return String (CharPointer_UTF8 ((const char*) start),
CharPointer_UTF8 ((const char*) end));
CharPointer_UTF8 ((const char*) (data + size)));
}
//==============================================================================


Loading…
Cancel
Save