From c4cdec7bd9e9817bdcfd8ac5d39375c00d797eff Mon Sep 17 00:00:00 2001
From: jules
Date: Sat, 5 Mar 2016 15:05:48 +0000
Subject: [PATCH] Added method CharacterFunctions::getUnicodeCharFromWindows1252Codepage() and used this to help parse difficult WAV file metadata

---
Notes (placed after the three-dash line, so not part of the commit message):

  * WavFileHelpers::getStringFromData() returns mb.toString() when
    CharPointer_UTF8::isValidString() accepts the block's bytes; otherwise it
    converts the block one byte at a time with
    CharacterFunctions::getUnicodeCharFromWindows1252Codepage().
  * getUnicodeCharFromWindows1252Codepage() returns bytes below 0x80 and bytes
    from 0xa0 upwards unchanged. Bytes 0x80-0x9f are looked up in a 32-entry
    table; its entries for 0x81, 0x8d, 0x8f, 0x90 and 0x9d are 0x0007.
  * The temporary buffer in getStringFromWindows1252Codepage() holds
    juce_wchar values plus a terminating 0, and is read back through
    CharPointer_UTF32.

 .../codecs/juce_WavAudioFormat.cpp           | 20 ++++++++++++++++++-
 .../text/juce_CharacterFunctions.cpp         | 13 ++++++++++++
 .../juce_core/text/juce_CharacterFunctions.h |  3 +++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/modules/juce_audio_formats/codecs/juce_WavAudioFormat.cpp b/modules/juce_audio_formats/codecs/juce_WavAudioFormat.cpp
index 75603b1524..4d4000e05d 100644
--- a/modules/juce_audio_formats/codecs/juce_WavAudioFormat.cpp
+++ b/modules/juce_audio_formats/codecs/juce_WavAudioFormat.cpp
@@ -646,6 +646,24 @@ namespace WavFileHelpers
             return true;
         }
 
+        static String getStringFromWindows1252Codepage (const uint8* data, size_t num)
+        {
+            HeapBlock<juce_wchar> unicode (num + 1);
+
+            for (size_t i = 0; i < num; ++i)
+                unicode[i] = CharacterFunctions::getUnicodeCharFromWindows1252Codepage (data[i]);
+
+            unicode[num] = 0;
+            return CharPointer_UTF32 (unicode);
+        }
+
+        static String getStringFromData (const MemoryBlock& mb)
+        {
+            return CharPointer_UTF8::isValidString ((const char*) mb.getData(), mb.getSize())
+                    ? mb.toString()
+                    : getStringFromWindows1252Codepage ((const uint8*) mb.getData(), mb.getSize());
+        }
+
         static void addToMetadata (StringPairArray& values, InputStream& input, int64 chunkEnd)
         {
             while (input.getPosition() < chunkEnd)
@@ -664,7 +682,7 @@ namespace WavFileHelpers
                         {
                             MemoryBlock mb;
                             input.readIntoMemoryBlock (mb, (ssize_t) infoLength);
-                            values.set (types[i], mb.toString());
+                            values.set (types[i], getStringFromData (mb));
                             break;
                         }
                     }
diff --git a/modules/juce_core/text/juce_CharacterFunctions.cpp b/modules/juce_core/text/juce_CharacterFunctions.cpp
index d0a505f7fc..4db016a2ac 100644
--- a/modules/juce_core/text/juce_CharacterFunctions.cpp
+++ b/modules/juce_core/text/juce_CharacterFunctions.cpp
@@ -162,3 +162,16 @@ double CharacterFunctions::mulexp10 (const double value, int exponent) noexcept
 
     return negative ? (value / result) : (value * result);
 }
+
+juce_wchar CharacterFunctions::getUnicodeCharFromWindows1252Codepage (const uint8 c) noexcept
+{
+    if (c < 0x80 || c >= 0xa0)
+        return (juce_wchar) c;
+
+    static const uint16 lookup[] = { 0x20AC, 0x0007, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+                                     0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0007, 0x017D, 0x0007,
+                                     0x0007, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+                                     0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0007, 0x017E, 0x0178 };
+
+    return (juce_wchar) lookup[c - 0x80];
+}
diff --git a/modules/juce_core/text/juce_CharacterFunctions.h b/modules/juce_core/text/juce_CharacterFunctions.h
index 9628e792a4..4b006e2ac9 100644
--- a/modules/juce_core/text/juce_CharacterFunctions.h
+++ b/modules/juce_core/text/juce_CharacterFunctions.h
@@ -123,6 +123,9 @@ public:
     /** Returns 0 to 16 for '0' to 'F", or -1 for characters that aren't a legal hex digit. */
     static int getHexDigitValue (juce_wchar digit) noexcept;
 
+    /** Converts a byte of Windows 1252 codepage to unicode. */
+    static juce_wchar getUnicodeCharFromWindows1252Codepage (uint8 windows1252Char) noexcept;
+
     //==============================================================================
     /** Parses a character string to read a floating-point number.
         Note that this will advance the pointer that is passed in, leaving it at