From 5d26b6a20f541f35b40d5eb56064db00b15f235f Mon Sep 17 00:00:00 2001 From: reuk Date: Tue, 17 Nov 2020 13:50:54 +0000 Subject: [PATCH] Aiff: Fix extremely slow loads of pathological files If an aiff file advertied a large number of metadata keys, the StringPairArray::set calls resulted in quadratic complexity. We now read keys into a std::map instead, as it enables much faster lookup, and then convert back to a StringPairArray at the last possible moment. --- .../codecs/juce_AiffAudioFormat.cpp | 93 +++++++----- modules/juce_core/text/juce_StringArray.cpp | 5 + modules/juce_core/text/juce_StringArray.h | 6 + .../juce_core/text/juce_StringPairArray.cpp | 142 ++++++++++++++++++ modules/juce_core/text/juce_StringPairArray.h | 7 + 5 files changed, 214 insertions(+), 39 deletions(-) diff --git a/modules/juce_audio_formats/codecs/juce_AiffAudioFormat.cpp b/modules/juce_audio_formats/codecs/juce_AiffAudioFormat.cpp index b2167cf4ec..a389d009a5 100644 --- a/modules/juce_audio_formats/codecs/juce_AiffAudioFormat.cpp +++ b/modules/juce_audio_formats/codecs/juce_AiffAudioFormat.cpp @@ -67,25 +67,25 @@ namespace AiffFileHelpers Loop sustainLoop; Loop releaseLoop; - void copyTo (StringPairArray& values) const + void copyTo (std::map& values) const { - values.set ("MidiUnityNote", String (baseNote)); - values.set ("Detune", String (detune)); + values.emplace ("MidiUnityNote", String (baseNote)); + values.emplace ("Detune", String (detune)); - values.set ("LowNote", String (lowNote)); - values.set ("HighNote", String (highNote)); - values.set ("LowVelocity", String (lowVelocity)); - values.set ("HighVelocity", String (highVelocity)); + values.emplace ("LowNote", String (lowNote)); + values.emplace ("HighNote", String (highNote)); + values.emplace ("LowVelocity", String (lowVelocity)); + values.emplace ("HighVelocity", String (highVelocity)); - values.set ("Gain", String ((int16) ByteOrder::swapIfLittleEndian ((uint16) gain))); + values.emplace ("Gain", String ((int16) ByteOrder::swapIfLittleEndian ((uint16) gain))); - values.set ("NumSampleLoops", String (2)); // always 2 with AIFF, WAV can have more - values.set ("Loop0Type", String (ByteOrder::swapIfLittleEndian (sustainLoop.type))); - values.set ("Loop0StartIdentifier", String (ByteOrder::swapIfLittleEndian (sustainLoop.startIdentifier))); - values.set ("Loop0EndIdentifier", String (ByteOrder::swapIfLittleEndian (sustainLoop.endIdentifier))); - values.set ("Loop1Type", String (ByteOrder::swapIfLittleEndian (releaseLoop.type))); - values.set ("Loop1StartIdentifier", String (ByteOrder::swapIfLittleEndian (releaseLoop.startIdentifier))); - values.set ("Loop1EndIdentifier", String (ByteOrder::swapIfLittleEndian (releaseLoop.endIdentifier))); + values.emplace ("NumSampleLoops", String (2)); // always 2 with AIFF, WAV can have more + values.emplace ("Loop0Type", String (ByteOrder::swapIfLittleEndian (sustainLoop.type))); + values.emplace ("Loop0StartIdentifier", String (ByteOrder::swapIfLittleEndian (sustainLoop.startIdentifier))); + values.emplace ("Loop0EndIdentifier", String (ByteOrder::swapIfLittleEndian (sustainLoop.endIdentifier))); + values.emplace ("Loop1Type", String (ByteOrder::swapIfLittleEndian (releaseLoop.type))); + values.emplace ("Loop1StartIdentifier", String (ByteOrder::swapIfLittleEndian (releaseLoop.startIdentifier))); + values.emplace ("Loop1EndIdentifier", String (ByteOrder::swapIfLittleEndian (releaseLoop.endIdentifier))); } static uint16 getValue16 (const StringPairArray& values, const char* name, const char* def) @@ -149,7 +149,7 @@ namespace AiffFileHelpers input.read (unknown, sizeof (unknown)); } - void addToMetadata (StringPairArray& metadata) const + void addToMetadata (std::map& metadata) const { const bool rootNoteSet = rootNote != 0; @@ -157,11 +157,11 @@ namespace AiffFileHelpers setBoolFlag (metadata, AiffAudioFormat::appleRootSet, rootNoteSet); if (rootNoteSet) - metadata.set (AiffAudioFormat::appleRootNote, String (rootNote)); + metadata.emplace (AiffAudioFormat::appleRootNote, String (rootNote)); - metadata.set (AiffAudioFormat::appleBeats, String (numBeats)); - metadata.set (AiffAudioFormat::appleDenominator, String (timeSigDen)); - metadata.set (AiffAudioFormat::appleNumerator, String (timeSigNum)); + metadata.emplace (AiffAudioFormat::appleBeats, String (numBeats)); + metadata.emplace (AiffAudioFormat::appleDenominator, String (timeSigDen)); + metadata.emplace (AiffAudioFormat::appleNumerator, String (timeSigNum)); const char* keyString = nullptr; @@ -175,12 +175,14 @@ namespace AiffFileHelpers } if (keyString != nullptr) - metadata.set (AiffAudioFormat::appleKey, keyString); + metadata.emplace (AiffAudioFormat::appleKey, keyString); } - void setBoolFlag (StringPairArray& values, const char* name, bool shouldBeSet) const + void setBoolFlag (std::map& values, + const char* name, + bool shouldBeSet) const { - values.set (name, shouldBeSet ? "1" : "0"); + values.emplace (name, shouldBeSet ? "1" : "0"); } uint32 flags; @@ -388,6 +390,17 @@ public: { using namespace AiffFileHelpers; + std::map metadataValuesMap; + + for (int i = 0; i != metadataValues.size(); ++i) + { + metadataValuesMap.emplace (metadataValues.getAllKeys().getReference (i), + metadataValues.getAllValues().getReference (i)); + } + + // If this fails, there were duplicate keys in the metadata + jassert ((size_t) metadataValuesMap.size() == (size_t) metadataValues.size()); + if (input->readInt() == chunkName ("FORM")) { auto len = input->readIntBigEndian(); @@ -479,8 +492,8 @@ public: auto numCues = (uint16) input->readShortBigEndian(); // these two are always the same for AIFF-read files - metadataValues.set ("NumCuePoints", String (numCues)); - metadataValues.set ("NumCueLabels", String (numCues)); + metadataValuesMap.emplace ("NumCuePoints", String (numCues)); + metadataValuesMap.emplace ("NumCueLabels", String (numCues)); for (uint16 i = 0; i < numCues; ++i) { @@ -497,18 +510,18 @@ public: input->readByte(); auto prefixCue = "Cue" + String (i); - metadataValues.set (prefixCue + "Identifier", String (identifier)); - metadataValues.set (prefixCue + "Offset", String (offset)); + metadataValuesMap.emplace (prefixCue + "Identifier", String (identifier)); + metadataValuesMap.emplace (prefixCue + "Offset", String (offset)); auto prefixLabel = "CueLabel" + String (i); - metadataValues.set (prefixLabel + "Identifier", String (identifier)); - metadataValues.set (prefixLabel + "Text", textBlock.toString()); + metadataValuesMap.emplace (prefixLabel + "Identifier", String (identifier)); + metadataValuesMap.emplace (prefixLabel + "Text", textBlock.toString()); } } else if (type == chunkName ("COMT")) { auto numNotes = (uint16) input->readShortBigEndian(); - metadataValues.set ("NumCueNotes", String (numNotes)); + metadataValuesMap.emplace ("NumCueNotes", String (numNotes)); for (uint16 i = 0; i < numNotes; ++i) { @@ -520,9 +533,9 @@ public: input->readIntoMemoryBlock (textBlock, stringLength + (stringLength & 1)); auto prefix = "CueNote" + String (i); - metadataValues.set (prefix + "TimeStamp", String (timestamp)); - metadataValues.set (prefix + "Identifier", String (identifier)); - metadataValues.set (prefix + "Text", textBlock.toString()); + metadataValuesMap.emplace (prefix + "TimeStamp", String (timestamp)); + metadataValuesMap.emplace (prefix + "Identifier", String (identifier)); + metadataValuesMap.emplace (prefix + "Text", textBlock.toString()); } } else if (type == chunkName ("INST")) @@ -530,16 +543,16 @@ public: HeapBlock inst; inst.calloc (jmax ((size_t) length + 1, sizeof (InstChunk)), 1); input->read (inst, (int) length); - inst->copyTo (metadataValues); + inst->copyTo (metadataValuesMap); } else if (type == chunkName ("basc")) { - AiffFileHelpers::BASCChunk (*input).addToMetadata (metadataValues); + AiffFileHelpers::BASCChunk (*input).addToMetadata (metadataValuesMap); } else if (type == chunkName ("cate")) { - metadataValues.set (AiffAudioFormat::appleTag, - AiffFileHelpers::CATEChunk::read (*input, length)); + metadataValuesMap.emplace (AiffAudioFormat::appleTag, + AiffFileHelpers::CATEChunk::read (*input, length)); } else if ((hasGotVer && hasGotData && hasGotType) || chunkEnd < input->getPosition() @@ -553,8 +566,10 @@ public: } } - if (metadataValues.size() > 0) - metadataValues.set ("MetaDataSource", "AIFF"); + if (metadataValuesMap.size() > 0) + metadataValuesMap.emplace ("MetaDataSource", "AIFF"); + + metadataValues.addMap (metadataValuesMap); } //============================================================================== diff --git a/modules/juce_core/text/juce_StringArray.cpp b/modules/juce_core/text/juce_StringArray.cpp index 8523dc9669..5dc50eb9c3 100644 --- a/modules/juce_core/text/juce_StringArray.cpp +++ b/modules/juce_core/text/juce_StringArray.cpp @@ -132,6 +132,11 @@ String& StringArray::getReference (int index) noexcept return strings.getReference (index); } +const String& StringArray::getReference (int index) const noexcept +{ + return strings.getReference (index); +} + void StringArray::add (String newString) { // NB: the local temp copy is to avoid a dangling pointer if the diff --git a/modules/juce_core/text/juce_StringArray.h b/modules/juce_core/text/juce_StringArray.h index 6f3670bd7d..ac597b748e 100644 --- a/modules/juce_core/text/juce_StringArray.h +++ b/modules/juce_core/text/juce_StringArray.h @@ -152,6 +152,12 @@ public: */ String& getReference (int index) noexcept; + /** Returns a reference to one of the strings in the array. + This lets you modify a string in-place in the array, but you must be sure that + the index is in-range. + */ + const String& getReference (int index) const noexcept; + /** Returns a pointer to the first String in the array. This method is provided for compatibility with standard C++ iteration mechanisms. */ diff --git a/modules/juce_core/text/juce_StringPairArray.cpp b/modules/juce_core/text/juce_StringPairArray.cpp index a906bf98b7..b1b12f92b5 100644 --- a/modules/juce_core/text/juce_StringPairArray.cpp +++ b/modules/juce_core/text/juce_StringPairArray.cpp @@ -145,6 +145,11 @@ void StringPairArray::setIgnoresCase (bool shouldIgnoreCase) ignoreCase = shouldIgnoreCase; } +bool StringPairArray::getIgnoresCase() const noexcept +{ + return ignoreCase; +} + String StringPairArray::getDescription() const { String s; @@ -166,4 +171,141 @@ void StringPairArray::minimiseStorageOverheads() values.minimiseStorageOverheads(); } +void StringPairArray::addMap (const std::map& toAdd) +{ + // If we just called `set` for each item in `toAdd`, that would + // perform badly when adding to large StringPairArrays, as `set` + // has to loop through the whole container looking for matching keys. + // Instead, we use a temporary map to give us better lookup performance. + std::map contents; + + const auto normaliseKey = [this] (const String& key) + { + return ignoreCase ? key.toLowerCase() : key; + }; + + for (auto i = 0; i != size(); ++i) + contents.emplace (normaliseKey (getAllKeys().getReference (i)), i); + + for (const auto& pair : toAdd) + { + const auto key = normaliseKey (pair.first); + const auto it = contents.find (key); + + if (it != contents.cend()) + { + values.getReference (it->second) = pair.second; + } + else + { + contents.emplace (key, static_cast (contents.size())); + keys.add (pair.first); + values.add (pair.second); + } + } +} + +//============================================================================== +//============================================================================== +#if JUCE_UNIT_TESTS + +static String operator""_S (const char* chars, size_t) +{ + return String { chars }; +} + +class StringPairArrayTests : public UnitTest +{ +public: + StringPairArrayTests() + : UnitTest ("StringPairArray", UnitTestCategories::text) + {} + + void runTest() override + { + beginTest ("addMap respects case sensitivity of StringPairArray"); + { + StringPairArray insensitive { true }; + insensitive.addMap ({ { "duplicate", "a" }, + { "Duplicate", "b" } }); + + expect (insensitive.size() == 1); + expectEquals (insensitive["DUPLICATE"], "a"_S); + + StringPairArray sensitive { false }; + sensitive.addMap ({ { "duplicate", "a"_S }, + { "Duplicate", "b"_S } }); + + expect (sensitive.size() == 2); + expectEquals (sensitive["duplicate"], "a"_S); + expectEquals (sensitive["Duplicate"], "b"_S); + expectEquals (sensitive["DUPLICATE"], ""_S); + } + + beginTest ("addMap overwrites existing pairs"); + { + StringPairArray insensitive { true }; + insensitive.set ("key", "value"); + insensitive.addMap ({ { "KEY", "VALUE" } }); + + expect (insensitive.size() == 1); + expectEquals (insensitive.getAllKeys()[0], "key"_S); + expectEquals (insensitive.getAllValues()[0], "VALUE"_S); + + StringPairArray sensitive { false }; + sensitive.set ("key", "value"); + sensitive.addMap ({ { "KEY", "VALUE" }, + { "key", "another value" } }); + + expect (sensitive.size() == 2); + expect (sensitive.getAllKeys() == StringArray { "key", "KEY" }); + expect (sensitive.getAllValues() == StringArray { "another value", "VALUE" }); + } + + beginTest ("addMap doesn't change the order of existing keys"); + { + StringPairArray array; + array.set ("a", "a"); + array.set ("z", "z"); + array.set ("b", "b"); + array.set ("y", "y"); + array.set ("c", "c"); + + array.addMap ({ { "B", "B" }, + { "0", "0" }, + { "Z", "Z" } }); + + expect (array.getAllKeys() == StringArray { "a", "z", "b", "y", "c", "0" }); + expect (array.getAllValues() == StringArray { "a", "Z", "B", "y", "c", "0" }); + } + + beginTest ("addMap has equivalent behaviour to addArray"); + { + StringPairArray initial; + initial.set ("aaa", "aaa"); + initial.set ("zzz", "zzz"); + initial.set ("bbb", "bbb"); + + auto withAddMap = initial; + withAddMap.addMap ({ { "ZZZ", "ZZZ" }, + { "ddd", "ddd" } }); + + auto withAddArray = initial; + withAddArray.addArray ([] + { + StringPairArray toAdd; + toAdd.set ("ZZZ", "ZZZ"); + toAdd.set ("ddd", "ddd"); + return toAdd; + }()); + + expect (withAddMap == withAddArray); + } + } +}; + +static StringPairArrayTests stringPairArrayTests; + +#endif + } // namespace juce diff --git a/modules/juce_core/text/juce_StringPairArray.h b/modules/juce_core/text/juce_StringPairArray.h index 77d486c92b..16b9a99c1e 100644 --- a/modules/juce_core/text/juce_StringPairArray.h +++ b/modules/juce_core/text/juce_StringPairArray.h @@ -124,6 +124,10 @@ public: */ void setIgnoresCase (bool shouldIgnoreCase); + /** Indicates whether a case-insensitive search is used when looking up a key string. + */ + bool getIgnoresCase() const noexcept; + //============================================================================== /** Returns a descriptive string containing the items. This is handy for dumping the contents of an array. @@ -139,6 +143,9 @@ public: */ void minimiseStorageOverheads(); + //============================================================================== + /** Adds the contents of a map to this StringPairArray. */ + void addMap (const std::map& mapToAdd); private: //==============================================================================