|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885 |
- /*
- ==============================================================================
-
- This file is part of the Water library.
- Copyright (c) 2016 ROLI Ltd.
- Copyright (C) 2017-2018 Filipe Coelho <falktx@falktx.com>
-
- Permission is granted to use this software under the terms of the ISC license
- http://www.isc.org/downloads/software-support-policy/isc-license/
-
- Permission to use, copy, modify, and/or distribute this software for any
- purpose with or without fee is hereby granted, provided that the above
- copyright notice and this permission notice appear in all copies.
-
- THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
- TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
- FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
- OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
- TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
- OF THIS SOFTWARE.
-
- ==============================================================================
- */
-
- #include "XmlDocument.h"
- #include "XmlElement.h"
- #include "../containers/LinkedListPointer.h"
- #include "../streams/FileInputSource.h"
- #include "../streams/InputStream.h"
- #include "../streams/MemoryOutputStream.h"
-
- namespace water {
-
- XmlDocument::XmlDocument (const String& documentText)
- : originalText (documentText),
- input (nullptr),
- outOfData (false),
- errorOccurred (false),
- needToLoadDTD (false),
- ignoreEmptyTextElements (true)
- {
- }
-
- XmlDocument::XmlDocument (const File& file)
- : input (nullptr),
- outOfData (false),
- errorOccurred (false),
- needToLoadDTD (false),
- ignoreEmptyTextElements (true),
- inputSource (new FileInputSource (file))
- {
- }
-
- XmlDocument::~XmlDocument()
- {
- }
-
- XmlElement* XmlDocument::parse (const File& file)
- {
- XmlDocument doc (file);
- return doc.getDocumentElement();
- }
-
- XmlElement* XmlDocument::parse (const String& xmlData)
- {
- XmlDocument doc (xmlData);
- return doc.getDocumentElement();
- }
-
- void XmlDocument::setInputSource (FileInputSource* const newSource) noexcept
- {
- inputSource = newSource;
- }
-
- void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
- {
- ignoreEmptyTextElements = shouldBeIgnored;
- }
-
- namespace XmlIdentifierChars
- {
- static bool isIdentifierCharSlow (const water_uchar c) noexcept
- {
- return CharacterFunctions::isLetterOrDigit (c)
- || c == '_' || c == '-' || c == ':' || c == '.';
- }
-
- static bool isIdentifierChar (const water_uchar c) noexcept
- {
- static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
-
- return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
- : isIdentifierCharSlow (c);
- }
-
- /*static void generateIdentifierCharConstants()
- {
- uint32 n[8] = { 0 };
- for (int i = 0; i < 256; ++i)
- if (isIdentifierCharSlow (i))
- n[i >> 5] |= (1 << (i & 31));
-
- String s;
- for (int i = 0; i < 8; ++i)
- s << "0x" << String::toHexString ((int) n[i]) << ", ";
-
- DBG (s);
- }*/
-
- static String::CharPointerType findEndOfToken (String::CharPointerType p)
- {
- while (isIdentifierChar (*p))
- ++p;
-
- return p;
- }
- }
-
- XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
- {
- if (originalText.isEmpty() && inputSource != nullptr)
- {
- ScopedPointer<InputStream> in (inputSource->createInputStream());
-
- if (in != nullptr)
- {
- MemoryOutputStream data;
- data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
-
- if (data.getDataSize() > 2)
- {
- data.writeByte (0);
- const char* text = static_cast<const char*> (data.getData());
-
- if (CharPointer_UTF8::isByteOrderMark (text))
- text += 3;
-
- // parse the input buffer directly to avoid copying it all to a string..
- return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
- }
- }
- }
-
- return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
- }
-
- const String& XmlDocument::getLastParseError() const noexcept
- {
- return lastError;
- }
-
- void XmlDocument::setLastError (const String& desc, const bool carryOn)
- {
- lastError = desc;
- errorOccurred = ! carryOn;
- }
-
- String XmlDocument::getFileContents (const String& filename) const
- {
- if (inputSource != nullptr)
- {
- const ScopedPointer<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
-
- if (in != nullptr)
- return in->readEntireStreamAsString();
- }
-
- return String();
- }
-
- water_uchar XmlDocument::readNextChar() noexcept
- {
- const water_uchar c = input.getAndAdvance();
-
- if (c == 0)
- {
- outOfData = true;
- --input;
- }
-
- return c;
- }
-
- XmlElement* XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
- const bool onlyReadOuterDocumentElement)
- {
- input = textToParse;
- errorOccurred = false;
- outOfData = false;
- needToLoadDTD = true;
-
- if (textToParse.isEmpty())
- {
- lastError = "not enough input";
- }
- else if (! parseHeader())
- {
- lastError = "malformed header";
- }
- else if (! parseDTD())
- {
- lastError = "malformed DTD";
- }
- else
- {
- lastError.clear();
-
- ScopedPointer<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
-
- if (! errorOccurred)
- return result.release();
- }
-
- return nullptr;
- }
-
- bool XmlDocument::parseHeader()
- {
- skipNextWhiteSpace();
-
- if (CharacterFunctions::compareUpTo (input, CharPointer_UTF8 ("<?xml"), 5) == 0)
- {
- const String::CharPointerType headerEnd (CharacterFunctions::find (input, CharPointer_UTF8 ("?>")));
-
- if (headerEnd.isEmpty())
- return false;
-
- const String encoding (String (input, headerEnd)
- .fromFirstOccurrenceOf ("encoding", false, true)
- .fromFirstOccurrenceOf ("=", false, false)
- .fromFirstOccurrenceOf ("\"", false, false)
- .upToFirstOccurrenceOf ("\"", false, false).trim());
-
- /* If you load an XML document with a non-UTF encoding type, it may have been
- loaded wrongly.. Since all the files are read via the normal water file streams,
- they're treated as UTF-8, so by the time it gets to the parser, the encoding will
- have been lost. Best plan is to stick to utf-8 or if you have specific files to
- read, use your own code to convert them to a unicode String, and pass that to the
- XML parser.
- */
- CARLA_SAFE_ASSERT_RETURN (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"), false);
-
- input = headerEnd + 2;
- skipNextWhiteSpace();
- }
-
- return true;
- }
-
- bool XmlDocument::parseDTD()
- {
- if (CharacterFunctions::compareUpTo (input, CharPointer_UTF8 ("<!DOCTYPE"), 9) == 0)
- {
- input += 9;
- const String::CharPointerType dtdStart (input);
-
- for (int n = 1; n > 0;)
- {
- const water_uchar c = readNextChar();
-
- if (outOfData)
- return false;
-
- if (c == '<')
- ++n;
- else if (c == '>')
- --n;
- }
-
- dtdText = String (dtdStart, input - 1).trim();
- }
-
- return true;
- }
-
- void XmlDocument::skipNextWhiteSpace()
- {
- for (;;)
- {
- input = input.findEndOfWhitespace();
-
- if (input.isEmpty())
- {
- outOfData = true;
- break;
- }
-
- if (*input == '<')
- {
- if (input[1] == '!'
- && input[2] == '-'
- && input[3] == '-')
- {
- input += 4;
- const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
-
- if (closeComment < 0)
- {
- outOfData = true;
- break;
- }
-
- input += closeComment + 3;
- continue;
- }
-
- if (input[1] == '?')
- {
- input += 2;
- const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
-
- if (closeBracket < 0)
- {
- outOfData = true;
- break;
- }
-
- input += closeBracket + 2;
- continue;
- }
- }
-
- break;
- }
- }
-
- void XmlDocument::readQuotedString (String& result)
- {
- const water_uchar quote = readNextChar();
-
- while (! outOfData)
- {
- const water_uchar c = readNextChar();
-
- if (c == quote)
- break;
-
- --input;
-
- if (c == '&')
- {
- readEntity (result);
- }
- else
- {
- const String::CharPointerType start (input);
-
- for (;;)
- {
- const water_uchar character = *input;
-
- if (character == quote)
- {
- result.appendCharPointer (start, input);
- ++input;
- return;
- }
- else if (character == '&')
- {
- result.appendCharPointer (start, input);
- break;
- }
- else if (character == 0)
- {
- setLastError ("unmatched quotes", false);
- outOfData = true;
- break;
- }
-
- ++input;
- }
- }
- }
- }
-
- XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
- {
- XmlElement* node = nullptr;
-
- skipNextWhiteSpace();
- if (outOfData)
- return nullptr;
-
- if (*input == '<')
- {
- ++input;
- String::CharPointerType endOfToken (XmlIdentifierChars::findEndOfToken (input));
-
- if (endOfToken == input)
- {
- // no tag name - but allow for a gap after the '<' before giving an error
- skipNextWhiteSpace();
- endOfToken = XmlIdentifierChars::findEndOfToken (input);
-
- if (endOfToken == input)
- {
- setLastError ("tag name missing", false);
- return node;
- }
- }
-
- node = new XmlElement (input, endOfToken);
- input = endOfToken;
- LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
-
- // look for attributes
- for (;;)
- {
- skipNextWhiteSpace();
-
- const water_uchar c = *input;
-
- // empty tag..
- if (c == '/' && input[1] == '>')
- {
- input += 2;
- break;
- }
-
- // parse the guts of the element..
- if (c == '>')
- {
- ++input;
-
- if (alsoParseSubElements)
- readChildElements (*node);
-
- break;
- }
-
- // get an attribute..
- if (XmlIdentifierChars::isIdentifierChar (c))
- {
- String::CharPointerType attNameEnd (XmlIdentifierChars::findEndOfToken (input));
-
- if (attNameEnd != input)
- {
- const String::CharPointerType attNameStart (input);
- input = attNameEnd;
-
- skipNextWhiteSpace();
-
- if (readNextChar() == '=')
- {
- skipNextWhiteSpace();
-
- const water_uchar nextChar = *input;
-
- if (nextChar == '"' || nextChar == '\'')
- {
- XmlElement::XmlAttributeNode* const newAtt
- = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
-
- readQuotedString (newAtt->value);
- attributeAppender.append (newAtt);
- continue;
- }
- }
- else
- {
- setLastError ("expected '=' after attribute '"
- + String (attNameStart, attNameEnd) + "'", false);
- return node;
- }
- }
- }
- else
- {
- if (! outOfData)
- setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
- }
-
- break;
- }
- }
-
- return node;
- }
-
- void XmlDocument::readChildElements (XmlElement& parent)
- {
- LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
-
- for (;;)
- {
- const String::CharPointerType preWhitespaceInput (input);
- skipNextWhiteSpace();
-
- if (outOfData)
- {
- setLastError ("unmatched tags", false);
- break;
- }
-
- if (*input == '<')
- {
- const water_uchar c1 = input[1];
-
- if (c1 == '/')
- {
- // our close tag..
- const int closeTag = input.indexOf ((water_uchar) '>');
-
- if (closeTag >= 0)
- input += closeTag + 1;
-
- break;
- }
-
- if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_UTF8 ("[CDATA["), 7) == 0)
- {
- input += 9;
- const String::CharPointerType inputStart (input);
-
- for (;;)
- {
- const water_uchar c0 = *input;
-
- if (c0 == 0)
- {
- setLastError ("unterminated CDATA section", false);
- outOfData = true;
- break;
- }
- else if (c0 == ']'
- && input[1] == ']'
- && input[2] == '>')
- {
- childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
- input += 3;
- break;
- }
-
- ++input;
- }
- }
- else
- {
- // this is some other element, so parse and add it..
- if (XmlElement* const n = readNextElement (true))
- childAppender.append (n);
- else
- break;
- }
- }
- else // must be a character block
- {
- input = preWhitespaceInput; // roll back to include the leading whitespace
- MemoryOutputStream textElementContent;
- bool contentShouldBeUsed = ! ignoreEmptyTextElements;
-
- for (;;)
- {
- const water_uchar c = *input;
-
- if (c == '<')
- {
- if (input[1] == '!' && input[2] == '-' && input[3] == '-')
- {
- input += 4;
- const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
-
- if (closeComment < 0)
- {
- setLastError ("unterminated comment", false);
- outOfData = true;
- return;
- }
-
- input += closeComment + 3;
- continue;
- }
-
- break;
- }
-
- if (c == 0)
- {
- setLastError ("unmatched tags", false);
- outOfData = true;
- return;
- }
-
- if (c == '&')
- {
- String entity;
- readEntity (entity);
-
- if (entity.startsWithChar ('<') && entity [1] != 0)
- {
- const String::CharPointerType oldInput (input);
- const bool oldOutOfData = outOfData;
-
- input = entity.getCharPointer();
- outOfData = false;
-
- while (XmlElement* n = readNextElement (true))
- childAppender.append (n);
-
- input = oldInput;
- outOfData = oldOutOfData;
- }
- else
- {
- textElementContent << entity;
- contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
- }
- }
- else
- {
- for (;; ++input)
- {
- water_uchar nextChar = *input;
-
- if (nextChar == '\r')
- {
- nextChar = '\n';
-
- if (input[1] == '\n')
- continue;
- }
-
- if (nextChar == '<' || nextChar == '&')
- break;
-
- if (nextChar == 0)
- {
- setLastError ("unmatched tags", false);
- outOfData = true;
- return;
- }
-
- textElementContent.appendUTF8Char (nextChar);
- contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
- }
- }
- }
-
- if (contentShouldBeUsed)
- childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
- }
- }
- }
-
- void XmlDocument::readEntity (String& result)
- {
- // skip over the ampersand
- ++input;
-
- if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
- {
- input += 4;
- result += '&';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
- {
- input += 5;
- result += '"';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
- {
- input += 5;
- result += '\'';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
- {
- input += 3;
- result += '<';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
- {
- input += 3;
- result += '>';
- }
- else if (*input == '#')
- {
- int charCode = 0;
- ++input;
-
- if (*input == 'x' || *input == 'X')
- {
- ++input;
- int numChars = 0;
-
- while (input[0] != ';')
- {
- const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
-
- if (hexValue < 0 || ++numChars > 8)
- {
- setLastError ("illegal escape sequence", true);
- break;
- }
-
- charCode = (charCode << 4) | hexValue;
- ++input;
- }
-
- ++input;
- }
- else if (input[0] >= '0' && input[0] <= '9')
- {
- int numChars = 0;
-
- while (input[0] != ';')
- {
- if (++numChars > 12)
- {
- setLastError ("illegal escape sequence", true);
- break;
- }
-
- charCode = charCode * 10 + ((int) input[0] - '0');
- ++input;
- }
-
- ++input;
- }
- else
- {
- setLastError ("illegal escape sequence", true);
- result += '&';
- return;
- }
-
- result << (water_uchar) charCode;
- }
- else
- {
- const String::CharPointerType entityNameStart (input);
- const int closingSemiColon = input.indexOf ((water_uchar) ';');
-
- if (closingSemiColon < 0)
- {
- outOfData = true;
- result += '&';
- }
- else
- {
- input += closingSemiColon + 1;
-
- result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
- }
- }
- }
-
- String XmlDocument::expandEntity (const String& ent)
- {
- if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
- if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
- if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
- if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
- if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
-
- if (ent[0] == '#')
- {
- const water_uchar char1 = ent[1];
-
- if (char1 == 'x' || char1 == 'X')
- return String::charToString (static_cast<water_uchar> (ent.substring (2).getHexValue32()));
-
- if (char1 >= '0' && char1 <= '9')
- return String::charToString (static_cast<water_uchar> (ent.substring (1).getIntValue()));
-
- setLastError ("illegal escape sequence", false);
- return String::charToString ('&');
- }
-
- return expandExternalEntity (ent);
- }
-
- String XmlDocument::expandExternalEntity (const String& entity)
- {
- if (needToLoadDTD)
- {
- if (dtdText.isNotEmpty())
- {
- dtdText = dtdText.trimCharactersAtEnd (">");
- tokenisedDTD.addTokens (dtdText, true);
-
- if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
- && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
- {
- const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
-
- tokenisedDTD.clear();
- tokenisedDTD.addTokens (getFileContents (fn), true);
- }
- else
- {
- tokenisedDTD.clear();
- const int openBracket = dtdText.indexOfChar ('[');
-
- if (openBracket > 0)
- {
- const int closeBracket = dtdText.lastIndexOfChar (']');
-
- if (closeBracket > openBracket)
- tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
- closeBracket), true);
- }
- }
-
- for (int i = tokenisedDTD.size(); --i >= 0;)
- {
- if (tokenisedDTD[i].startsWithChar ('%')
- && tokenisedDTD[i].endsWithChar (';'))
- {
- const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
- StringArray newToks;
- newToks.addTokens (parsed, true);
-
- tokenisedDTD.remove (i);
-
- for (int j = newToks.size(); --j >= 0;)
- tokenisedDTD.insert (i, newToks[j]);
- }
- }
- }
-
- needToLoadDTD = false;
- }
-
- for (int i = 0; i < tokenisedDTD.size(); ++i)
- {
- if (tokenisedDTD[i] == entity)
- {
- if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
- {
- String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
-
- // check for sub-entities..
- int ampersand = ent.indexOfChar ('&');
-
- while (ampersand >= 0)
- {
- const int semiColon = ent.indexOf (i + 1, ";");
-
- if (semiColon < 0)
- {
- setLastError ("entity without terminating semi-colon", false);
- break;
- }
-
- const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
-
- ent = ent.substring (0, ampersand)
- + resolved
- + ent.substring (semiColon + 1);
-
- ampersand = ent.indexOfChar (semiColon + 1, '&');
- }
-
- return ent;
- }
- }
- }
-
- setLastError ("unknown entity", true);
-
- return entity;
- }
-
- String XmlDocument::getParameterEntity (const String& entity)
- {
- for (int i = 0; i < tokenisedDTD.size(); ++i)
- {
- if (tokenisedDTD[i] == entity
- && tokenisedDTD [i - 1] == "%"
- && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
- {
- const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
-
- if (ent.equalsIgnoreCase ("system"))
- return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
-
- return ent.trim().unquoted();
- }
- }
-
- return entity;
- }
-
- }
|