The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

881 lines
25KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. The code included in this file is provided under the terms of the ISC license
  8. http://www.isc.org/downloads/software-support-policy/isc-license. Permission
  9. To use, copy, modify, and/or distribute this software for any purpose with or
  10. without fee is hereby granted provided that the above copyright notice and
  11. this permission notice appear in all copies.
  12. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  13. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  14. DISCLAIMED.
  15. ==============================================================================
  16. */
  17. namespace juce
  18. {
  19. XmlDocument::XmlDocument (const String& documentText)
  20. : originalText (documentText)
  21. {
  22. }
  23. XmlDocument::XmlDocument (const File& file)
  24. : inputSource (new FileInputSource (file))
  25. {
  26. }
  27. XmlDocument::~XmlDocument()
  28. {
  29. }
  30. XmlElement* XmlDocument::parse (const File& file)
  31. {
  32. XmlDocument doc (file);
  33. return doc.getDocumentElement();
  34. }
  35. XmlElement* XmlDocument::parse (const String& xmlData)
  36. {
  37. XmlDocument doc (xmlData);
  38. return doc.getDocumentElement();
  39. }
  40. void XmlDocument::setInputSource (InputSource* const newSource) noexcept
  41. {
  42. inputSource = newSource;
  43. }
  44. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  45. {
  46. ignoreEmptyTextElements = shouldBeIgnored;
  47. }
  48. namespace XmlIdentifierChars
  49. {
  50. static bool isIdentifierCharSlow (const juce_wchar c) noexcept
  51. {
  52. return CharacterFunctions::isLetterOrDigit (c)
  53. || c == '_' || c == '-' || c == ':' || c == '.';
  54. }
  55. static bool isIdentifierChar (const juce_wchar c) noexcept
  56. {
  57. static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  58. return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  59. : isIdentifierCharSlow (c);
  60. }
  61. /*static void generateIdentifierCharConstants()
  62. {
  63. uint32 n[8] = { 0 };
  64. for (int i = 0; i < 256; ++i)
  65. if (isIdentifierCharSlow (i))
  66. n[i >> 5] |= (1 << (i & 31));
  67. String s;
  68. for (int i = 0; i < 8; ++i)
  69. s << "0x" << String::toHexString ((int) n[i]) << ", ";
  70. DBG (s);
  71. }*/
  72. static String::CharPointerType findEndOfToken (String::CharPointerType p)
  73. {
  74. while (isIdentifierChar (*p))
  75. ++p;
  76. return p;
  77. }
  78. }
  79. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  80. {
  81. if (originalText.isEmpty() && inputSource != nullptr)
  82. {
  83. ScopedPointer<InputStream> in (inputSource->createInputStream());
  84. if (in != nullptr)
  85. {
  86. MemoryOutputStream data;
  87. data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
  88. #if JUCE_STRING_UTF_TYPE == 8
  89. if (data.getDataSize() > 2)
  90. {
  91. data.writeByte (0);
  92. const char* text = static_cast<const char*> (data.getData());
  93. if (CharPointer_UTF16::isByteOrderMarkBigEndian (text)
  94. || CharPointer_UTF16::isByteOrderMarkLittleEndian (text))
  95. {
  96. originalText = data.toString();
  97. }
  98. else
  99. {
  100. if (CharPointer_UTF8::isByteOrderMark (text))
  101. text += 3;
  102. // parse the input buffer directly to avoid copying it all to a string..
  103. return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
  104. }
  105. }
  106. #else
  107. originalText = data.toString();
  108. #endif
  109. }
  110. }
  111. return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
  112. }
  113. const String& XmlDocument::getLastParseError() const noexcept
  114. {
  115. return lastError;
  116. }
  117. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  118. {
  119. lastError = desc;
  120. errorOccurred = ! carryOn;
  121. }
  122. String XmlDocument::getFileContents (const String& filename) const
  123. {
  124. if (inputSource != nullptr)
  125. {
  126. const ScopedPointer<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
  127. if (in != nullptr)
  128. return in->readEntireStreamAsString();
  129. }
  130. return {};
  131. }
  132. juce_wchar XmlDocument::readNextChar() noexcept
  133. {
  134. const juce_wchar c = input.getAndAdvance();
  135. if (c == 0)
  136. {
  137. outOfData = true;
  138. --input;
  139. }
  140. return c;
  141. }
  142. XmlElement* XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
  143. const bool onlyReadOuterDocumentElement)
  144. {
  145. input = textToParse;
  146. errorOccurred = false;
  147. outOfData = false;
  148. needToLoadDTD = true;
  149. if (textToParse.isEmpty())
  150. {
  151. lastError = "not enough input";
  152. }
  153. else if (! parseHeader())
  154. {
  155. lastError = "malformed header";
  156. }
  157. else if (! parseDTD())
  158. {
  159. lastError = "malformed DTD";
  160. }
  161. else
  162. {
  163. lastError.clear();
  164. ScopedPointer<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  165. if (! errorOccurred)
  166. return result.release();
  167. }
  168. return nullptr;
  169. }
  170. bool XmlDocument::parseHeader()
  171. {
  172. skipNextWhiteSpace();
  173. if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<?xml"), 5) == 0)
  174. {
  175. const String::CharPointerType headerEnd (CharacterFunctions::find (input, CharPointer_ASCII ("?>")));
  176. if (headerEnd.isEmpty())
  177. return false;
  178. #if JUCE_DEBUG
  179. const String encoding (String (input, headerEnd)
  180. .fromFirstOccurrenceOf ("encoding", false, true)
  181. .fromFirstOccurrenceOf ("=", false, false)
  182. .fromFirstOccurrenceOf ("\"", false, false)
  183. .upToFirstOccurrenceOf ("\"", false, false).trim());
  184. /* If you load an XML document with a non-UTF encoding type, it may have been
  185. loaded wrongly.. Since all the files are read via the normal juce file streams,
  186. they're treated as UTF-8, so by the time it gets to the parser, the encoding will
  187. have been lost. Best plan is to stick to utf-8 or if you have specific files to
  188. read, use your own code to convert them to a unicode String, and pass that to the
  189. XML parser.
  190. */
  191. jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
  192. #endif
  193. input = headerEnd + 2;
  194. skipNextWhiteSpace();
  195. }
  196. return true;
  197. }
  198. bool XmlDocument::parseDTD()
  199. {
  200. if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<!DOCTYPE"), 9) == 0)
  201. {
  202. input += 9;
  203. const String::CharPointerType dtdStart (input);
  204. for (int n = 1; n > 0;)
  205. {
  206. const juce_wchar c = readNextChar();
  207. if (outOfData)
  208. return false;
  209. if (c == '<')
  210. ++n;
  211. else if (c == '>')
  212. --n;
  213. }
  214. dtdText = String (dtdStart, input - 1).trim();
  215. }
  216. return true;
  217. }
  218. void XmlDocument::skipNextWhiteSpace()
  219. {
  220. for (;;)
  221. {
  222. input = input.findEndOfWhitespace();
  223. if (input.isEmpty())
  224. {
  225. outOfData = true;
  226. break;
  227. }
  228. if (*input == '<')
  229. {
  230. if (input[1] == '!'
  231. && input[2] == '-'
  232. && input[3] == '-')
  233. {
  234. input += 4;
  235. const int closeComment = input.indexOf (CharPointer_ASCII ("-->"));
  236. if (closeComment < 0)
  237. {
  238. outOfData = true;
  239. break;
  240. }
  241. input += closeComment + 3;
  242. continue;
  243. }
  244. if (input[1] == '?')
  245. {
  246. input += 2;
  247. const int closeBracket = input.indexOf (CharPointer_ASCII ("?>"));
  248. if (closeBracket < 0)
  249. {
  250. outOfData = true;
  251. break;
  252. }
  253. input += closeBracket + 2;
  254. continue;
  255. }
  256. }
  257. break;
  258. }
  259. }
  260. void XmlDocument::readQuotedString (String& result)
  261. {
  262. const juce_wchar quote = readNextChar();
  263. while (! outOfData)
  264. {
  265. const juce_wchar c = readNextChar();
  266. if (c == quote)
  267. break;
  268. --input;
  269. if (c == '&')
  270. {
  271. readEntity (result);
  272. }
  273. else
  274. {
  275. const String::CharPointerType start (input);
  276. for (;;)
  277. {
  278. const juce_wchar character = *input;
  279. if (character == quote)
  280. {
  281. result.appendCharPointer (start, input);
  282. ++input;
  283. return;
  284. }
  285. else if (character == '&')
  286. {
  287. result.appendCharPointer (start, input);
  288. break;
  289. }
  290. else if (character == 0)
  291. {
  292. setLastError ("unmatched quotes", false);
  293. outOfData = true;
  294. break;
  295. }
  296. ++input;
  297. }
  298. }
  299. }
  300. }
  301. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  302. {
  303. XmlElement* node = nullptr;
  304. skipNextWhiteSpace();
  305. if (outOfData)
  306. return nullptr;
  307. if (*input == '<')
  308. {
  309. ++input;
  310. String::CharPointerType endOfToken (XmlIdentifierChars::findEndOfToken (input));
  311. if (endOfToken == input)
  312. {
  313. // no tag name - but allow for a gap after the '<' before giving an error
  314. skipNextWhiteSpace();
  315. endOfToken = XmlIdentifierChars::findEndOfToken (input);
  316. if (endOfToken == input)
  317. {
  318. setLastError ("tag name missing", false);
  319. return node;
  320. }
  321. }
  322. node = new XmlElement (input, endOfToken);
  323. input = endOfToken;
  324. LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
  325. // look for attributes
  326. for (;;)
  327. {
  328. skipNextWhiteSpace();
  329. const juce_wchar c = *input;
  330. // empty tag..
  331. if (c == '/' && input[1] == '>')
  332. {
  333. input += 2;
  334. break;
  335. }
  336. // parse the guts of the element..
  337. if (c == '>')
  338. {
  339. ++input;
  340. if (alsoParseSubElements)
  341. readChildElements (*node);
  342. break;
  343. }
  344. // get an attribute..
  345. if (XmlIdentifierChars::isIdentifierChar (c))
  346. {
  347. String::CharPointerType attNameEnd (XmlIdentifierChars::findEndOfToken (input));
  348. if (attNameEnd != input)
  349. {
  350. const String::CharPointerType attNameStart (input);
  351. input = attNameEnd;
  352. skipNextWhiteSpace();
  353. if (readNextChar() == '=')
  354. {
  355. skipNextWhiteSpace();
  356. const juce_wchar nextChar = *input;
  357. if (nextChar == '"' || nextChar == '\'')
  358. {
  359. XmlElement::XmlAttributeNode* const newAtt
  360. = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
  361. readQuotedString (newAtt->value);
  362. attributeAppender.append (newAtt);
  363. continue;
  364. }
  365. }
  366. else
  367. {
  368. setLastError ("expected '=' after attribute '"
  369. + String (attNameStart, attNameEnd) + "'", false);
  370. return node;
  371. }
  372. }
  373. }
  374. else
  375. {
  376. if (! outOfData)
  377. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  378. }
  379. break;
  380. }
  381. }
  382. return node;
  383. }
  384. void XmlDocument::readChildElements (XmlElement& parent)
  385. {
  386. LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
  387. for (;;)
  388. {
  389. const String::CharPointerType preWhitespaceInput (input);
  390. skipNextWhiteSpace();
  391. if (outOfData)
  392. {
  393. setLastError ("unmatched tags", false);
  394. break;
  395. }
  396. if (*input == '<')
  397. {
  398. const juce_wchar c1 = input[1];
  399. if (c1 == '/')
  400. {
  401. // our close tag..
  402. const int closeTag = input.indexOf ((juce_wchar) '>');
  403. if (closeTag >= 0)
  404. input += closeTag + 1;
  405. break;
  406. }
  407. if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_ASCII ("[CDATA["), 7) == 0)
  408. {
  409. input += 9;
  410. const String::CharPointerType inputStart (input);
  411. for (;;)
  412. {
  413. const juce_wchar c0 = *input;
  414. if (c0 == 0)
  415. {
  416. setLastError ("unterminated CDATA section", false);
  417. outOfData = true;
  418. break;
  419. }
  420. else if (c0 == ']'
  421. && input[1] == ']'
  422. && input[2] == '>')
  423. {
  424. childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
  425. input += 3;
  426. break;
  427. }
  428. ++input;
  429. }
  430. }
  431. else
  432. {
  433. // this is some other element, so parse and add it..
  434. if (XmlElement* const n = readNextElement (true))
  435. childAppender.append (n);
  436. else
  437. break;
  438. }
  439. }
  440. else // must be a character block
  441. {
  442. input = preWhitespaceInput; // roll back to include the leading whitespace
  443. MemoryOutputStream textElementContent;
  444. bool contentShouldBeUsed = ! ignoreEmptyTextElements;
  445. for (;;)
  446. {
  447. const juce_wchar c = *input;
  448. if (c == '<')
  449. {
  450. if (input[1] == '!' && input[2] == '-' && input[3] == '-')
  451. {
  452. input += 4;
  453. const int closeComment = input.indexOf (CharPointer_ASCII ("-->"));
  454. if (closeComment < 0)
  455. {
  456. setLastError ("unterminated comment", false);
  457. outOfData = true;
  458. return;
  459. }
  460. input += closeComment + 3;
  461. continue;
  462. }
  463. break;
  464. }
  465. if (c == 0)
  466. {
  467. setLastError ("unmatched tags", false);
  468. outOfData = true;
  469. return;
  470. }
  471. if (c == '&')
  472. {
  473. String entity;
  474. readEntity (entity);
  475. if (entity.startsWithChar ('<') && entity [1] != 0)
  476. {
  477. const String::CharPointerType oldInput (input);
  478. const bool oldOutOfData = outOfData;
  479. input = entity.getCharPointer();
  480. outOfData = false;
  481. while (XmlElement* n = readNextElement (true))
  482. childAppender.append (n);
  483. input = oldInput;
  484. outOfData = oldOutOfData;
  485. }
  486. else
  487. {
  488. textElementContent << entity;
  489. contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
  490. }
  491. }
  492. else
  493. {
  494. for (;; ++input)
  495. {
  496. juce_wchar nextChar = *input;
  497. if (nextChar == '\r')
  498. {
  499. nextChar = '\n';
  500. if (input[1] == '\n')
  501. continue;
  502. }
  503. if (nextChar == '<' || nextChar == '&')
  504. break;
  505. if (nextChar == 0)
  506. {
  507. setLastError ("unmatched tags", false);
  508. outOfData = true;
  509. return;
  510. }
  511. textElementContent.appendUTF8Char (nextChar);
  512. contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
  513. }
  514. }
  515. }
  516. if (contentShouldBeUsed)
  517. childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
  518. }
  519. }
  520. }
  521. void XmlDocument::readEntity (String& result)
  522. {
  523. // skip over the ampersand
  524. ++input;
  525. if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("amp;"), 4) == 0)
  526. {
  527. input += 4;
  528. result += '&';
  529. }
  530. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("quot;"), 5) == 0)
  531. {
  532. input += 5;
  533. result += '"';
  534. }
  535. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("apos;"), 5) == 0)
  536. {
  537. input += 5;
  538. result += '\'';
  539. }
  540. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("lt;"), 3) == 0)
  541. {
  542. input += 3;
  543. result += '<';
  544. }
  545. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("gt;"), 3) == 0)
  546. {
  547. input += 3;
  548. result += '>';
  549. }
  550. else if (*input == '#')
  551. {
  552. int charCode = 0;
  553. ++input;
  554. if (*input == 'x' || *input == 'X')
  555. {
  556. ++input;
  557. int numChars = 0;
  558. while (input[0] != ';')
  559. {
  560. const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  561. if (hexValue < 0 || ++numChars > 8)
  562. {
  563. setLastError ("illegal escape sequence", true);
  564. break;
  565. }
  566. charCode = (charCode << 4) | hexValue;
  567. ++input;
  568. }
  569. ++input;
  570. }
  571. else if (input[0] >= '0' && input[0] <= '9')
  572. {
  573. int numChars = 0;
  574. while (input[0] != ';')
  575. {
  576. if (++numChars > 12)
  577. {
  578. setLastError ("illegal escape sequence", true);
  579. break;
  580. }
  581. charCode = charCode * 10 + ((int) input[0] - '0');
  582. ++input;
  583. }
  584. ++input;
  585. }
  586. else
  587. {
  588. setLastError ("illegal escape sequence", true);
  589. result += '&';
  590. return;
  591. }
  592. result << (juce_wchar) charCode;
  593. }
  594. else
  595. {
  596. const String::CharPointerType entityNameStart (input);
  597. const int closingSemiColon = input.indexOf ((juce_wchar) ';');
  598. if (closingSemiColon < 0)
  599. {
  600. outOfData = true;
  601. result += '&';
  602. }
  603. else
  604. {
  605. input += closingSemiColon + 1;
  606. result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
  607. }
  608. }
  609. }
  610. String XmlDocument::expandEntity (const String& ent)
  611. {
  612. if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
  613. if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
  614. if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
  615. if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
  616. if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
  617. if (ent[0] == '#')
  618. {
  619. const juce_wchar char1 = ent[1];
  620. if (char1 == 'x' || char1 == 'X')
  621. return String::charToString (static_cast<juce_wchar> (ent.substring (2).getHexValue32()));
  622. if (char1 >= '0' && char1 <= '9')
  623. return String::charToString (static_cast<juce_wchar> (ent.substring (1).getIntValue()));
  624. setLastError ("illegal escape sequence", false);
  625. return String::charToString ('&');
  626. }
  627. return expandExternalEntity (ent);
  628. }
  629. String XmlDocument::expandExternalEntity (const String& entity)
  630. {
  631. if (needToLoadDTD)
  632. {
  633. if (dtdText.isNotEmpty())
  634. {
  635. dtdText = dtdText.trimCharactersAtEnd (">");
  636. tokenisedDTD.addTokens (dtdText, true);
  637. if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
  638. && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
  639. {
  640. const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
  641. tokenisedDTD.clear();
  642. tokenisedDTD.addTokens (getFileContents (fn), true);
  643. }
  644. else
  645. {
  646. tokenisedDTD.clear();
  647. const int openBracket = dtdText.indexOfChar ('[');
  648. if (openBracket > 0)
  649. {
  650. const int closeBracket = dtdText.lastIndexOfChar (']');
  651. if (closeBracket > openBracket)
  652. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  653. closeBracket), true);
  654. }
  655. }
  656. for (int i = tokenisedDTD.size(); --i >= 0;)
  657. {
  658. if (tokenisedDTD[i].startsWithChar ('%')
  659. && tokenisedDTD[i].endsWithChar (';'))
  660. {
  661. const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
  662. StringArray newToks;
  663. newToks.addTokens (parsed, true);
  664. tokenisedDTD.remove (i);
  665. for (int j = newToks.size(); --j >= 0;)
  666. tokenisedDTD.insert (i, newToks[j]);
  667. }
  668. }
  669. }
  670. needToLoadDTD = false;
  671. }
  672. for (int i = 0; i < tokenisedDTD.size(); ++i)
  673. {
  674. if (tokenisedDTD[i] == entity)
  675. {
  676. if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
  677. {
  678. String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
  679. // check for sub-entities..
  680. int ampersand = ent.indexOfChar ('&');
  681. while (ampersand >= 0)
  682. {
  683. const int semiColon = ent.indexOf (i + 1, ";");
  684. if (semiColon < 0)
  685. {
  686. setLastError ("entity without terminating semi-colon", false);
  687. break;
  688. }
  689. const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
  690. ent = ent.substring (0, ampersand)
  691. + resolved
  692. + ent.substring (semiColon + 1);
  693. ampersand = ent.indexOfChar (semiColon + 1, '&');
  694. }
  695. return ent;
  696. }
  697. }
  698. }
  699. setLastError ("unknown entity", true);
  700. return entity;
  701. }
  702. String XmlDocument::getParameterEntity (const String& entity)
  703. {
  704. for (int i = 0; i < tokenisedDTD.size(); ++i)
  705. {
  706. if (tokenisedDTD[i] == entity
  707. && tokenisedDTD [i - 1] == "%"
  708. && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
  709. {
  710. const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
  711. if (ent.equalsIgnoreCase ("system"))
  712. return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
  713. return ent.trim().unquoted();
  714. }
  715. }
  716. return entity;
  717. }
  718. }