The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

860 lines
25KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. The code included in this file is provided under the terms of the ISC license
  8. http://www.isc.org/downloads/software-support-policy/isc-license. Permission
  9. To use, copy, modify, and/or distribute this software for any purpose with or
  10. without fee is hereby granted provided that the above copyright notice and
  11. this permission notice appear in all copies.
  12. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  13. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  14. DISCLAIMED.
  15. ==============================================================================
  16. */
  17. namespace juce
  18. {
  19. XmlDocument::XmlDocument (const String& text) : originalText (text) {}
  20. XmlDocument::XmlDocument (const File& file) : inputSource (new FileInputSource (file)) {}
  21. XmlDocument::~XmlDocument() {}
  22. XmlElement* XmlDocument::parse (const File& file)
  23. {
  24. XmlDocument doc (file);
  25. return doc.getDocumentElement();
  26. }
  27. XmlElement* XmlDocument::parse (const String& xmlData)
  28. {
  29. XmlDocument doc (xmlData);
  30. return doc.getDocumentElement();
  31. }
  32. void XmlDocument::setInputSource (InputSource* const newSource) noexcept
  33. {
  34. inputSource = newSource;
  35. }
  36. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  37. {
  38. ignoreEmptyTextElements = shouldBeIgnored;
  39. }
  40. namespace XmlIdentifierChars
  41. {
  42. static bool isIdentifierCharSlow (const juce_wchar c) noexcept
  43. {
  44. return CharacterFunctions::isLetterOrDigit (c)
  45. || c == '_' || c == '-' || c == ':' || c == '.';
  46. }
  47. static bool isIdentifierChar (const juce_wchar c) noexcept
  48. {
  49. static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  50. return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  51. : isIdentifierCharSlow (c);
  52. }
  53. /*static void generateIdentifierCharConstants()
  54. {
  55. uint32 n[8] = { 0 };
  56. for (int i = 0; i < 256; ++i)
  57. if (isIdentifierCharSlow (i))
  58. n[i >> 5] |= (1 << (i & 31));
  59. String s;
  60. for (int i = 0; i < 8; ++i)
  61. s << "0x" << String::toHexString ((int) n[i]) << ", ";
  62. DBG (s);
  63. }*/
  64. static String::CharPointerType findEndOfToken (String::CharPointerType p) noexcept
  65. {
  66. while (isIdentifierChar (*p))
  67. ++p;
  68. return p;
  69. }
  70. }
  71. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  72. {
  73. if (originalText.isEmpty() && inputSource != nullptr)
  74. {
  75. if (ScopedPointer<InputStream> in = inputSource->createInputStream())
  76. {
  77. MemoryOutputStream data;
  78. data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
  79. #if JUCE_STRING_UTF_TYPE == 8
  80. if (data.getDataSize() > 2)
  81. {
  82. data.writeByte (0);
  83. auto* text = static_cast<const char*> (data.getData());
  84. if (CharPointer_UTF16::isByteOrderMarkBigEndian (text)
  85. || CharPointer_UTF16::isByteOrderMarkLittleEndian (text))
  86. {
  87. originalText = data.toString();
  88. }
  89. else
  90. {
  91. if (CharPointer_UTF8::isByteOrderMark (text))
  92. text += 3;
  93. // parse the input buffer directly to avoid copying it all to a string..
  94. return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
  95. }
  96. }
  97. #else
  98. originalText = data.toString();
  99. #endif
  100. }
  101. }
  102. return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
  103. }
  104. const String& XmlDocument::getLastParseError() const noexcept
  105. {
  106. return lastError;
  107. }
  108. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  109. {
  110. lastError = desc;
  111. errorOccurred = ! carryOn;
  112. }
  113. String XmlDocument::getFileContents (const String& filename) const
  114. {
  115. if (inputSource != nullptr)
  116. if (ScopedPointer<InputStream> in = inputSource->createInputStreamFor (filename.trim().unquoted()))
  117. return in->readEntireStreamAsString();
  118. return {};
  119. }
  120. juce_wchar XmlDocument::readNextChar() noexcept
  121. {
  122. auto c = input.getAndAdvance();
  123. if (c == 0)
  124. {
  125. outOfData = true;
  126. --input;
  127. }
  128. return c;
  129. }
  130. XmlElement* XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
  131. const bool onlyReadOuterDocumentElement)
  132. {
  133. input = textToParse;
  134. errorOccurred = false;
  135. outOfData = false;
  136. needToLoadDTD = true;
  137. if (textToParse.isEmpty())
  138. {
  139. lastError = "not enough input";
  140. }
  141. else if (! parseHeader())
  142. {
  143. lastError = "malformed header";
  144. }
  145. else if (! parseDTD())
  146. {
  147. lastError = "malformed DTD";
  148. }
  149. else
  150. {
  151. lastError.clear();
  152. ScopedPointer<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  153. if (! errorOccurred)
  154. return result.release();
  155. }
  156. return nullptr;
  157. }
  158. bool XmlDocument::parseHeader()
  159. {
  160. skipNextWhiteSpace();
  161. if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<?xml"), 5) == 0)
  162. {
  163. auto headerEnd = CharacterFunctions::find (input, CharPointer_ASCII ("?>"));
  164. if (headerEnd.isEmpty())
  165. return false;
  166. #if JUCE_DEBUG
  167. auto encoding = String (input, headerEnd)
  168. .fromFirstOccurrenceOf ("encoding", false, true)
  169. .fromFirstOccurrenceOf ("=", false, false)
  170. .fromFirstOccurrenceOf ("\"", false, false)
  171. .upToFirstOccurrenceOf ("\"", false, false)
  172. .trim();
  173. /* If you load an XML document with a non-UTF encoding type, it may have been
  174. loaded wrongly.. Since all the files are read via the normal juce file streams,
  175. they're treated as UTF-8, so by the time it gets to the parser, the encoding will
  176. have been lost. Best plan is to stick to utf-8 or if you have specific files to
  177. read, use your own code to convert them to a unicode String, and pass that to the
  178. XML parser.
  179. */
  180. jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
  181. #endif
  182. input = headerEnd + 2;
  183. skipNextWhiteSpace();
  184. }
  185. return true;
  186. }
  187. bool XmlDocument::parseDTD()
  188. {
  189. if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<!DOCTYPE"), 9) == 0)
  190. {
  191. input += 9;
  192. auto dtdStart = input;
  193. for (int n = 1; n > 0;)
  194. {
  195. auto c = readNextChar();
  196. if (outOfData)
  197. return false;
  198. if (c == '<')
  199. ++n;
  200. else if (c == '>')
  201. --n;
  202. }
  203. dtdText = String (dtdStart, input - 1).trim();
  204. }
  205. return true;
  206. }
  207. void XmlDocument::skipNextWhiteSpace()
  208. {
  209. for (;;)
  210. {
  211. input = input.findEndOfWhitespace();
  212. if (input.isEmpty())
  213. {
  214. outOfData = true;
  215. break;
  216. }
  217. if (*input == '<')
  218. {
  219. if (input[1] == '!'
  220. && input[2] == '-'
  221. && input[3] == '-')
  222. {
  223. input += 4;
  224. auto closeComment = input.indexOf (CharPointer_ASCII ("-->"));
  225. if (closeComment < 0)
  226. {
  227. outOfData = true;
  228. break;
  229. }
  230. input += closeComment + 3;
  231. continue;
  232. }
  233. if (input[1] == '?')
  234. {
  235. input += 2;
  236. auto closeBracket = input.indexOf (CharPointer_ASCII ("?>"));
  237. if (closeBracket < 0)
  238. {
  239. outOfData = true;
  240. break;
  241. }
  242. input += closeBracket + 2;
  243. continue;
  244. }
  245. }
  246. break;
  247. }
  248. }
  249. void XmlDocument::readQuotedString (String& result)
  250. {
  251. auto quote = readNextChar();
  252. while (! outOfData)
  253. {
  254. auto c = readNextChar();
  255. if (c == quote)
  256. break;
  257. --input;
  258. if (c == '&')
  259. {
  260. readEntity (result);
  261. }
  262. else
  263. {
  264. auto start = input;
  265. for (;;)
  266. {
  267. auto character = *input;
  268. if (character == quote)
  269. {
  270. result.appendCharPointer (start, input);
  271. ++input;
  272. return;
  273. }
  274. if (character == '&')
  275. {
  276. result.appendCharPointer (start, input);
  277. break;
  278. }
  279. if (character == 0)
  280. {
  281. setLastError ("unmatched quotes", false);
  282. outOfData = true;
  283. break;
  284. }
  285. ++input;
  286. }
  287. }
  288. }
  289. }
  290. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  291. {
  292. XmlElement* node = nullptr;
  293. skipNextWhiteSpace();
  294. if (outOfData)
  295. return nullptr;
  296. if (*input == '<')
  297. {
  298. ++input;
  299. auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
  300. if (endOfToken == input)
  301. {
  302. // no tag name - but allow for a gap after the '<' before giving an error
  303. skipNextWhiteSpace();
  304. endOfToken = XmlIdentifierChars::findEndOfToken (input);
  305. if (endOfToken == input)
  306. {
  307. setLastError ("tag name missing", false);
  308. return node;
  309. }
  310. }
  311. node = new XmlElement (input, endOfToken);
  312. input = endOfToken;
  313. LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
  314. // look for attributes
  315. for (;;)
  316. {
  317. skipNextWhiteSpace();
  318. auto c = *input;
  319. // empty tag..
  320. if (c == '/' && input[1] == '>')
  321. {
  322. input += 2;
  323. break;
  324. }
  325. // parse the guts of the element..
  326. if (c == '>')
  327. {
  328. ++input;
  329. if (alsoParseSubElements)
  330. readChildElements (*node);
  331. break;
  332. }
  333. // get an attribute..
  334. if (XmlIdentifierChars::isIdentifierChar (c))
  335. {
  336. auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
  337. if (attNameEnd != input)
  338. {
  339. auto attNameStart = input;
  340. input = attNameEnd;
  341. skipNextWhiteSpace();
  342. if (readNextChar() == '=')
  343. {
  344. skipNextWhiteSpace();
  345. auto nextChar = *input;
  346. if (nextChar == '"' || nextChar == '\'')
  347. {
  348. auto* newAtt = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
  349. readQuotedString (newAtt->value);
  350. attributeAppender.append (newAtt);
  351. continue;
  352. }
  353. }
  354. else
  355. {
  356. setLastError ("expected '=' after attribute '"
  357. + String (attNameStart, attNameEnd) + "'", false);
  358. return node;
  359. }
  360. }
  361. }
  362. else
  363. {
  364. if (! outOfData)
  365. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  366. }
  367. break;
  368. }
  369. }
  370. return node;
  371. }
  372. void XmlDocument::readChildElements (XmlElement& parent)
  373. {
  374. LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
  375. for (;;)
  376. {
  377. auto preWhitespaceInput = input;
  378. skipNextWhiteSpace();
  379. if (outOfData)
  380. {
  381. setLastError ("unmatched tags", false);
  382. break;
  383. }
  384. if (*input == '<')
  385. {
  386. auto c1 = input[1];
  387. if (c1 == '/')
  388. {
  389. // our close tag..
  390. auto closeTag = input.indexOf ((juce_wchar) '>');
  391. if (closeTag >= 0)
  392. input += closeTag + 1;
  393. break;
  394. }
  395. if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_ASCII ("[CDATA["), 7) == 0)
  396. {
  397. input += 9;
  398. auto inputStart = input;
  399. for (;;)
  400. {
  401. auto c0 = *input;
  402. if (c0 == 0)
  403. {
  404. setLastError ("unterminated CDATA section", false);
  405. outOfData = true;
  406. break;
  407. }
  408. if (c0 == ']' && input[1] == ']' && input[2] == '>')
  409. {
  410. childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
  411. input += 3;
  412. break;
  413. }
  414. ++input;
  415. }
  416. }
  417. else
  418. {
  419. // this is some other element, so parse and add it..
  420. if (auto* n = readNextElement (true))
  421. childAppender.append (n);
  422. else
  423. break;
  424. }
  425. }
  426. else // must be a character block
  427. {
  428. input = preWhitespaceInput; // roll back to include the leading whitespace
  429. MemoryOutputStream textElementContent;
  430. bool contentShouldBeUsed = ! ignoreEmptyTextElements;
  431. for (;;)
  432. {
  433. auto c = *input;
  434. if (c == '<')
  435. {
  436. if (input[1] == '!' && input[2] == '-' && input[3] == '-')
  437. {
  438. input += 4;
  439. auto closeComment = input.indexOf (CharPointer_ASCII ("-->"));
  440. if (closeComment < 0)
  441. {
  442. setLastError ("unterminated comment", false);
  443. outOfData = true;
  444. return;
  445. }
  446. input += closeComment + 3;
  447. continue;
  448. }
  449. break;
  450. }
  451. if (c == 0)
  452. {
  453. setLastError ("unmatched tags", false);
  454. outOfData = true;
  455. return;
  456. }
  457. if (c == '&')
  458. {
  459. String entity;
  460. readEntity (entity);
  461. if (entity.startsWithChar ('<') && entity [1] != 0)
  462. {
  463. auto oldInput = input;
  464. auto oldOutOfData = outOfData;
  465. input = entity.getCharPointer();
  466. outOfData = false;
  467. while (auto* n = readNextElement (true))
  468. childAppender.append (n);
  469. input = oldInput;
  470. outOfData = oldOutOfData;
  471. }
  472. else
  473. {
  474. textElementContent << entity;
  475. contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
  476. }
  477. }
  478. else
  479. {
  480. for (;; ++input)
  481. {
  482. auto nextChar = *input;
  483. if (nextChar == '\r')
  484. {
  485. nextChar = '\n';
  486. if (input[1] == '\n')
  487. continue;
  488. }
  489. if (nextChar == '<' || nextChar == '&')
  490. break;
  491. if (nextChar == 0)
  492. {
  493. setLastError ("unmatched tags", false);
  494. outOfData = true;
  495. return;
  496. }
  497. textElementContent.appendUTF8Char (nextChar);
  498. contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
  499. }
  500. }
  501. }
  502. if (contentShouldBeUsed)
  503. childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
  504. }
  505. }
  506. }
  507. void XmlDocument::readEntity (String& result)
  508. {
  509. // skip over the ampersand
  510. ++input;
  511. if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("amp;"), 4) == 0)
  512. {
  513. input += 4;
  514. result += '&';
  515. }
  516. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("quot;"), 5) == 0)
  517. {
  518. input += 5;
  519. result += '"';
  520. }
  521. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("apos;"), 5) == 0)
  522. {
  523. input += 5;
  524. result += '\'';
  525. }
  526. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("lt;"), 3) == 0)
  527. {
  528. input += 3;
  529. result += '<';
  530. }
  531. else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("gt;"), 3) == 0)
  532. {
  533. input += 3;
  534. result += '>';
  535. }
  536. else if (*input == '#')
  537. {
  538. int charCode = 0;
  539. ++input;
  540. if (*input == 'x' || *input == 'X')
  541. {
  542. ++input;
  543. int numChars = 0;
  544. while (input[0] != ';')
  545. {
  546. auto hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  547. if (hexValue < 0 || ++numChars > 8)
  548. {
  549. setLastError ("illegal escape sequence", true);
  550. break;
  551. }
  552. charCode = (charCode << 4) | hexValue;
  553. ++input;
  554. }
  555. ++input;
  556. }
  557. else if (input[0] >= '0' && input[0] <= '9')
  558. {
  559. int numChars = 0;
  560. while (input[0] != ';')
  561. {
  562. if (++numChars > 12)
  563. {
  564. setLastError ("illegal escape sequence", true);
  565. break;
  566. }
  567. charCode = charCode * 10 + ((int) input[0] - '0');
  568. ++input;
  569. }
  570. ++input;
  571. }
  572. else
  573. {
  574. setLastError ("illegal escape sequence", true);
  575. result += '&';
  576. return;
  577. }
  578. result << (juce_wchar) charCode;
  579. }
  580. else
  581. {
  582. auto entityNameStart = input;
  583. auto closingSemiColon = input.indexOf ((juce_wchar) ';');
  584. if (closingSemiColon < 0)
  585. {
  586. outOfData = true;
  587. result += '&';
  588. }
  589. else
  590. {
  591. input += closingSemiColon + 1;
  592. result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
  593. }
  594. }
  595. }
  596. String XmlDocument::expandEntity (const String& ent)
  597. {
  598. if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
  599. if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
  600. if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
  601. if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
  602. if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
  603. if (ent[0] == '#')
  604. {
  605. auto char1 = ent[1];
  606. if (char1 == 'x' || char1 == 'X')
  607. return String::charToString (static_cast<juce_wchar> (ent.substring (2).getHexValue32()));
  608. if (char1 >= '0' && char1 <= '9')
  609. return String::charToString (static_cast<juce_wchar> (ent.substring (1).getIntValue()));
  610. setLastError ("illegal escape sequence", false);
  611. return String::charToString ('&');
  612. }
  613. return expandExternalEntity (ent);
  614. }
  615. String XmlDocument::expandExternalEntity (const String& entity)
  616. {
  617. if (needToLoadDTD)
  618. {
  619. if (dtdText.isNotEmpty())
  620. {
  621. dtdText = dtdText.trimCharactersAtEnd (">");
  622. tokenisedDTD.addTokens (dtdText, true);
  623. if (tokenisedDTD[tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
  624. && tokenisedDTD[tokenisedDTD.size() - 1].isQuotedString())
  625. {
  626. auto fn = tokenisedDTD[tokenisedDTD.size() - 1];
  627. tokenisedDTD.clear();
  628. tokenisedDTD.addTokens (getFileContents (fn), true);
  629. }
  630. else
  631. {
  632. tokenisedDTD.clear();
  633. auto openBracket = dtdText.indexOfChar ('[');
  634. if (openBracket > 0)
  635. {
  636. auto closeBracket = dtdText.lastIndexOfChar (']');
  637. if (closeBracket > openBracket)
  638. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  639. closeBracket), true);
  640. }
  641. }
  642. for (int i = tokenisedDTD.size(); --i >= 0;)
  643. {
  644. if (tokenisedDTD[i].startsWithChar ('%')
  645. && tokenisedDTD[i].endsWithChar (';'))
  646. {
  647. auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
  648. StringArray newToks;
  649. newToks.addTokens (parsed, true);
  650. tokenisedDTD.remove (i);
  651. for (int j = newToks.size(); --j >= 0;)
  652. tokenisedDTD.insert (i, newToks[j]);
  653. }
  654. }
  655. }
  656. needToLoadDTD = false;
  657. }
  658. for (int i = 0; i < tokenisedDTD.size(); ++i)
  659. {
  660. if (tokenisedDTD[i] == entity)
  661. {
  662. if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
  663. {
  664. auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted();
  665. // check for sub-entities..
  666. auto ampersand = ent.indexOfChar ('&');
  667. while (ampersand >= 0)
  668. {
  669. auto semiColon = ent.indexOf (i + 1, ";");
  670. if (semiColon < 0)
  671. {
  672. setLastError ("entity without terminating semi-colon", false);
  673. break;
  674. }
  675. auto resolved = expandEntity (ent.substring (i + 1, semiColon));
  676. ent = ent.substring (0, ampersand)
  677. + resolved
  678. + ent.substring (semiColon + 1);
  679. ampersand = ent.indexOfChar (semiColon + 1, '&');
  680. }
  681. return ent;
  682. }
  683. }
  684. }
  685. setLastError ("unknown entity", true);
  686. return entity;
  687. }
  688. String XmlDocument::getParameterEntity (const String& entity)
  689. {
  690. for (int i = 0; i < tokenisedDTD.size(); ++i)
  691. {
  692. if (tokenisedDTD[i] == entity
  693. && tokenisedDTD [i - 1] == "%"
  694. && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
  695. {
  696. auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (">");
  697. if (ent.equalsIgnoreCase ("system"))
  698. return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
  699. return ent.trim().unquoted();
  700. }
  701. }
  702. return entity;
  703. }
  704. }