The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

860 lines
25KB

  1. /*
  2. ==============================================================================
  3. This file is part of the juce_core module of the JUCE library.
  4. Copyright (c) 2013 - Raw Material Software Ltd.
  5. Permission to use, copy, modify, and/or distribute this software for any purpose with
  6. or without fee is hereby granted, provided that the above copyright notice and this
  7. permission notice appear in all copies.
  8. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  9. TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN
  10. NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
  11. DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  12. IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  13. CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. ------------------------------------------------------------------------------
  15. NOTE! This permissive ISC license applies ONLY to files within the juce_core module!
  16. All other JUCE modules are covered by a dual GPL/commercial license, so if you are
  17. using any other modules, be sure to check that you also comply with their license.
  18. For more details, visit www.juce.com
  19. ==============================================================================
  20. */
  21. XmlDocument::XmlDocument (const String& documentText)
  22. : originalText (documentText),
  23. input (nullptr),
  24. outOfData (false),
  25. errorOccurred (false),
  26. needToLoadDTD (false),
  27. ignoreEmptyTextElements (true)
  28. {
  29. }
  30. XmlDocument::XmlDocument (const File& file)
  31. : input (nullptr),
  32. outOfData (false),
  33. errorOccurred (false),
  34. needToLoadDTD (false),
  35. ignoreEmptyTextElements (true),
  36. inputSource (new FileInputSource (file))
  37. {
  38. }
  39. XmlDocument::~XmlDocument()
  40. {
  41. }
  42. XmlElement* XmlDocument::parse (const File& file)
  43. {
  44. XmlDocument doc (file);
  45. return doc.getDocumentElement();
  46. }
  47. XmlElement* XmlDocument::parse (const String& xmlData)
  48. {
  49. XmlDocument doc (xmlData);
  50. return doc.getDocumentElement();
  51. }
  52. void XmlDocument::setInputSource (InputSource* const newSource) noexcept
  53. {
  54. inputSource = newSource;
  55. }
  56. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  57. {
  58. ignoreEmptyTextElements = shouldBeIgnored;
  59. }
  60. namespace XmlIdentifierChars
  61. {
  62. static bool isIdentifierCharSlow (const juce_wchar c) noexcept
  63. {
  64. return CharacterFunctions::isLetterOrDigit (c)
  65. || c == '_' || c == '-' || c == ':' || c == '.';
  66. }
  67. static bool isIdentifierChar (const juce_wchar c) noexcept
  68. {
  69. static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  70. return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  71. : isIdentifierCharSlow (c);
  72. }
  73. /*static void generateIdentifierCharConstants()
  74. {
  75. uint32 n[8] = { 0 };
  76. for (int i = 0; i < 256; ++i)
  77. if (isIdentifierCharSlow (i))
  78. n[i >> 5] |= (1 << (i & 31));
  79. String s;
  80. for (int i = 0; i < 8; ++i)
  81. s << "0x" << String::toHexString ((int) n[i]) << ", ";
  82. DBG (s);
  83. }*/
  84. }
  85. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  86. {
  87. String textToParse (originalText);
  88. if (textToParse.isEmpty() && inputSource != nullptr)
  89. {
  90. ScopedPointer <InputStream> in (inputSource->createInputStream());
  91. if (in != nullptr)
  92. {
  93. MemoryOutputStream data;
  94. data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
  95. textToParse = data.toString();
  96. if (! onlyReadOuterDocumentElement)
  97. originalText = textToParse;
  98. }
  99. }
  100. input = textToParse.getCharPointer();
  101. lastError = String::empty;
  102. errorOccurred = false;
  103. outOfData = false;
  104. needToLoadDTD = true;
  105. if (textToParse.isEmpty())
  106. {
  107. lastError = "not enough input";
  108. }
  109. else
  110. {
  111. skipHeader();
  112. if (input.getAddress() != nullptr)
  113. {
  114. ScopedPointer <XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  115. if (! errorOccurred)
  116. return result.release();
  117. }
  118. else
  119. {
  120. lastError = "incorrect xml header";
  121. }
  122. }
  123. return nullptr;
  124. }
  125. const String& XmlDocument::getLastParseError() const noexcept
  126. {
  127. return lastError;
  128. }
  129. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  130. {
  131. lastError = desc;
  132. errorOccurred = ! carryOn;
  133. }
  134. String XmlDocument::getFileContents (const String& filename) const
  135. {
  136. if (inputSource != nullptr)
  137. {
  138. const ScopedPointer <InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
  139. if (in != nullptr)
  140. return in->readEntireStreamAsString();
  141. }
  142. return String::empty;
  143. }
  144. juce_wchar XmlDocument::readNextChar() noexcept
  145. {
  146. const juce_wchar c = input.getAndAdvance();
  147. if (c == 0)
  148. {
  149. outOfData = true;
  150. --input;
  151. }
  152. return c;
  153. }
  154. int XmlDocument::findNextTokenLength() noexcept
  155. {
  156. int len = 0;
  157. juce_wchar c = *input;
  158. while (XmlIdentifierChars::isIdentifierChar (c))
  159. c = input [++len];
  160. return len;
  161. }
  162. void XmlDocument::skipHeader()
  163. {
  164. const int headerStart = input.indexOf (CharPointer_UTF8 ("<?xml"));
  165. if (headerStart >= 0)
  166. {
  167. const int headerEnd = (input + headerStart).indexOf (CharPointer_UTF8 ("?>"));
  168. if (headerEnd < 0)
  169. return;
  170. #if JUCE_DEBUG
  171. const String header (input + headerStart, (size_t) (headerEnd - headerStart));
  172. const String encoding (header.fromFirstOccurrenceOf ("encoding", false, true)
  173. .fromFirstOccurrenceOf ("=", false, false)
  174. .fromFirstOccurrenceOf ("\"", false, false)
  175. .upToFirstOccurrenceOf ("\"", false, false).trim());
  176. /* If you load an XML document with a non-UTF encoding type, it may have been
  177. loaded wrongly.. Since all the files are read via the normal juce file streams,
  178. they're treated as UTF-8, so by the time it gets to the parser, the encoding will
  179. have been lost. Best plan is to stick to utf-8 or if you have specific files to
  180. read, use your own code to convert them to a unicode String, and pass that to the
  181. XML parser.
  182. */
  183. jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
  184. #endif
  185. input += headerEnd + 2;
  186. }
  187. skipNextWhiteSpace();
  188. const int docTypeIndex = input.indexOf (CharPointer_UTF8 ("<!DOCTYPE"));
  189. if (docTypeIndex < 0)
  190. return;
  191. input += docTypeIndex + 9;
  192. const String::CharPointerType docType (input);
  193. int n = 1;
  194. while (n > 0)
  195. {
  196. const juce_wchar c = readNextChar();
  197. if (outOfData)
  198. return;
  199. if (c == '<')
  200. ++n;
  201. else if (c == '>')
  202. --n;
  203. }
  204. dtdText = String (docType, (size_t) (input.getAddress() - (docType.getAddress() + 1))).trim();
  205. }
  206. void XmlDocument::skipNextWhiteSpace()
  207. {
  208. for (;;)
  209. {
  210. juce_wchar c = *input;
  211. while (CharacterFunctions::isWhitespace (c))
  212. c = *++input;
  213. if (c == 0)
  214. {
  215. outOfData = true;
  216. break;
  217. }
  218. else if (c == '<')
  219. {
  220. if (input[1] == '!'
  221. && input[2] == '-'
  222. && input[3] == '-')
  223. {
  224. input += 4;
  225. const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
  226. if (closeComment < 0)
  227. {
  228. outOfData = true;
  229. break;
  230. }
  231. input += closeComment + 3;
  232. continue;
  233. }
  234. else if (input[1] == '?')
  235. {
  236. input += 2;
  237. const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
  238. if (closeBracket < 0)
  239. {
  240. outOfData = true;
  241. break;
  242. }
  243. input += closeBracket + 2;
  244. continue;
  245. }
  246. }
  247. break;
  248. }
  249. }
  250. void XmlDocument::readQuotedString (String& result)
  251. {
  252. const juce_wchar quote = readNextChar();
  253. while (! outOfData)
  254. {
  255. const juce_wchar c = readNextChar();
  256. if (c == quote)
  257. break;
  258. --input;
  259. if (c == '&')
  260. {
  261. readEntity (result);
  262. }
  263. else
  264. {
  265. const String::CharPointerType start (input);
  266. size_t numChars = 0;
  267. for (;;)
  268. {
  269. const juce_wchar character = *input;
  270. if (character == quote)
  271. {
  272. result.appendCharPointer (start, numChars);
  273. ++input;
  274. return;
  275. }
  276. else if (character == '&')
  277. {
  278. result.appendCharPointer (start, numChars);
  279. break;
  280. }
  281. else if (character == 0)
  282. {
  283. outOfData = true;
  284. setLastError ("unmatched quotes", false);
  285. break;
  286. }
  287. ++input;
  288. ++numChars;
  289. }
  290. }
  291. }
  292. }
  293. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  294. {
  295. XmlElement* node = nullptr;
  296. skipNextWhiteSpace();
  297. if (outOfData)
  298. return nullptr;
  299. const int openBracket = input.indexOf ((juce_wchar) '<');
  300. if (openBracket >= 0)
  301. {
  302. input += openBracket + 1;
  303. int tagLen = findNextTokenLength();
  304. if (tagLen == 0)
  305. {
  306. // no tag name - but allow for a gap after the '<' before giving an error
  307. skipNextWhiteSpace();
  308. tagLen = findNextTokenLength();
  309. if (tagLen == 0)
  310. {
  311. setLastError ("tag name missing", false);
  312. return node;
  313. }
  314. }
  315. node = new XmlElement (String (input, (size_t) tagLen));
  316. input += tagLen;
  317. LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
  318. // look for attributes
  319. for (;;)
  320. {
  321. skipNextWhiteSpace();
  322. const juce_wchar c = *input;
  323. // empty tag..
  324. if (c == '/' && input[1] == '>')
  325. {
  326. input += 2;
  327. break;
  328. }
  329. // parse the guts of the element..
  330. if (c == '>')
  331. {
  332. ++input;
  333. if (alsoParseSubElements)
  334. readChildElements (node);
  335. break;
  336. }
  337. // get an attribute..
  338. if (XmlIdentifierChars::isIdentifierChar (c))
  339. {
  340. const int attNameLen = findNextTokenLength();
  341. if (attNameLen > 0)
  342. {
  343. const String::CharPointerType attNameStart (input);
  344. input += attNameLen;
  345. skipNextWhiteSpace();
  346. if (readNextChar() == '=')
  347. {
  348. skipNextWhiteSpace();
  349. const juce_wchar nextChar = *input;
  350. if (nextChar == '"' || nextChar == '\'')
  351. {
  352. XmlElement::XmlAttributeNode* const newAtt
  353. = new XmlElement::XmlAttributeNode (String (attNameStart, (size_t) attNameLen),
  354. String::empty);
  355. readQuotedString (newAtt->value);
  356. attributeAppender.append (newAtt);
  357. continue;
  358. }
  359. }
  360. }
  361. }
  362. else
  363. {
  364. if (! outOfData)
  365. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  366. }
  367. break;
  368. }
  369. }
  370. return node;
  371. }
  372. void XmlDocument::readChildElements (XmlElement* parent)
  373. {
  374. LinkedListPointer<XmlElement>::Appender childAppender (parent->firstChildElement);
  375. for (;;)
  376. {
  377. const String::CharPointerType preWhitespaceInput (input);
  378. skipNextWhiteSpace();
  379. if (outOfData)
  380. {
  381. setLastError ("unmatched tags", false);
  382. break;
  383. }
  384. if (*input == '<')
  385. {
  386. if (input[1] == '/')
  387. {
  388. // our close tag..
  389. const int closeTag = input.indexOf ((juce_wchar) '>');
  390. if (closeTag >= 0)
  391. input += closeTag + 1;
  392. break;
  393. }
  394. else if (input[1] == '!'
  395. && input[2] == '['
  396. && input[3] == 'C'
  397. && input[4] == 'D'
  398. && input[5] == 'A'
  399. && input[6] == 'T'
  400. && input[7] == 'A'
  401. && input[8] == '[')
  402. {
  403. input += 9;
  404. const String::CharPointerType inputStart (input);
  405. size_t len = 0;
  406. for (;;)
  407. {
  408. if (*input == 0)
  409. {
  410. setLastError ("unterminated CDATA section", false);
  411. outOfData = true;
  412. break;
  413. }
  414. else if (input[0] == ']'
  415. && input[1] == ']'
  416. && input[2] == '>')
  417. {
  418. input += 3;
  419. break;
  420. }
  421. ++input;
  422. ++len;
  423. }
  424. childAppender.append (XmlElement::createTextElement (String (inputStart, len)));
  425. }
  426. else
  427. {
  428. // this is some other element, so parse and add it..
  429. if (XmlElement* const n = readNextElement (true))
  430. childAppender.append (n);
  431. else
  432. break;
  433. }
  434. }
  435. else // must be a character block
  436. {
  437. input = preWhitespaceInput; // roll back to include the leading whitespace
  438. String textElementContent;
  439. for (;;)
  440. {
  441. const juce_wchar c = *input;
  442. if (c == '<')
  443. break;
  444. if (c == 0)
  445. {
  446. setLastError ("unmatched tags", false);
  447. outOfData = true;
  448. return;
  449. }
  450. if (c == '&')
  451. {
  452. String entity;
  453. readEntity (entity);
  454. if (entity.startsWithChar ('<') && entity [1] != 0)
  455. {
  456. const String::CharPointerType oldInput (input);
  457. const bool oldOutOfData = outOfData;
  458. input = entity.getCharPointer();
  459. outOfData = false;
  460. for (;;)
  461. {
  462. XmlElement* const n = readNextElement (true);
  463. if (n == nullptr)
  464. break;
  465. childAppender.append (n);
  466. }
  467. input = oldInput;
  468. outOfData = oldOutOfData;
  469. }
  470. else
  471. {
  472. textElementContent += entity;
  473. }
  474. }
  475. else
  476. {
  477. const String::CharPointerType start (input);
  478. size_t len = 0;
  479. for (;;)
  480. {
  481. const juce_wchar nextChar = *input;
  482. if (nextChar == '<' || nextChar == '&')
  483. {
  484. break;
  485. }
  486. else if (nextChar == 0)
  487. {
  488. setLastError ("unmatched tags", false);
  489. outOfData = true;
  490. return;
  491. }
  492. ++input;
  493. ++len;
  494. }
  495. textElementContent.appendCharPointer (start, len);
  496. }
  497. }
  498. if ((! ignoreEmptyTextElements) || textElementContent.containsNonWhitespaceChars())
  499. {
  500. childAppender.append (XmlElement::createTextElement (textElementContent));
  501. }
  502. }
  503. }
  504. }
  505. void XmlDocument::readEntity (String& result)
  506. {
  507. // skip over the ampersand
  508. ++input;
  509. if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
  510. {
  511. input += 4;
  512. result += '&';
  513. }
  514. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
  515. {
  516. input += 5;
  517. result += '"';
  518. }
  519. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
  520. {
  521. input += 5;
  522. result += '\'';
  523. }
  524. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
  525. {
  526. input += 3;
  527. result += '<';
  528. }
  529. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
  530. {
  531. input += 3;
  532. result += '>';
  533. }
  534. else if (*input == '#')
  535. {
  536. int charCode = 0;
  537. ++input;
  538. if (*input == 'x' || *input == 'X')
  539. {
  540. ++input;
  541. int numChars = 0;
  542. while (input[0] != ';')
  543. {
  544. const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  545. if (hexValue < 0 || ++numChars > 8)
  546. {
  547. setLastError ("illegal escape sequence", true);
  548. break;
  549. }
  550. charCode = (charCode << 4) | hexValue;
  551. ++input;
  552. }
  553. ++input;
  554. }
  555. else if (input[0] >= '0' && input[0] <= '9')
  556. {
  557. int numChars = 0;
  558. while (input[0] != ';')
  559. {
  560. if (++numChars > 12)
  561. {
  562. setLastError ("illegal escape sequence", true);
  563. break;
  564. }
  565. charCode = charCode * 10 + ((int) input[0] - '0');
  566. ++input;
  567. }
  568. ++input;
  569. }
  570. else
  571. {
  572. setLastError ("illegal escape sequence", true);
  573. result += '&';
  574. return;
  575. }
  576. result << (juce_wchar) charCode;
  577. }
  578. else
  579. {
  580. const String::CharPointerType entityNameStart (input);
  581. const int closingSemiColon = input.indexOf ((juce_wchar) ';');
  582. if (closingSemiColon < 0)
  583. {
  584. outOfData = true;
  585. result += '&';
  586. }
  587. else
  588. {
  589. input += closingSemiColon + 1;
  590. result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
  591. }
  592. }
  593. }
  594. String XmlDocument::expandEntity (const String& ent)
  595. {
  596. if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
  597. if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
  598. if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
  599. if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
  600. if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
  601. if (ent[0] == '#')
  602. {
  603. const juce_wchar char1 = ent[1];
  604. if (char1 == 'x' || char1 == 'X')
  605. return String::charToString (static_cast <juce_wchar> (ent.substring (2).getHexValue32()));
  606. if (char1 >= '0' && char1 <= '9')
  607. return String::charToString (static_cast <juce_wchar> (ent.substring (1).getIntValue()));
  608. setLastError ("illegal escape sequence", false);
  609. return String::charToString ('&');
  610. }
  611. return expandExternalEntity (ent);
  612. }
  613. String XmlDocument::expandExternalEntity (const String& entity)
  614. {
  615. if (needToLoadDTD)
  616. {
  617. if (dtdText.isNotEmpty())
  618. {
  619. dtdText = dtdText.trimCharactersAtEnd (">");
  620. tokenisedDTD.addTokens (dtdText, true);
  621. if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
  622. && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
  623. {
  624. const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
  625. tokenisedDTD.clear();
  626. tokenisedDTD.addTokens (getFileContents (fn), true);
  627. }
  628. else
  629. {
  630. tokenisedDTD.clear();
  631. const int openBracket = dtdText.indexOfChar ('[');
  632. if (openBracket > 0)
  633. {
  634. const int closeBracket = dtdText.lastIndexOfChar (']');
  635. if (closeBracket > openBracket)
  636. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  637. closeBracket), true);
  638. }
  639. }
  640. for (int i = tokenisedDTD.size(); --i >= 0;)
  641. {
  642. if (tokenisedDTD[i].startsWithChar ('%')
  643. && tokenisedDTD[i].endsWithChar (';'))
  644. {
  645. const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
  646. StringArray newToks;
  647. newToks.addTokens (parsed, true);
  648. tokenisedDTD.remove (i);
  649. for (int j = newToks.size(); --j >= 0;)
  650. tokenisedDTD.insert (i, newToks[j]);
  651. }
  652. }
  653. }
  654. needToLoadDTD = false;
  655. }
  656. for (int i = 0; i < tokenisedDTD.size(); ++i)
  657. {
  658. if (tokenisedDTD[i] == entity)
  659. {
  660. if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
  661. {
  662. String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
  663. // check for sub-entities..
  664. int ampersand = ent.indexOfChar ('&');
  665. while (ampersand >= 0)
  666. {
  667. const int semiColon = ent.indexOf (i + 1, ";");
  668. if (semiColon < 0)
  669. {
  670. setLastError ("entity without terminating semi-colon", false);
  671. break;
  672. }
  673. const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
  674. ent = ent.substring (0, ampersand)
  675. + resolved
  676. + ent.substring (semiColon + 1);
  677. ampersand = ent.indexOfChar (semiColon + 1, '&');
  678. }
  679. return ent;
  680. }
  681. }
  682. }
  683. setLastError ("unknown entity", true);
  684. return entity;
  685. }
  686. String XmlDocument::getParameterEntity (const String& entity)
  687. {
  688. for (int i = 0; i < tokenisedDTD.size(); ++i)
  689. {
  690. if (tokenisedDTD[i] == entity
  691. && tokenisedDTD [i - 1] == "%"
  692. && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
  693. {
  694. const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
  695. if (ent.equalsIgnoreCase ("system"))
  696. return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
  697. return ent.trim().unquoted();
  698. }
  699. }
  700. return entity;
  701. }