The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

857 lines
24KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. BEGIN_JUCE_NAMESPACE
  19. //==============================================================================
  20. XmlDocument::XmlDocument (const String& documentText)
  21. : originalText (documentText),
  22. input (nullptr),
  23. ignoreEmptyTextElements (true)
  24. {
  25. }
  26. XmlDocument::XmlDocument (const File& file)
  27. : input (nullptr),
  28. ignoreEmptyTextElements (true),
  29. inputSource (new FileInputSource (file))
  30. {
  31. }
  32. XmlDocument::~XmlDocument()
  33. {
  34. }
  35. XmlElement* XmlDocument::parse (const File& file)
  36. {
  37. XmlDocument doc (file);
  38. return doc.getDocumentElement();
  39. }
  40. XmlElement* XmlDocument::parse (const String& xmlData)
  41. {
  42. XmlDocument doc (xmlData);
  43. return doc.getDocumentElement();
  44. }
  45. void XmlDocument::setInputSource (InputSource* const newSource) noexcept
  46. {
  47. inputSource = newSource;
  48. }
  49. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  50. {
  51. ignoreEmptyTextElements = shouldBeIgnored;
  52. }
  53. namespace XmlIdentifierChars
  54. {
  55. bool isIdentifierCharSlow (const juce_wchar c) noexcept
  56. {
  57. return CharacterFunctions::isLetterOrDigit (c)
  58. || c == '_' || c == '-' || c == ':' || c == '.';
  59. }
  60. bool isIdentifierChar (const juce_wchar c) noexcept
  61. {
  62. static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  63. return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  64. : isIdentifierCharSlow (c);
  65. }
  66. /*static void generateIdentifierCharConstants()
  67. {
  68. uint32 n[8] = { 0 };
  69. for (int i = 0; i < 256; ++i)
  70. if (isIdentifierCharSlow (i))
  71. n[i >> 5] |= (1 << (i & 31));
  72. String s;
  73. for (int i = 0; i < 8; ++i)
  74. s << "0x" << String::toHexString ((int) n[i]) << ", ";
  75. DBG (s);
  76. }*/
  77. }
  78. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  79. {
  80. String textToParse (originalText);
  81. if (textToParse.isEmpty() && inputSource != nullptr)
  82. {
  83. ScopedPointer <InputStream> in (inputSource->createInputStream());
  84. if (in != nullptr)
  85. {
  86. MemoryOutputStream data;
  87. data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
  88. textToParse = data.toString();
  89. if (! onlyReadOuterDocumentElement)
  90. originalText = textToParse;
  91. }
  92. }
  93. input = textToParse.getCharPointer();
  94. lastError = String::empty;
  95. errorOccurred = false;
  96. outOfData = false;
  97. needToLoadDTD = true;
  98. if (textToParse.isEmpty())
  99. {
  100. lastError = "not enough input";
  101. }
  102. else
  103. {
  104. skipHeader();
  105. if (input.getAddress() != nullptr)
  106. {
  107. ScopedPointer <XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  108. if (! errorOccurred)
  109. return result.release();
  110. }
  111. else
  112. {
  113. lastError = "incorrect xml header";
  114. }
  115. }
  116. return nullptr;
  117. }
  118. const String& XmlDocument::getLastParseError() const noexcept
  119. {
  120. return lastError;
  121. }
  122. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  123. {
  124. lastError = desc;
  125. errorOccurred = ! carryOn;
  126. }
  127. String XmlDocument::getFileContents (const String& filename) const
  128. {
  129. if (inputSource != nullptr)
  130. {
  131. const ScopedPointer <InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
  132. if (in != nullptr)
  133. return in->readEntireStreamAsString();
  134. }
  135. return String::empty;
  136. }
  137. juce_wchar XmlDocument::readNextChar() noexcept
  138. {
  139. const juce_wchar c = input.getAndAdvance();
  140. if (c == 0)
  141. {
  142. outOfData = true;
  143. --input;
  144. }
  145. return c;
  146. }
  147. int XmlDocument::findNextTokenLength() noexcept
  148. {
  149. int len = 0;
  150. juce_wchar c = *input;
  151. while (XmlIdentifierChars::isIdentifierChar (c))
  152. c = input [++len];
  153. return len;
  154. }
  155. void XmlDocument::skipHeader()
  156. {
  157. const int headerStart = input.indexOf (CharPointer_UTF8 ("<?xml"));
  158. if (headerStart >= 0)
  159. {
  160. const int headerEnd = (input + headerStart).indexOf (CharPointer_UTF8 ("?>"));
  161. if (headerEnd < 0)
  162. return;
  163. #if JUCE_DEBUG
  164. const String header (input + headerStart, (size_t) (headerEnd - headerStart));
  165. const String encoding (header.fromFirstOccurrenceOf ("encoding", false, true)
  166. .fromFirstOccurrenceOf ("=", false, false)
  167. .fromFirstOccurrenceOf ("\"", false, false)
  168. .upToFirstOccurrenceOf ("\"", false, false).trim());
  169. /* If you load an XML document with a non-UTF encoding type, it may have been
  170. loaded wrongly.. Since all the files are read via the normal juce file streams,
  171. they're treated as UTF-8, so by the time it gets to the parser, the encoding will
  172. have been lost. Best plan is to stick to utf-8 or if you have specific files to
  173. read, use your own code to convert them to a unicode String, and pass that to the
  174. XML parser.
  175. */
  176. jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
  177. #endif
  178. input += headerEnd + 2;
  179. }
  180. skipNextWhiteSpace();
  181. const int docTypeIndex = input.indexOf (CharPointer_UTF8 ("<!DOCTYPE"));
  182. if (docTypeIndex < 0)
  183. return;
  184. input += docTypeIndex + 9;
  185. const String::CharPointerType docType (input);
  186. int n = 1;
  187. while (n > 0)
  188. {
  189. const juce_wchar c = readNextChar();
  190. if (outOfData)
  191. return;
  192. if (c == '<')
  193. ++n;
  194. else if (c == '>')
  195. --n;
  196. }
  197. dtdText = String (docType, (size_t) (input.getAddress() - (docType.getAddress() + 1))).trim();
  198. }
  199. void XmlDocument::skipNextWhiteSpace()
  200. {
  201. for (;;)
  202. {
  203. juce_wchar c = *input;
  204. while (CharacterFunctions::isWhitespace (c))
  205. c = *++input;
  206. if (c == 0)
  207. {
  208. outOfData = true;
  209. break;
  210. }
  211. else if (c == '<')
  212. {
  213. if (input[1] == '!'
  214. && input[2] == '-'
  215. && input[3] == '-')
  216. {
  217. const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
  218. if (closeComment < 0)
  219. {
  220. outOfData = true;
  221. break;
  222. }
  223. input += closeComment + 3;
  224. continue;
  225. }
  226. else if (input[1] == '?')
  227. {
  228. const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
  229. if (closeBracket < 0)
  230. {
  231. outOfData = true;
  232. break;
  233. }
  234. input += closeBracket + 2;
  235. continue;
  236. }
  237. }
  238. break;
  239. }
  240. }
  241. void XmlDocument::readQuotedString (String& result)
  242. {
  243. const juce_wchar quote = readNextChar();
  244. while (! outOfData)
  245. {
  246. const juce_wchar c = readNextChar();
  247. if (c == quote)
  248. break;
  249. --input;
  250. if (c == '&')
  251. {
  252. readEntity (result);
  253. }
  254. else
  255. {
  256. const String::CharPointerType start (input);
  257. size_t numChars = 0;
  258. for (;;)
  259. {
  260. const juce_wchar character = *input;
  261. if (character == quote)
  262. {
  263. result.appendCharPointer (start, numChars);
  264. ++input;
  265. return;
  266. }
  267. else if (character == '&')
  268. {
  269. result.appendCharPointer (start, numChars);
  270. break;
  271. }
  272. else if (character == 0)
  273. {
  274. outOfData = true;
  275. setLastError ("unmatched quotes", false);
  276. break;
  277. }
  278. ++input;
  279. ++numChars;
  280. }
  281. }
  282. }
  283. }
  284. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  285. {
  286. XmlElement* node = nullptr;
  287. skipNextWhiteSpace();
  288. if (outOfData)
  289. return nullptr;
  290. const int openBracket = input.indexOf ((juce_wchar) '<');
  291. if (openBracket >= 0)
  292. {
  293. input += openBracket + 1;
  294. int tagLen = findNextTokenLength();
  295. if (tagLen == 0)
  296. {
  297. // no tag name - but allow for a gap after the '<' before giving an error
  298. skipNextWhiteSpace();
  299. tagLen = findNextTokenLength();
  300. if (tagLen == 0)
  301. {
  302. setLastError ("tag name missing", false);
  303. return node;
  304. }
  305. }
  306. node = new XmlElement (String (input, (size_t) tagLen));
  307. input += tagLen;
  308. LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
  309. // look for attributes
  310. for (;;)
  311. {
  312. skipNextWhiteSpace();
  313. const juce_wchar c = *input;
  314. // empty tag..
  315. if (c == '/' && input[1] == '>')
  316. {
  317. input += 2;
  318. break;
  319. }
  320. // parse the guts of the element..
  321. if (c == '>')
  322. {
  323. ++input;
  324. if (alsoParseSubElements)
  325. readChildElements (node);
  326. break;
  327. }
  328. // get an attribute..
  329. if (XmlIdentifierChars::isIdentifierChar (c))
  330. {
  331. const int attNameLen = findNextTokenLength();
  332. if (attNameLen > 0)
  333. {
  334. const String::CharPointerType attNameStart (input);
  335. input += attNameLen;
  336. skipNextWhiteSpace();
  337. if (readNextChar() == '=')
  338. {
  339. skipNextWhiteSpace();
  340. const juce_wchar nextChar = *input;
  341. if (nextChar == '"' || nextChar == '\'')
  342. {
  343. XmlElement::XmlAttributeNode* const newAtt
  344. = new XmlElement::XmlAttributeNode (String (attNameStart, (size_t) attNameLen),
  345. String::empty);
  346. readQuotedString (newAtt->value);
  347. attributeAppender.append (newAtt);
  348. continue;
  349. }
  350. }
  351. }
  352. }
  353. else
  354. {
  355. if (! outOfData)
  356. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  357. }
  358. break;
  359. }
  360. }
  361. return node;
  362. }
  363. void XmlDocument::readChildElements (XmlElement* parent)
  364. {
  365. LinkedListPointer<XmlElement>::Appender childAppender (parent->firstChildElement);
  366. for (;;)
  367. {
  368. const String::CharPointerType preWhitespaceInput (input);
  369. skipNextWhiteSpace();
  370. if (outOfData)
  371. {
  372. setLastError ("unmatched tags", false);
  373. break;
  374. }
  375. if (*input == '<')
  376. {
  377. if (input[1] == '/')
  378. {
  379. // our close tag..
  380. const int closeTag = input.indexOf ((juce_wchar) '>');
  381. if (closeTag >= 0)
  382. input += closeTag + 1;
  383. break;
  384. }
  385. else if (input[1] == '!'
  386. && input[2] == '['
  387. && input[3] == 'C'
  388. && input[4] == 'D'
  389. && input[5] == 'A'
  390. && input[6] == 'T'
  391. && input[7] == 'A'
  392. && input[8] == '[')
  393. {
  394. input += 9;
  395. const String::CharPointerType inputStart (input);
  396. size_t len = 0;
  397. for (;;)
  398. {
  399. if (*input == 0)
  400. {
  401. setLastError ("unterminated CDATA section", false);
  402. outOfData = true;
  403. break;
  404. }
  405. else if (input[0] == ']'
  406. && input[1] == ']'
  407. && input[2] == '>')
  408. {
  409. input += 3;
  410. break;
  411. }
  412. ++input;
  413. ++len;
  414. }
  415. childAppender.append (XmlElement::createTextElement (String (inputStart, len)));
  416. }
  417. else
  418. {
  419. // this is some other element, so parse and add it..
  420. XmlElement* const n = readNextElement (true);
  421. if (n != nullptr)
  422. childAppender.append (n);
  423. else
  424. break;
  425. }
  426. }
  427. else // must be a character block
  428. {
  429. input = preWhitespaceInput; // roll back to include the leading whitespace
  430. String textElementContent;
  431. for (;;)
  432. {
  433. const juce_wchar c = *input;
  434. if (c == '<')
  435. break;
  436. if (c == 0)
  437. {
  438. setLastError ("unmatched tags", false);
  439. outOfData = true;
  440. return;
  441. }
  442. if (c == '&')
  443. {
  444. String entity;
  445. readEntity (entity);
  446. if (entity.startsWithChar ('<') && entity [1] != 0)
  447. {
  448. const String::CharPointerType oldInput (input);
  449. const bool oldOutOfData = outOfData;
  450. input = entity.getCharPointer();
  451. outOfData = false;
  452. for (;;)
  453. {
  454. XmlElement* const n = readNextElement (true);
  455. if (n == nullptr)
  456. break;
  457. childAppender.append (n);
  458. }
  459. input = oldInput;
  460. outOfData = oldOutOfData;
  461. }
  462. else
  463. {
  464. textElementContent += entity;
  465. }
  466. }
  467. else
  468. {
  469. const String::CharPointerType start (input);
  470. size_t len = 0;
  471. for (;;)
  472. {
  473. const juce_wchar nextChar = *input;
  474. if (nextChar == '<' || nextChar == '&')
  475. {
  476. break;
  477. }
  478. else if (nextChar == 0)
  479. {
  480. setLastError ("unmatched tags", false);
  481. outOfData = true;
  482. return;
  483. }
  484. ++input;
  485. ++len;
  486. }
  487. textElementContent.appendCharPointer (start, len);
  488. }
  489. }
  490. if ((! ignoreEmptyTextElements) || textElementContent.containsNonWhitespaceChars())
  491. {
  492. childAppender.append (XmlElement::createTextElement (textElementContent));
  493. }
  494. }
  495. }
  496. }
  497. void XmlDocument::readEntity (String& result)
  498. {
  499. // skip over the ampersand
  500. ++input;
  501. if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
  502. {
  503. input += 4;
  504. result += '&';
  505. }
  506. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
  507. {
  508. input += 5;
  509. result += '"';
  510. }
  511. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
  512. {
  513. input += 5;
  514. result += '\'';
  515. }
  516. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
  517. {
  518. input += 3;
  519. result += '<';
  520. }
  521. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
  522. {
  523. input += 3;
  524. result += '>';
  525. }
  526. else if (*input == '#')
  527. {
  528. int charCode = 0;
  529. ++input;
  530. if (*input == 'x' || *input == 'X')
  531. {
  532. ++input;
  533. int numChars = 0;
  534. while (input[0] != ';')
  535. {
  536. const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  537. if (hexValue < 0 || ++numChars > 8)
  538. {
  539. setLastError ("illegal escape sequence", true);
  540. break;
  541. }
  542. charCode = (charCode << 4) | hexValue;
  543. ++input;
  544. }
  545. ++input;
  546. }
  547. else if (input[0] >= '0' && input[0] <= '9')
  548. {
  549. int numChars = 0;
  550. while (input[0] != ';')
  551. {
  552. if (++numChars > 12)
  553. {
  554. setLastError ("illegal escape sequence", true);
  555. break;
  556. }
  557. charCode = charCode * 10 + ((int) input[0] - '0');
  558. ++input;
  559. }
  560. ++input;
  561. }
  562. else
  563. {
  564. setLastError ("illegal escape sequence", true);
  565. result += '&';
  566. return;
  567. }
  568. result << (juce_wchar) charCode;
  569. }
  570. else
  571. {
  572. const String::CharPointerType entityNameStart (input);
  573. const int closingSemiColon = input.indexOf ((juce_wchar) ';');
  574. if (closingSemiColon < 0)
  575. {
  576. outOfData = true;
  577. result += '&';
  578. }
  579. else
  580. {
  581. input += closingSemiColon + 1;
  582. result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
  583. }
  584. }
  585. }
  586. String XmlDocument::expandEntity (const String& ent)
  587. {
  588. if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
  589. if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
  590. if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
  591. if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
  592. if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
  593. if (ent[0] == '#')
  594. {
  595. const juce_wchar char1 = ent[1];
  596. if (char1 == 'x' || char1 == 'X')
  597. return String::charToString (static_cast <juce_wchar> (ent.substring (2).getHexValue32()));
  598. if (char1 >= '0' && char1 <= '9')
  599. return String::charToString (static_cast <juce_wchar> (ent.substring (1).getIntValue()));
  600. setLastError ("illegal escape sequence", false);
  601. return String::charToString ('&');
  602. }
  603. return expandExternalEntity (ent);
  604. }
  605. String XmlDocument::expandExternalEntity (const String& entity)
  606. {
  607. if (needToLoadDTD)
  608. {
  609. if (dtdText.isNotEmpty())
  610. {
  611. dtdText = dtdText.trimCharactersAtEnd (">");
  612. tokenisedDTD.addTokens (dtdText, true);
  613. if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
  614. && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
  615. {
  616. const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
  617. tokenisedDTD.clear();
  618. tokenisedDTD.addTokens (getFileContents (fn), true);
  619. }
  620. else
  621. {
  622. tokenisedDTD.clear();
  623. const int openBracket = dtdText.indexOfChar ('[');
  624. if (openBracket > 0)
  625. {
  626. const int closeBracket = dtdText.lastIndexOfChar (']');
  627. if (closeBracket > openBracket)
  628. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  629. closeBracket), true);
  630. }
  631. }
  632. for (int i = tokenisedDTD.size(); --i >= 0;)
  633. {
  634. if (tokenisedDTD[i].startsWithChar ('%')
  635. && tokenisedDTD[i].endsWithChar (';'))
  636. {
  637. const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
  638. StringArray newToks;
  639. newToks.addTokens (parsed, true);
  640. tokenisedDTD.remove (i);
  641. for (int j = newToks.size(); --j >= 0;)
  642. tokenisedDTD.insert (i, newToks[j]);
  643. }
  644. }
  645. }
  646. needToLoadDTD = false;
  647. }
  648. for (int i = 0; i < tokenisedDTD.size(); ++i)
  649. {
  650. if (tokenisedDTD[i] == entity)
  651. {
  652. if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
  653. {
  654. String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
  655. // check for sub-entities..
  656. int ampersand = ent.indexOfChar ('&');
  657. while (ampersand >= 0)
  658. {
  659. const int semiColon = ent.indexOf (i + 1, ";");
  660. if (semiColon < 0)
  661. {
  662. setLastError ("entity without terminating semi-colon", false);
  663. break;
  664. }
  665. const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
  666. ent = ent.substring (0, ampersand)
  667. + resolved
  668. + ent.substring (semiColon + 1);
  669. ampersand = ent.indexOfChar (semiColon + 1, '&');
  670. }
  671. return ent;
  672. }
  673. }
  674. }
  675. setLastError ("unknown entity", true);
  676. return entity;
  677. }
  678. String XmlDocument::getParameterEntity (const String& entity)
  679. {
  680. for (int i = 0; i < tokenisedDTD.size(); ++i)
  681. {
  682. if (tokenisedDTD[i] == entity
  683. && tokenisedDTD [i - 1] == "%"
  684. && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
  685. {
  686. const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
  687. if (ent.equalsIgnoreCase ("system"))
  688. return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
  689. else
  690. return ent.trim().unquoted();
  691. }
  692. }
  693. return entity;
  694. }
  695. END_JUCE_NAMESPACE