The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

853 lines
24KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. XmlDocument::XmlDocument (const String& documentText)
  19. : originalText (documentText),
  20. input (nullptr),
  21. ignoreEmptyTextElements (true)
  22. {
  23. }
  24. XmlDocument::XmlDocument (const File& file)
  25. : input (nullptr),
  26. ignoreEmptyTextElements (true),
  27. inputSource (new FileInputSource (file))
  28. {
  29. }
  30. XmlDocument::~XmlDocument()
  31. {
  32. }
  33. XmlElement* XmlDocument::parse (const File& file)
  34. {
  35. XmlDocument doc (file);
  36. return doc.getDocumentElement();
  37. }
  38. XmlElement* XmlDocument::parse (const String& xmlData)
  39. {
  40. XmlDocument doc (xmlData);
  41. return doc.getDocumentElement();
  42. }
  43. void XmlDocument::setInputSource (InputSource* const newSource) noexcept
  44. {
  45. inputSource = newSource;
  46. }
  47. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  48. {
  49. ignoreEmptyTextElements = shouldBeIgnored;
  50. }
  51. namespace XmlIdentifierChars
  52. {
  53. static bool isIdentifierCharSlow (const juce_wchar c) noexcept
  54. {
  55. return CharacterFunctions::isLetterOrDigit (c)
  56. || c == '_' || c == '-' || c == ':' || c == '.';
  57. }
  58. static bool isIdentifierChar (const juce_wchar c) noexcept
  59. {
  60. static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  61. return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  62. : isIdentifierCharSlow (c);
  63. }
  64. /*static void generateIdentifierCharConstants()
  65. {
  66. uint32 n[8] = { 0 };
  67. for (int i = 0; i < 256; ++i)
  68. if (isIdentifierCharSlow (i))
  69. n[i >> 5] |= (1 << (i & 31));
  70. String s;
  71. for (int i = 0; i < 8; ++i)
  72. s << "0x" << String::toHexString ((int) n[i]) << ", ";
  73. DBG (s);
  74. }*/
  75. }
  76. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  77. {
  78. String textToParse (originalText);
  79. if (textToParse.isEmpty() && inputSource != nullptr)
  80. {
  81. ScopedPointer <InputStream> in (inputSource->createInputStream());
  82. if (in != nullptr)
  83. {
  84. MemoryOutputStream data;
  85. data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
  86. textToParse = data.toString();
  87. if (! onlyReadOuterDocumentElement)
  88. originalText = textToParse;
  89. }
  90. }
  91. input = textToParse.getCharPointer();
  92. lastError = String::empty;
  93. errorOccurred = false;
  94. outOfData = false;
  95. needToLoadDTD = true;
  96. if (textToParse.isEmpty())
  97. {
  98. lastError = "not enough input";
  99. }
  100. else
  101. {
  102. skipHeader();
  103. if (input.getAddress() != nullptr)
  104. {
  105. ScopedPointer <XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  106. if (! errorOccurred)
  107. return result.release();
  108. }
  109. else
  110. {
  111. lastError = "incorrect xml header";
  112. }
  113. }
  114. return nullptr;
  115. }
  116. const String& XmlDocument::getLastParseError() const noexcept
  117. {
  118. return lastError;
  119. }
  120. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  121. {
  122. lastError = desc;
  123. errorOccurred = ! carryOn;
  124. }
  125. String XmlDocument::getFileContents (const String& filename) const
  126. {
  127. if (inputSource != nullptr)
  128. {
  129. const ScopedPointer <InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
  130. if (in != nullptr)
  131. return in->readEntireStreamAsString();
  132. }
  133. return String::empty;
  134. }
  135. juce_wchar XmlDocument::readNextChar() noexcept
  136. {
  137. const juce_wchar c = input.getAndAdvance();
  138. if (c == 0)
  139. {
  140. outOfData = true;
  141. --input;
  142. }
  143. return c;
  144. }
  145. int XmlDocument::findNextTokenLength() noexcept
  146. {
  147. int len = 0;
  148. juce_wchar c = *input;
  149. while (XmlIdentifierChars::isIdentifierChar (c))
  150. c = input [++len];
  151. return len;
  152. }
  153. void XmlDocument::skipHeader()
  154. {
  155. const int headerStart = input.indexOf (CharPointer_UTF8 ("<?xml"));
  156. if (headerStart >= 0)
  157. {
  158. const int headerEnd = (input + headerStart).indexOf (CharPointer_UTF8 ("?>"));
  159. if (headerEnd < 0)
  160. return;
  161. #if JUCE_DEBUG
  162. const String header (input + headerStart, (size_t) (headerEnd - headerStart));
  163. const String encoding (header.fromFirstOccurrenceOf ("encoding", false, true)
  164. .fromFirstOccurrenceOf ("=", false, false)
  165. .fromFirstOccurrenceOf ("\"", false, false)
  166. .upToFirstOccurrenceOf ("\"", false, false).trim());
  167. /* If you load an XML document with a non-UTF encoding type, it may have been
  168. loaded wrongly.. Since all the files are read via the normal juce file streams,
  169. they're treated as UTF-8, so by the time it gets to the parser, the encoding will
  170. have been lost. Best plan is to stick to utf-8 or if you have specific files to
  171. read, use your own code to convert them to a unicode String, and pass that to the
  172. XML parser.
  173. */
  174. jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
  175. #endif
  176. input += headerEnd + 2;
  177. }
  178. skipNextWhiteSpace();
  179. const int docTypeIndex = input.indexOf (CharPointer_UTF8 ("<!DOCTYPE"));
  180. if (docTypeIndex < 0)
  181. return;
  182. input += docTypeIndex + 9;
  183. const String::CharPointerType docType (input);
  184. int n = 1;
  185. while (n > 0)
  186. {
  187. const juce_wchar c = readNextChar();
  188. if (outOfData)
  189. return;
  190. if (c == '<')
  191. ++n;
  192. else if (c == '>')
  193. --n;
  194. }
  195. dtdText = String (docType, (size_t) (input.getAddress() - (docType.getAddress() + 1))).trim();
  196. }
  197. void XmlDocument::skipNextWhiteSpace()
  198. {
  199. for (;;)
  200. {
  201. juce_wchar c = *input;
  202. while (CharacterFunctions::isWhitespace (c))
  203. c = *++input;
  204. if (c == 0)
  205. {
  206. outOfData = true;
  207. break;
  208. }
  209. else if (c == '<')
  210. {
  211. if (input[1] == '!'
  212. && input[2] == '-'
  213. && input[3] == '-')
  214. {
  215. input += 4;
  216. const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
  217. if (closeComment < 0)
  218. {
  219. outOfData = true;
  220. break;
  221. }
  222. input += closeComment + 3;
  223. continue;
  224. }
  225. else if (input[1] == '?')
  226. {
  227. input += 2;
  228. const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
  229. if (closeBracket < 0)
  230. {
  231. outOfData = true;
  232. break;
  233. }
  234. input += closeBracket + 2;
  235. continue;
  236. }
  237. }
  238. break;
  239. }
  240. }
  241. void XmlDocument::readQuotedString (String& result)
  242. {
  243. const juce_wchar quote = readNextChar();
  244. while (! outOfData)
  245. {
  246. const juce_wchar c = readNextChar();
  247. if (c == quote)
  248. break;
  249. --input;
  250. if (c == '&')
  251. {
  252. readEntity (result);
  253. }
  254. else
  255. {
  256. const String::CharPointerType start (input);
  257. size_t numChars = 0;
  258. for (;;)
  259. {
  260. const juce_wchar character = *input;
  261. if (character == quote)
  262. {
  263. result.appendCharPointer (start, numChars);
  264. ++input;
  265. return;
  266. }
  267. else if (character == '&')
  268. {
  269. result.appendCharPointer (start, numChars);
  270. break;
  271. }
  272. else if (character == 0)
  273. {
  274. outOfData = true;
  275. setLastError ("unmatched quotes", false);
  276. break;
  277. }
  278. ++input;
  279. ++numChars;
  280. }
  281. }
  282. }
  283. }
  284. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  285. {
  286. XmlElement* node = nullptr;
  287. skipNextWhiteSpace();
  288. if (outOfData)
  289. return nullptr;
  290. const int openBracket = input.indexOf ((juce_wchar) '<');
  291. if (openBracket >= 0)
  292. {
  293. input += openBracket + 1;
  294. int tagLen = findNextTokenLength();
  295. if (tagLen == 0)
  296. {
  297. // no tag name - but allow for a gap after the '<' before giving an error
  298. skipNextWhiteSpace();
  299. tagLen = findNextTokenLength();
  300. if (tagLen == 0)
  301. {
  302. setLastError ("tag name missing", false);
  303. return node;
  304. }
  305. }
  306. node = new XmlElement (String (input, (size_t) tagLen));
  307. input += tagLen;
  308. LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
  309. // look for attributes
  310. for (;;)
  311. {
  312. skipNextWhiteSpace();
  313. const juce_wchar c = *input;
  314. // empty tag..
  315. if (c == '/' && input[1] == '>')
  316. {
  317. input += 2;
  318. break;
  319. }
  320. // parse the guts of the element..
  321. if (c == '>')
  322. {
  323. ++input;
  324. if (alsoParseSubElements)
  325. readChildElements (node);
  326. break;
  327. }
  328. // get an attribute..
  329. if (XmlIdentifierChars::isIdentifierChar (c))
  330. {
  331. const int attNameLen = findNextTokenLength();
  332. if (attNameLen > 0)
  333. {
  334. const String::CharPointerType attNameStart (input);
  335. input += attNameLen;
  336. skipNextWhiteSpace();
  337. if (readNextChar() == '=')
  338. {
  339. skipNextWhiteSpace();
  340. const juce_wchar nextChar = *input;
  341. if (nextChar == '"' || nextChar == '\'')
  342. {
  343. XmlElement::XmlAttributeNode* const newAtt
  344. = new XmlElement::XmlAttributeNode (String (attNameStart, (size_t) attNameLen),
  345. String::empty);
  346. readQuotedString (newAtt->value);
  347. attributeAppender.append (newAtt);
  348. continue;
  349. }
  350. }
  351. }
  352. }
  353. else
  354. {
  355. if (! outOfData)
  356. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  357. }
  358. break;
  359. }
  360. }
  361. return node;
  362. }
  363. void XmlDocument::readChildElements (XmlElement* parent)
  364. {
  365. LinkedListPointer<XmlElement>::Appender childAppender (parent->firstChildElement);
  366. for (;;)
  367. {
  368. const String::CharPointerType preWhitespaceInput (input);
  369. skipNextWhiteSpace();
  370. if (outOfData)
  371. {
  372. setLastError ("unmatched tags", false);
  373. break;
  374. }
  375. if (*input == '<')
  376. {
  377. if (input[1] == '/')
  378. {
  379. // our close tag..
  380. const int closeTag = input.indexOf ((juce_wchar) '>');
  381. if (closeTag >= 0)
  382. input += closeTag + 1;
  383. break;
  384. }
  385. else if (input[1] == '!'
  386. && input[2] == '['
  387. && input[3] == 'C'
  388. && input[4] == 'D'
  389. && input[5] == 'A'
  390. && input[6] == 'T'
  391. && input[7] == 'A'
  392. && input[8] == '[')
  393. {
  394. input += 9;
  395. const String::CharPointerType inputStart (input);
  396. size_t len = 0;
  397. for (;;)
  398. {
  399. if (*input == 0)
  400. {
  401. setLastError ("unterminated CDATA section", false);
  402. outOfData = true;
  403. break;
  404. }
  405. else if (input[0] == ']'
  406. && input[1] == ']'
  407. && input[2] == '>')
  408. {
  409. input += 3;
  410. break;
  411. }
  412. ++input;
  413. ++len;
  414. }
  415. childAppender.append (XmlElement::createTextElement (String (inputStart, len)));
  416. }
  417. else
  418. {
  419. // this is some other element, so parse and add it..
  420. XmlElement* const n = readNextElement (true);
  421. if (n != nullptr)
  422. childAppender.append (n);
  423. else
  424. break;
  425. }
  426. }
  427. else // must be a character block
  428. {
  429. input = preWhitespaceInput; // roll back to include the leading whitespace
  430. String textElementContent;
  431. for (;;)
  432. {
  433. const juce_wchar c = *input;
  434. if (c == '<')
  435. break;
  436. if (c == 0)
  437. {
  438. setLastError ("unmatched tags", false);
  439. outOfData = true;
  440. return;
  441. }
  442. if (c == '&')
  443. {
  444. String entity;
  445. readEntity (entity);
  446. if (entity.startsWithChar ('<') && entity [1] != 0)
  447. {
  448. const String::CharPointerType oldInput (input);
  449. const bool oldOutOfData = outOfData;
  450. input = entity.getCharPointer();
  451. outOfData = false;
  452. for (;;)
  453. {
  454. XmlElement* const n = readNextElement (true);
  455. if (n == nullptr)
  456. break;
  457. childAppender.append (n);
  458. }
  459. input = oldInput;
  460. outOfData = oldOutOfData;
  461. }
  462. else
  463. {
  464. textElementContent += entity;
  465. }
  466. }
  467. else
  468. {
  469. const String::CharPointerType start (input);
  470. size_t len = 0;
  471. for (;;)
  472. {
  473. const juce_wchar nextChar = *input;
  474. if (nextChar == '<' || nextChar == '&')
  475. {
  476. break;
  477. }
  478. else if (nextChar == 0)
  479. {
  480. setLastError ("unmatched tags", false);
  481. outOfData = true;
  482. return;
  483. }
  484. ++input;
  485. ++len;
  486. }
  487. textElementContent.appendCharPointer (start, len);
  488. }
  489. }
  490. if ((! ignoreEmptyTextElements) || textElementContent.containsNonWhitespaceChars())
  491. {
  492. childAppender.append (XmlElement::createTextElement (textElementContent));
  493. }
  494. }
  495. }
  496. }
  497. void XmlDocument::readEntity (String& result)
  498. {
  499. // skip over the ampersand
  500. ++input;
  501. if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
  502. {
  503. input += 4;
  504. result += '&';
  505. }
  506. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
  507. {
  508. input += 5;
  509. result += '"';
  510. }
  511. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
  512. {
  513. input += 5;
  514. result += '\'';
  515. }
  516. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
  517. {
  518. input += 3;
  519. result += '<';
  520. }
  521. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
  522. {
  523. input += 3;
  524. result += '>';
  525. }
  526. else if (*input == '#')
  527. {
  528. int charCode = 0;
  529. ++input;
  530. if (*input == 'x' || *input == 'X')
  531. {
  532. ++input;
  533. int numChars = 0;
  534. while (input[0] != ';')
  535. {
  536. const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  537. if (hexValue < 0 || ++numChars > 8)
  538. {
  539. setLastError ("illegal escape sequence", true);
  540. break;
  541. }
  542. charCode = (charCode << 4) | hexValue;
  543. ++input;
  544. }
  545. ++input;
  546. }
  547. else if (input[0] >= '0' && input[0] <= '9')
  548. {
  549. int numChars = 0;
  550. while (input[0] != ';')
  551. {
  552. if (++numChars > 12)
  553. {
  554. setLastError ("illegal escape sequence", true);
  555. break;
  556. }
  557. charCode = charCode * 10 + ((int) input[0] - '0');
  558. ++input;
  559. }
  560. ++input;
  561. }
  562. else
  563. {
  564. setLastError ("illegal escape sequence", true);
  565. result += '&';
  566. return;
  567. }
  568. result << (juce_wchar) charCode;
  569. }
  570. else
  571. {
  572. const String::CharPointerType entityNameStart (input);
  573. const int closingSemiColon = input.indexOf ((juce_wchar) ';');
  574. if (closingSemiColon < 0)
  575. {
  576. outOfData = true;
  577. result += '&';
  578. }
  579. else
  580. {
  581. input += closingSemiColon + 1;
  582. result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
  583. }
  584. }
  585. }
  586. String XmlDocument::expandEntity (const String& ent)
  587. {
  588. if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
  589. if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
  590. if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
  591. if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
  592. if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
  593. if (ent[0] == '#')
  594. {
  595. const juce_wchar char1 = ent[1];
  596. if (char1 == 'x' || char1 == 'X')
  597. return String::charToString (static_cast <juce_wchar> (ent.substring (2).getHexValue32()));
  598. if (char1 >= '0' && char1 <= '9')
  599. return String::charToString (static_cast <juce_wchar> (ent.substring (1).getIntValue()));
  600. setLastError ("illegal escape sequence", false);
  601. return String::charToString ('&');
  602. }
  603. return expandExternalEntity (ent);
  604. }
  605. String XmlDocument::expandExternalEntity (const String& entity)
  606. {
  607. if (needToLoadDTD)
  608. {
  609. if (dtdText.isNotEmpty())
  610. {
  611. dtdText = dtdText.trimCharactersAtEnd (">");
  612. tokenisedDTD.addTokens (dtdText, true);
  613. if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
  614. && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
  615. {
  616. const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
  617. tokenisedDTD.clear();
  618. tokenisedDTD.addTokens (getFileContents (fn), true);
  619. }
  620. else
  621. {
  622. tokenisedDTD.clear();
  623. const int openBracket = dtdText.indexOfChar ('[');
  624. if (openBracket > 0)
  625. {
  626. const int closeBracket = dtdText.lastIndexOfChar (']');
  627. if (closeBracket > openBracket)
  628. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  629. closeBracket), true);
  630. }
  631. }
  632. for (int i = tokenisedDTD.size(); --i >= 0;)
  633. {
  634. if (tokenisedDTD[i].startsWithChar ('%')
  635. && tokenisedDTD[i].endsWithChar (';'))
  636. {
  637. const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
  638. StringArray newToks;
  639. newToks.addTokens (parsed, true);
  640. tokenisedDTD.remove (i);
  641. for (int j = newToks.size(); --j >= 0;)
  642. tokenisedDTD.insert (i, newToks[j]);
  643. }
  644. }
  645. }
  646. needToLoadDTD = false;
  647. }
  648. for (int i = 0; i < tokenisedDTD.size(); ++i)
  649. {
  650. if (tokenisedDTD[i] == entity)
  651. {
  652. if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
  653. {
  654. String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
  655. // check for sub-entities..
  656. int ampersand = ent.indexOfChar ('&');
  657. while (ampersand >= 0)
  658. {
  659. const int semiColon = ent.indexOf (i + 1, ";");
  660. if (semiColon < 0)
  661. {
  662. setLastError ("entity without terminating semi-colon", false);
  663. break;
  664. }
  665. const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
  666. ent = ent.substring (0, ampersand)
  667. + resolved
  668. + ent.substring (semiColon + 1);
  669. ampersand = ent.indexOfChar (semiColon + 1, '&');
  670. }
  671. return ent;
  672. }
  673. }
  674. }
  675. setLastError ("unknown entity", true);
  676. return entity;
  677. }
  678. String XmlDocument::getParameterEntity (const String& entity)
  679. {
  680. for (int i = 0; i < tokenisedDTD.size(); ++i)
  681. {
  682. if (tokenisedDTD[i] == entity
  683. && tokenisedDTD [i - 1] == "%"
  684. && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
  685. {
  686. const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
  687. if (ent.equalsIgnoreCase ("system"))
  688. return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
  689. else
  690. return ent.trim().unquoted();
  691. }
  692. }
  693. return entity;
  694. }