The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

851 lines
24KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. XmlDocument::XmlDocument (const String& documentText)
  19. : originalText (documentText),
  20. input (nullptr),
  21. ignoreEmptyTextElements (true)
  22. {
  23. }
  24. XmlDocument::XmlDocument (const File& file)
  25. : input (nullptr),
  26. ignoreEmptyTextElements (true),
  27. inputSource (new FileInputSource (file))
  28. {
  29. }
  30. XmlDocument::~XmlDocument()
  31. {
  32. }
  33. XmlElement* XmlDocument::parse (const File& file)
  34. {
  35. XmlDocument doc (file);
  36. return doc.getDocumentElement();
  37. }
  38. XmlElement* XmlDocument::parse (const String& xmlData)
  39. {
  40. XmlDocument doc (xmlData);
  41. return doc.getDocumentElement();
  42. }
  43. void XmlDocument::setInputSource (InputSource* const newSource) noexcept
  44. {
  45. inputSource = newSource;
  46. }
  47. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  48. {
  49. ignoreEmptyTextElements = shouldBeIgnored;
  50. }
  51. namespace XmlIdentifierChars
  52. {
  53. static bool isIdentifierCharSlow (const juce_wchar c) noexcept
  54. {
  55. return CharacterFunctions::isLetterOrDigit (c)
  56. || c == '_' || c == '-' || c == ':' || c == '.';
  57. }
  58. static bool isIdentifierChar (const juce_wchar c) noexcept
  59. {
  60. static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  61. return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  62. : isIdentifierCharSlow (c);
  63. }
  64. /*static void generateIdentifierCharConstants()
  65. {
  66. uint32 n[8] = { 0 };
  67. for (int i = 0; i < 256; ++i)
  68. if (isIdentifierCharSlow (i))
  69. n[i >> 5] |= (1 << (i & 31));
  70. String s;
  71. for (int i = 0; i < 8; ++i)
  72. s << "0x" << String::toHexString ((int) n[i]) << ", ";
  73. DBG (s);
  74. }*/
  75. }
  76. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  77. {
  78. String textToParse (originalText);
  79. if (textToParse.isEmpty() && inputSource != nullptr)
  80. {
  81. ScopedPointer <InputStream> in (inputSource->createInputStream());
  82. if (in != nullptr)
  83. {
  84. MemoryOutputStream data;
  85. data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
  86. textToParse = data.toString();
  87. if (! onlyReadOuterDocumentElement)
  88. originalText = textToParse;
  89. }
  90. }
  91. input = textToParse.getCharPointer();
  92. lastError = String::empty;
  93. errorOccurred = false;
  94. outOfData = false;
  95. needToLoadDTD = true;
  96. if (textToParse.isEmpty())
  97. {
  98. lastError = "not enough input";
  99. }
  100. else
  101. {
  102. skipHeader();
  103. if (input.getAddress() != nullptr)
  104. {
  105. ScopedPointer <XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  106. if (! errorOccurred)
  107. return result.release();
  108. }
  109. else
  110. {
  111. lastError = "incorrect xml header";
  112. }
  113. }
  114. return nullptr;
  115. }
  116. const String& XmlDocument::getLastParseError() const noexcept
  117. {
  118. return lastError;
  119. }
  120. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  121. {
  122. lastError = desc;
  123. errorOccurred = ! carryOn;
  124. }
  125. String XmlDocument::getFileContents (const String& filename) const
  126. {
  127. if (inputSource != nullptr)
  128. {
  129. const ScopedPointer <InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
  130. if (in != nullptr)
  131. return in->readEntireStreamAsString();
  132. }
  133. return String::empty;
  134. }
  135. juce_wchar XmlDocument::readNextChar() noexcept
  136. {
  137. const juce_wchar c = input.getAndAdvance();
  138. if (c == 0)
  139. {
  140. outOfData = true;
  141. --input;
  142. }
  143. return c;
  144. }
  145. int XmlDocument::findNextTokenLength() noexcept
  146. {
  147. int len = 0;
  148. juce_wchar c = *input;
  149. while (XmlIdentifierChars::isIdentifierChar (c))
  150. c = input [++len];
  151. return len;
  152. }
  153. void XmlDocument::skipHeader()
  154. {
  155. const int headerStart = input.indexOf (CharPointer_UTF8 ("<?xml"));
  156. if (headerStart >= 0)
  157. {
  158. const int headerEnd = (input + headerStart).indexOf (CharPointer_UTF8 ("?>"));
  159. if (headerEnd < 0)
  160. return;
  161. #if JUCE_DEBUG
  162. const String header (input + headerStart, (size_t) (headerEnd - headerStart));
  163. const String encoding (header.fromFirstOccurrenceOf ("encoding", false, true)
  164. .fromFirstOccurrenceOf ("=", false, false)
  165. .fromFirstOccurrenceOf ("\"", false, false)
  166. .upToFirstOccurrenceOf ("\"", false, false).trim());
  167. /* If you load an XML document with a non-UTF encoding type, it may have been
  168. loaded wrongly.. Since all the files are read via the normal juce file streams,
  169. they're treated as UTF-8, so by the time it gets to the parser, the encoding will
  170. have been lost. Best plan is to stick to utf-8 or if you have specific files to
  171. read, use your own code to convert them to a unicode String, and pass that to the
  172. XML parser.
  173. */
  174. jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
  175. #endif
  176. input += headerEnd + 2;
  177. }
  178. skipNextWhiteSpace();
  179. const int docTypeIndex = input.indexOf (CharPointer_UTF8 ("<!DOCTYPE"));
  180. if (docTypeIndex < 0)
  181. return;
  182. input += docTypeIndex + 9;
  183. const String::CharPointerType docType (input);
  184. int n = 1;
  185. while (n > 0)
  186. {
  187. const juce_wchar c = readNextChar();
  188. if (outOfData)
  189. return;
  190. if (c == '<')
  191. ++n;
  192. else if (c == '>')
  193. --n;
  194. }
  195. dtdText = String (docType, (size_t) (input.getAddress() - (docType.getAddress() + 1))).trim();
  196. }
  197. void XmlDocument::skipNextWhiteSpace()
  198. {
  199. for (;;)
  200. {
  201. juce_wchar c = *input;
  202. while (CharacterFunctions::isWhitespace (c))
  203. c = *++input;
  204. if (c == 0)
  205. {
  206. outOfData = true;
  207. break;
  208. }
  209. else if (c == '<')
  210. {
  211. if (input[1] == '!'
  212. && input[2] == '-'
  213. && input[3] == '-')
  214. {
  215. input += 4;
  216. const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
  217. if (closeComment < 0)
  218. {
  219. outOfData = true;
  220. break;
  221. }
  222. input += closeComment + 3;
  223. continue;
  224. }
  225. else if (input[1] == '?')
  226. {
  227. input += 2;
  228. const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
  229. if (closeBracket < 0)
  230. {
  231. outOfData = true;
  232. break;
  233. }
  234. input += closeBracket + 2;
  235. continue;
  236. }
  237. }
  238. break;
  239. }
  240. }
  241. void XmlDocument::readQuotedString (String& result)
  242. {
  243. const juce_wchar quote = readNextChar();
  244. while (! outOfData)
  245. {
  246. const juce_wchar c = readNextChar();
  247. if (c == quote)
  248. break;
  249. --input;
  250. if (c == '&')
  251. {
  252. readEntity (result);
  253. }
  254. else
  255. {
  256. const String::CharPointerType start (input);
  257. size_t numChars = 0;
  258. for (;;)
  259. {
  260. const juce_wchar character = *input;
  261. if (character == quote)
  262. {
  263. result.appendCharPointer (start, numChars);
  264. ++input;
  265. return;
  266. }
  267. else if (character == '&')
  268. {
  269. result.appendCharPointer (start, numChars);
  270. break;
  271. }
  272. else if (character == 0)
  273. {
  274. outOfData = true;
  275. setLastError ("unmatched quotes", false);
  276. break;
  277. }
  278. ++input;
  279. ++numChars;
  280. }
  281. }
  282. }
  283. }
  284. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  285. {
  286. XmlElement* node = nullptr;
  287. skipNextWhiteSpace();
  288. if (outOfData)
  289. return nullptr;
  290. const int openBracket = input.indexOf ((juce_wchar) '<');
  291. if (openBracket >= 0)
  292. {
  293. input += openBracket + 1;
  294. int tagLen = findNextTokenLength();
  295. if (tagLen == 0)
  296. {
  297. // no tag name - but allow for a gap after the '<' before giving an error
  298. skipNextWhiteSpace();
  299. tagLen = findNextTokenLength();
  300. if (tagLen == 0)
  301. {
  302. setLastError ("tag name missing", false);
  303. return node;
  304. }
  305. }
  306. node = new XmlElement (String (input, (size_t) tagLen));
  307. input += tagLen;
  308. LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
  309. // look for attributes
  310. for (;;)
  311. {
  312. skipNextWhiteSpace();
  313. const juce_wchar c = *input;
  314. // empty tag..
  315. if (c == '/' && input[1] == '>')
  316. {
  317. input += 2;
  318. break;
  319. }
  320. // parse the guts of the element..
  321. if (c == '>')
  322. {
  323. ++input;
  324. if (alsoParseSubElements)
  325. readChildElements (node);
  326. break;
  327. }
  328. // get an attribute..
  329. if (XmlIdentifierChars::isIdentifierChar (c))
  330. {
  331. const int attNameLen = findNextTokenLength();
  332. if (attNameLen > 0)
  333. {
  334. const String::CharPointerType attNameStart (input);
  335. input += attNameLen;
  336. skipNextWhiteSpace();
  337. if (readNextChar() == '=')
  338. {
  339. skipNextWhiteSpace();
  340. const juce_wchar nextChar = *input;
  341. if (nextChar == '"' || nextChar == '\'')
  342. {
  343. XmlElement::XmlAttributeNode* const newAtt
  344. = new XmlElement::XmlAttributeNode (String (attNameStart, (size_t) attNameLen),
  345. String::empty);
  346. readQuotedString (newAtt->value);
  347. attributeAppender.append (newAtt);
  348. continue;
  349. }
  350. }
  351. }
  352. }
  353. else
  354. {
  355. if (! outOfData)
  356. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  357. }
  358. break;
  359. }
  360. }
  361. return node;
  362. }
  363. void XmlDocument::readChildElements (XmlElement* parent)
  364. {
  365. LinkedListPointer<XmlElement>::Appender childAppender (parent->firstChildElement);
  366. for (;;)
  367. {
  368. const String::CharPointerType preWhitespaceInput (input);
  369. skipNextWhiteSpace();
  370. if (outOfData)
  371. {
  372. setLastError ("unmatched tags", false);
  373. break;
  374. }
  375. if (*input == '<')
  376. {
  377. if (input[1] == '/')
  378. {
  379. // our close tag..
  380. const int closeTag = input.indexOf ((juce_wchar) '>');
  381. if (closeTag >= 0)
  382. input += closeTag + 1;
  383. break;
  384. }
  385. else if (input[1] == '!'
  386. && input[2] == '['
  387. && input[3] == 'C'
  388. && input[4] == 'D'
  389. && input[5] == 'A'
  390. && input[6] == 'T'
  391. && input[7] == 'A'
  392. && input[8] == '[')
  393. {
  394. input += 9;
  395. const String::CharPointerType inputStart (input);
  396. size_t len = 0;
  397. for (;;)
  398. {
  399. if (*input == 0)
  400. {
  401. setLastError ("unterminated CDATA section", false);
  402. outOfData = true;
  403. break;
  404. }
  405. else if (input[0] == ']'
  406. && input[1] == ']'
  407. && input[2] == '>')
  408. {
  409. input += 3;
  410. break;
  411. }
  412. ++input;
  413. ++len;
  414. }
  415. childAppender.append (XmlElement::createTextElement (String (inputStart, len)));
  416. }
  417. else
  418. {
  419. // this is some other element, so parse and add it..
  420. if (XmlElement* const n = readNextElement (true))
  421. childAppender.append (n);
  422. else
  423. break;
  424. }
  425. }
  426. else // must be a character block
  427. {
  428. input = preWhitespaceInput; // roll back to include the leading whitespace
  429. String textElementContent;
  430. for (;;)
  431. {
  432. const juce_wchar c = *input;
  433. if (c == '<')
  434. break;
  435. if (c == 0)
  436. {
  437. setLastError ("unmatched tags", false);
  438. outOfData = true;
  439. return;
  440. }
  441. if (c == '&')
  442. {
  443. String entity;
  444. readEntity (entity);
  445. if (entity.startsWithChar ('<') && entity [1] != 0)
  446. {
  447. const String::CharPointerType oldInput (input);
  448. const bool oldOutOfData = outOfData;
  449. input = entity.getCharPointer();
  450. outOfData = false;
  451. for (;;)
  452. {
  453. XmlElement* const n = readNextElement (true);
  454. if (n == nullptr)
  455. break;
  456. childAppender.append (n);
  457. }
  458. input = oldInput;
  459. outOfData = oldOutOfData;
  460. }
  461. else
  462. {
  463. textElementContent += entity;
  464. }
  465. }
  466. else
  467. {
  468. const String::CharPointerType start (input);
  469. size_t len = 0;
  470. for (;;)
  471. {
  472. const juce_wchar nextChar = *input;
  473. if (nextChar == '<' || nextChar == '&')
  474. {
  475. break;
  476. }
  477. else if (nextChar == 0)
  478. {
  479. setLastError ("unmatched tags", false);
  480. outOfData = true;
  481. return;
  482. }
  483. ++input;
  484. ++len;
  485. }
  486. textElementContent.appendCharPointer (start, len);
  487. }
  488. }
  489. if ((! ignoreEmptyTextElements) || textElementContent.containsNonWhitespaceChars())
  490. {
  491. childAppender.append (XmlElement::createTextElement (textElementContent));
  492. }
  493. }
  494. }
  495. }
  496. void XmlDocument::readEntity (String& result)
  497. {
  498. // skip over the ampersand
  499. ++input;
  500. if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
  501. {
  502. input += 4;
  503. result += '&';
  504. }
  505. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
  506. {
  507. input += 5;
  508. result += '"';
  509. }
  510. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
  511. {
  512. input += 5;
  513. result += '\'';
  514. }
  515. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
  516. {
  517. input += 3;
  518. result += '<';
  519. }
  520. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
  521. {
  522. input += 3;
  523. result += '>';
  524. }
  525. else if (*input == '#')
  526. {
  527. int charCode = 0;
  528. ++input;
  529. if (*input == 'x' || *input == 'X')
  530. {
  531. ++input;
  532. int numChars = 0;
  533. while (input[0] != ';')
  534. {
  535. const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  536. if (hexValue < 0 || ++numChars > 8)
  537. {
  538. setLastError ("illegal escape sequence", true);
  539. break;
  540. }
  541. charCode = (charCode << 4) | hexValue;
  542. ++input;
  543. }
  544. ++input;
  545. }
  546. else if (input[0] >= '0' && input[0] <= '9')
  547. {
  548. int numChars = 0;
  549. while (input[0] != ';')
  550. {
  551. if (++numChars > 12)
  552. {
  553. setLastError ("illegal escape sequence", true);
  554. break;
  555. }
  556. charCode = charCode * 10 + ((int) input[0] - '0');
  557. ++input;
  558. }
  559. ++input;
  560. }
  561. else
  562. {
  563. setLastError ("illegal escape sequence", true);
  564. result += '&';
  565. return;
  566. }
  567. result << (juce_wchar) charCode;
  568. }
  569. else
  570. {
  571. const String::CharPointerType entityNameStart (input);
  572. const int closingSemiColon = input.indexOf ((juce_wchar) ';');
  573. if (closingSemiColon < 0)
  574. {
  575. outOfData = true;
  576. result += '&';
  577. }
  578. else
  579. {
  580. input += closingSemiColon + 1;
  581. result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
  582. }
  583. }
  584. }
  585. String XmlDocument::expandEntity (const String& ent)
  586. {
  587. if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
  588. if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
  589. if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
  590. if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
  591. if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
  592. if (ent[0] == '#')
  593. {
  594. const juce_wchar char1 = ent[1];
  595. if (char1 == 'x' || char1 == 'X')
  596. return String::charToString (static_cast <juce_wchar> (ent.substring (2).getHexValue32()));
  597. if (char1 >= '0' && char1 <= '9')
  598. return String::charToString (static_cast <juce_wchar> (ent.substring (1).getIntValue()));
  599. setLastError ("illegal escape sequence", false);
  600. return String::charToString ('&');
  601. }
  602. return expandExternalEntity (ent);
  603. }
  604. String XmlDocument::expandExternalEntity (const String& entity)
  605. {
  606. if (needToLoadDTD)
  607. {
  608. if (dtdText.isNotEmpty())
  609. {
  610. dtdText = dtdText.trimCharactersAtEnd (">");
  611. tokenisedDTD.addTokens (dtdText, true);
  612. if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
  613. && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
  614. {
  615. const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
  616. tokenisedDTD.clear();
  617. tokenisedDTD.addTokens (getFileContents (fn), true);
  618. }
  619. else
  620. {
  621. tokenisedDTD.clear();
  622. const int openBracket = dtdText.indexOfChar ('[');
  623. if (openBracket > 0)
  624. {
  625. const int closeBracket = dtdText.lastIndexOfChar (']');
  626. if (closeBracket > openBracket)
  627. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  628. closeBracket), true);
  629. }
  630. }
  631. for (int i = tokenisedDTD.size(); --i >= 0;)
  632. {
  633. if (tokenisedDTD[i].startsWithChar ('%')
  634. && tokenisedDTD[i].endsWithChar (';'))
  635. {
  636. const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
  637. StringArray newToks;
  638. newToks.addTokens (parsed, true);
  639. tokenisedDTD.remove (i);
  640. for (int j = newToks.size(); --j >= 0;)
  641. tokenisedDTD.insert (i, newToks[j]);
  642. }
  643. }
  644. }
  645. needToLoadDTD = false;
  646. }
  647. for (int i = 0; i < tokenisedDTD.size(); ++i)
  648. {
  649. if (tokenisedDTD[i] == entity)
  650. {
  651. if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
  652. {
  653. String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
  654. // check for sub-entities..
  655. int ampersand = ent.indexOfChar ('&');
  656. while (ampersand >= 0)
  657. {
  658. const int semiColon = ent.indexOf (i + 1, ";");
  659. if (semiColon < 0)
  660. {
  661. setLastError ("entity without terminating semi-colon", false);
  662. break;
  663. }
  664. const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
  665. ent = ent.substring (0, ampersand)
  666. + resolved
  667. + ent.substring (semiColon + 1);
  668. ampersand = ent.indexOfChar (semiColon + 1, '&');
  669. }
  670. return ent;
  671. }
  672. }
  673. }
  674. setLastError ("unknown entity", true);
  675. return entity;
  676. }
  677. String XmlDocument::getParameterEntity (const String& entity)
  678. {
  679. for (int i = 0; i < tokenisedDTD.size(); ++i)
  680. {
  681. if (tokenisedDTD[i] == entity
  682. && tokenisedDTD [i - 1] == "%"
  683. && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
  684. {
  685. const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
  686. if (ent.equalsIgnoreCase ("system"))
  687. return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
  688. return ent.trim().unquoted();
  689. }
  690. }
  691. return entity;
  692. }