The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

859 lines
24KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. BEGIN_JUCE_NAMESPACE
  19. //==============================================================================
  20. XmlDocument::XmlDocument (const String& documentText)
  21. : originalText (documentText),
  22. input (nullptr),
  23. ignoreEmptyTextElements (true)
  24. {
  25. }
  26. XmlDocument::XmlDocument (const File& file)
  27. : input (nullptr),
  28. ignoreEmptyTextElements (true),
  29. inputSource (new FileInputSource (file))
  30. {
  31. }
  32. XmlDocument::~XmlDocument()
  33. {
  34. }
  35. XmlElement* XmlDocument::parse (const File& file)
  36. {
  37. XmlDocument doc (file);
  38. return doc.getDocumentElement();
  39. }
  40. XmlElement* XmlDocument::parse (const String& xmlData)
  41. {
  42. XmlDocument doc (xmlData);
  43. return doc.getDocumentElement();
  44. }
  45. void XmlDocument::setInputSource (InputSource* const newSource) noexcept
  46. {
  47. inputSource = newSource;
  48. }
  49. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
  50. {
  51. ignoreEmptyTextElements = shouldBeIgnored;
  52. }
  53. namespace XmlIdentifierChars
  54. {
  55. bool isIdentifierCharSlow (const juce_wchar c) noexcept
  56. {
  57. return CharacterFunctions::isLetterOrDigit (c)
  58. || c == '_' || c == '-' || c == ':' || c == '.';
  59. }
  60. bool isIdentifierChar (const juce_wchar c) noexcept
  61. {
  62. static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
  63. return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
  64. : isIdentifierCharSlow (c);
  65. }
  66. /*static void generateIdentifierCharConstants()
  67. {
  68. uint32 n[8] = { 0 };
  69. for (int i = 0; i < 256; ++i)
  70. if (isIdentifierCharSlow (i))
  71. n[i >> 5] |= (1 << (i & 31));
  72. String s;
  73. for (int i = 0; i < 8; ++i)
  74. s << "0x" << String::toHexString ((int) n[i]) << ", ";
  75. DBG (s);
  76. }*/
  77. }
  78. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  79. {
  80. String textToParse (originalText);
  81. if (textToParse.isEmpty() && inputSource != nullptr)
  82. {
  83. ScopedPointer <InputStream> in (inputSource->createInputStream());
  84. if (in != nullptr)
  85. {
  86. MemoryOutputStream data;
  87. data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
  88. textToParse = data.toString();
  89. if (! onlyReadOuterDocumentElement)
  90. originalText = textToParse;
  91. }
  92. }
  93. input = textToParse.getCharPointer();
  94. lastError = String::empty;
  95. errorOccurred = false;
  96. outOfData = false;
  97. needToLoadDTD = true;
  98. if (textToParse.isEmpty())
  99. {
  100. lastError = "not enough input";
  101. }
  102. else
  103. {
  104. skipHeader();
  105. if (input.getAddress() != nullptr)
  106. {
  107. ScopedPointer <XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  108. if (! errorOccurred)
  109. return result.release();
  110. }
  111. else
  112. {
  113. lastError = "incorrect xml header";
  114. }
  115. }
  116. return nullptr;
  117. }
  118. const String& XmlDocument::getLastParseError() const noexcept
  119. {
  120. return lastError;
  121. }
  122. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  123. {
  124. lastError = desc;
  125. errorOccurred = ! carryOn;
  126. }
  127. String XmlDocument::getFileContents (const String& filename) const
  128. {
  129. if (inputSource != nullptr)
  130. {
  131. const ScopedPointer <InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
  132. if (in != nullptr)
  133. return in->readEntireStreamAsString();
  134. }
  135. return String::empty;
  136. }
  137. juce_wchar XmlDocument::readNextChar() noexcept
  138. {
  139. const juce_wchar c = input.getAndAdvance();
  140. if (c == 0)
  141. {
  142. outOfData = true;
  143. --input;
  144. }
  145. return c;
  146. }
  147. int XmlDocument::findNextTokenLength() noexcept
  148. {
  149. int len = 0;
  150. juce_wchar c = *input;
  151. while (XmlIdentifierChars::isIdentifierChar (c))
  152. c = input [++len];
  153. return len;
  154. }
  155. void XmlDocument::skipHeader()
  156. {
  157. const int headerStart = input.indexOf (CharPointer_UTF8 ("<?xml"));
  158. if (headerStart >= 0)
  159. {
  160. const int headerEnd = (input + headerStart).indexOf (CharPointer_UTF8 ("?>"));
  161. if (headerEnd < 0)
  162. return;
  163. #if JUCE_DEBUG
  164. const String header (input + headerStart, (size_t) (headerEnd - headerStart));
  165. const String encoding (header.fromFirstOccurrenceOf ("encoding", false, true)
  166. .fromFirstOccurrenceOf ("=", false, false)
  167. .fromFirstOccurrenceOf ("\"", false, false)
  168. .upToFirstOccurrenceOf ("\"", false, false).trim());
  169. /* If you load an XML document with a non-UTF encoding type, it may have been
  170. loaded wrongly.. Since all the files are read via the normal juce file streams,
  171. they're treated as UTF-8, so by the time it gets to the parser, the encoding will
  172. have been lost. Best plan is to stick to utf-8 or if you have specific files to
  173. read, use your own code to convert them to a unicode String, and pass that to the
  174. XML parser.
  175. */
  176. jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
  177. #endif
  178. input += headerEnd + 2;
  179. }
  180. skipNextWhiteSpace();
  181. const int docTypeIndex = input.indexOf (CharPointer_UTF8 ("<!DOCTYPE"));
  182. if (docTypeIndex < 0)
  183. return;
  184. input += docTypeIndex + 9;
  185. const String::CharPointerType docType (input);
  186. int n = 1;
  187. while (n > 0)
  188. {
  189. const juce_wchar c = readNextChar();
  190. if (outOfData)
  191. return;
  192. if (c == '<')
  193. ++n;
  194. else if (c == '>')
  195. --n;
  196. }
  197. dtdText = String (docType, (size_t) (input.getAddress() - (docType.getAddress() + 1))).trim();
  198. }
  199. void XmlDocument::skipNextWhiteSpace()
  200. {
  201. for (;;)
  202. {
  203. juce_wchar c = *input;
  204. while (CharacterFunctions::isWhitespace (c))
  205. c = *++input;
  206. if (c == 0)
  207. {
  208. outOfData = true;
  209. break;
  210. }
  211. else if (c == '<')
  212. {
  213. if (input[1] == '!'
  214. && input[2] == '-'
  215. && input[3] == '-')
  216. {
  217. input += 4;
  218. const int closeComment = input.indexOf (CharPointer_UTF8 ("-->"));
  219. if (closeComment < 0)
  220. {
  221. outOfData = true;
  222. break;
  223. }
  224. input += closeComment + 3;
  225. continue;
  226. }
  227. else if (input[1] == '?')
  228. {
  229. input += 2;
  230. const int closeBracket = input.indexOf (CharPointer_UTF8 ("?>"));
  231. if (closeBracket < 0)
  232. {
  233. outOfData = true;
  234. break;
  235. }
  236. input += closeBracket + 2;
  237. continue;
  238. }
  239. }
  240. break;
  241. }
  242. }
  243. void XmlDocument::readQuotedString (String& result)
  244. {
  245. const juce_wchar quote = readNextChar();
  246. while (! outOfData)
  247. {
  248. const juce_wchar c = readNextChar();
  249. if (c == quote)
  250. break;
  251. --input;
  252. if (c == '&')
  253. {
  254. readEntity (result);
  255. }
  256. else
  257. {
  258. const String::CharPointerType start (input);
  259. size_t numChars = 0;
  260. for (;;)
  261. {
  262. const juce_wchar character = *input;
  263. if (character == quote)
  264. {
  265. result.appendCharPointer (start, numChars);
  266. ++input;
  267. return;
  268. }
  269. else if (character == '&')
  270. {
  271. result.appendCharPointer (start, numChars);
  272. break;
  273. }
  274. else if (character == 0)
  275. {
  276. outOfData = true;
  277. setLastError ("unmatched quotes", false);
  278. break;
  279. }
  280. ++input;
  281. ++numChars;
  282. }
  283. }
  284. }
  285. }
  286. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  287. {
  288. XmlElement* node = nullptr;
  289. skipNextWhiteSpace();
  290. if (outOfData)
  291. return nullptr;
  292. const int openBracket = input.indexOf ((juce_wchar) '<');
  293. if (openBracket >= 0)
  294. {
  295. input += openBracket + 1;
  296. int tagLen = findNextTokenLength();
  297. if (tagLen == 0)
  298. {
  299. // no tag name - but allow for a gap after the '<' before giving an error
  300. skipNextWhiteSpace();
  301. tagLen = findNextTokenLength();
  302. if (tagLen == 0)
  303. {
  304. setLastError ("tag name missing", false);
  305. return node;
  306. }
  307. }
  308. node = new XmlElement (String (input, (size_t) tagLen));
  309. input += tagLen;
  310. LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
  311. // look for attributes
  312. for (;;)
  313. {
  314. skipNextWhiteSpace();
  315. const juce_wchar c = *input;
  316. // empty tag..
  317. if (c == '/' && input[1] == '>')
  318. {
  319. input += 2;
  320. break;
  321. }
  322. // parse the guts of the element..
  323. if (c == '>')
  324. {
  325. ++input;
  326. if (alsoParseSubElements)
  327. readChildElements (node);
  328. break;
  329. }
  330. // get an attribute..
  331. if (XmlIdentifierChars::isIdentifierChar (c))
  332. {
  333. const int attNameLen = findNextTokenLength();
  334. if (attNameLen > 0)
  335. {
  336. const String::CharPointerType attNameStart (input);
  337. input += attNameLen;
  338. skipNextWhiteSpace();
  339. if (readNextChar() == '=')
  340. {
  341. skipNextWhiteSpace();
  342. const juce_wchar nextChar = *input;
  343. if (nextChar == '"' || nextChar == '\'')
  344. {
  345. XmlElement::XmlAttributeNode* const newAtt
  346. = new XmlElement::XmlAttributeNode (String (attNameStart, (size_t) attNameLen),
  347. String::empty);
  348. readQuotedString (newAtt->value);
  349. attributeAppender.append (newAtt);
  350. continue;
  351. }
  352. }
  353. }
  354. }
  355. else
  356. {
  357. if (! outOfData)
  358. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  359. }
  360. break;
  361. }
  362. }
  363. return node;
  364. }
  365. void XmlDocument::readChildElements (XmlElement* parent)
  366. {
  367. LinkedListPointer<XmlElement>::Appender childAppender (parent->firstChildElement);
  368. for (;;)
  369. {
  370. const String::CharPointerType preWhitespaceInput (input);
  371. skipNextWhiteSpace();
  372. if (outOfData)
  373. {
  374. setLastError ("unmatched tags", false);
  375. break;
  376. }
  377. if (*input == '<')
  378. {
  379. if (input[1] == '/')
  380. {
  381. // our close tag..
  382. const int closeTag = input.indexOf ((juce_wchar) '>');
  383. if (closeTag >= 0)
  384. input += closeTag + 1;
  385. break;
  386. }
  387. else if (input[1] == '!'
  388. && input[2] == '['
  389. && input[3] == 'C'
  390. && input[4] == 'D'
  391. && input[5] == 'A'
  392. && input[6] == 'T'
  393. && input[7] == 'A'
  394. && input[8] == '[')
  395. {
  396. input += 9;
  397. const String::CharPointerType inputStart (input);
  398. size_t len = 0;
  399. for (;;)
  400. {
  401. if (*input == 0)
  402. {
  403. setLastError ("unterminated CDATA section", false);
  404. outOfData = true;
  405. break;
  406. }
  407. else if (input[0] == ']'
  408. && input[1] == ']'
  409. && input[2] == '>')
  410. {
  411. input += 3;
  412. break;
  413. }
  414. ++input;
  415. ++len;
  416. }
  417. childAppender.append (XmlElement::createTextElement (String (inputStart, len)));
  418. }
  419. else
  420. {
  421. // this is some other element, so parse and add it..
  422. XmlElement* const n = readNextElement (true);
  423. if (n != nullptr)
  424. childAppender.append (n);
  425. else
  426. break;
  427. }
  428. }
  429. else // must be a character block
  430. {
  431. input = preWhitespaceInput; // roll back to include the leading whitespace
  432. String textElementContent;
  433. for (;;)
  434. {
  435. const juce_wchar c = *input;
  436. if (c == '<')
  437. break;
  438. if (c == 0)
  439. {
  440. setLastError ("unmatched tags", false);
  441. outOfData = true;
  442. return;
  443. }
  444. if (c == '&')
  445. {
  446. String entity;
  447. readEntity (entity);
  448. if (entity.startsWithChar ('<') && entity [1] != 0)
  449. {
  450. const String::CharPointerType oldInput (input);
  451. const bool oldOutOfData = outOfData;
  452. input = entity.getCharPointer();
  453. outOfData = false;
  454. for (;;)
  455. {
  456. XmlElement* const n = readNextElement (true);
  457. if (n == nullptr)
  458. break;
  459. childAppender.append (n);
  460. }
  461. input = oldInput;
  462. outOfData = oldOutOfData;
  463. }
  464. else
  465. {
  466. textElementContent += entity;
  467. }
  468. }
  469. else
  470. {
  471. const String::CharPointerType start (input);
  472. size_t len = 0;
  473. for (;;)
  474. {
  475. const juce_wchar nextChar = *input;
  476. if (nextChar == '<' || nextChar == '&')
  477. {
  478. break;
  479. }
  480. else if (nextChar == 0)
  481. {
  482. setLastError ("unmatched tags", false);
  483. outOfData = true;
  484. return;
  485. }
  486. ++input;
  487. ++len;
  488. }
  489. textElementContent.appendCharPointer (start, len);
  490. }
  491. }
  492. if ((! ignoreEmptyTextElements) || textElementContent.containsNonWhitespaceChars())
  493. {
  494. childAppender.append (XmlElement::createTextElement (textElementContent));
  495. }
  496. }
  497. }
  498. }
  499. void XmlDocument::readEntity (String& result)
  500. {
  501. // skip over the ampersand
  502. ++input;
  503. if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("amp;"), 4) == 0)
  504. {
  505. input += 4;
  506. result += '&';
  507. }
  508. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("quot;"), 5) == 0)
  509. {
  510. input += 5;
  511. result += '"';
  512. }
  513. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("apos;"), 5) == 0)
  514. {
  515. input += 5;
  516. result += '\'';
  517. }
  518. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("lt;"), 3) == 0)
  519. {
  520. input += 3;
  521. result += '<';
  522. }
  523. else if (input.compareIgnoreCaseUpTo (CharPointer_UTF8 ("gt;"), 3) == 0)
  524. {
  525. input += 3;
  526. result += '>';
  527. }
  528. else if (*input == '#')
  529. {
  530. int charCode = 0;
  531. ++input;
  532. if (*input == 'x' || *input == 'X')
  533. {
  534. ++input;
  535. int numChars = 0;
  536. while (input[0] != ';')
  537. {
  538. const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  539. if (hexValue < 0 || ++numChars > 8)
  540. {
  541. setLastError ("illegal escape sequence", true);
  542. break;
  543. }
  544. charCode = (charCode << 4) | hexValue;
  545. ++input;
  546. }
  547. ++input;
  548. }
  549. else if (input[0] >= '0' && input[0] <= '9')
  550. {
  551. int numChars = 0;
  552. while (input[0] != ';')
  553. {
  554. if (++numChars > 12)
  555. {
  556. setLastError ("illegal escape sequence", true);
  557. break;
  558. }
  559. charCode = charCode * 10 + ((int) input[0] - '0');
  560. ++input;
  561. }
  562. ++input;
  563. }
  564. else
  565. {
  566. setLastError ("illegal escape sequence", true);
  567. result += '&';
  568. return;
  569. }
  570. result << (juce_wchar) charCode;
  571. }
  572. else
  573. {
  574. const String::CharPointerType entityNameStart (input);
  575. const int closingSemiColon = input.indexOf ((juce_wchar) ';');
  576. if (closingSemiColon < 0)
  577. {
  578. outOfData = true;
  579. result += '&';
  580. }
  581. else
  582. {
  583. input += closingSemiColon + 1;
  584. result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
  585. }
  586. }
  587. }
  588. String XmlDocument::expandEntity (const String& ent)
  589. {
  590. if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
  591. if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
  592. if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
  593. if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
  594. if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
  595. if (ent[0] == '#')
  596. {
  597. const juce_wchar char1 = ent[1];
  598. if (char1 == 'x' || char1 == 'X')
  599. return String::charToString (static_cast <juce_wchar> (ent.substring (2).getHexValue32()));
  600. if (char1 >= '0' && char1 <= '9')
  601. return String::charToString (static_cast <juce_wchar> (ent.substring (1).getIntValue()));
  602. setLastError ("illegal escape sequence", false);
  603. return String::charToString ('&');
  604. }
  605. return expandExternalEntity (ent);
  606. }
  607. String XmlDocument::expandExternalEntity (const String& entity)
  608. {
  609. if (needToLoadDTD)
  610. {
  611. if (dtdText.isNotEmpty())
  612. {
  613. dtdText = dtdText.trimCharactersAtEnd (">");
  614. tokenisedDTD.addTokens (dtdText, true);
  615. if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
  616. && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
  617. {
  618. const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
  619. tokenisedDTD.clear();
  620. tokenisedDTD.addTokens (getFileContents (fn), true);
  621. }
  622. else
  623. {
  624. tokenisedDTD.clear();
  625. const int openBracket = dtdText.indexOfChar ('[');
  626. if (openBracket > 0)
  627. {
  628. const int closeBracket = dtdText.lastIndexOfChar (']');
  629. if (closeBracket > openBracket)
  630. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  631. closeBracket), true);
  632. }
  633. }
  634. for (int i = tokenisedDTD.size(); --i >= 0;)
  635. {
  636. if (tokenisedDTD[i].startsWithChar ('%')
  637. && tokenisedDTD[i].endsWithChar (';'))
  638. {
  639. const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
  640. StringArray newToks;
  641. newToks.addTokens (parsed, true);
  642. tokenisedDTD.remove (i);
  643. for (int j = newToks.size(); --j >= 0;)
  644. tokenisedDTD.insert (i, newToks[j]);
  645. }
  646. }
  647. }
  648. needToLoadDTD = false;
  649. }
  650. for (int i = 0; i < tokenisedDTD.size(); ++i)
  651. {
  652. if (tokenisedDTD[i] == entity)
  653. {
  654. if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
  655. {
  656. String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
  657. // check for sub-entities..
  658. int ampersand = ent.indexOfChar ('&');
  659. while (ampersand >= 0)
  660. {
  661. const int semiColon = ent.indexOf (i + 1, ";");
  662. if (semiColon < 0)
  663. {
  664. setLastError ("entity without terminating semi-colon", false);
  665. break;
  666. }
  667. const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
  668. ent = ent.substring (0, ampersand)
  669. + resolved
  670. + ent.substring (semiColon + 1);
  671. ampersand = ent.indexOfChar (semiColon + 1, '&');
  672. }
  673. return ent;
  674. }
  675. }
  676. }
  677. setLastError ("unknown entity", true);
  678. return entity;
  679. }
  680. String XmlDocument::getParameterEntity (const String& entity)
  681. {
  682. for (int i = 0; i < tokenisedDTD.size(); ++i)
  683. {
  684. if (tokenisedDTD[i] == entity
  685. && tokenisedDTD [i - 1] == "%"
  686. && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
  687. {
  688. const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
  689. if (ent.equalsIgnoreCase ("system"))
  690. return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
  691. else
  692. return ent.trim().unquoted();
  693. }
  694. }
  695. return entity;
  696. }
  697. END_JUCE_NAMESPACE