The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

893 lines
24KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-9 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. #include "../core/juce_StandardHeader.h"
  19. BEGIN_JUCE_NAMESPACE
  20. #include "juce_XmlDocument.h"
  21. #include "../io/streams/juce_FileInputSource.h"
  22. //==============================================================================
  23. XmlDocument::XmlDocument (const String& documentText)
  24. : originalText (documentText),
  25. ignoreEmptyTextElements (true)
  26. {
  27. }
  28. XmlDocument::XmlDocument (const File& file)
  29. {
  30. inputSource = new FileInputSource (file);
  31. }
  32. XmlDocument::~XmlDocument()
  33. {
  34. }
  35. void XmlDocument::setInputSource (InputSource* const newSource) throw()
  36. {
  37. inputSource = newSource;
  38. }
  39. void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) throw()
  40. {
  41. ignoreEmptyTextElements = shouldBeIgnored;
  42. }
  43. bool XmlDocument::isXmlIdentifierCharSlow (const juce_wchar c) throw()
  44. {
  45. return CharacterFunctions::isLetterOrDigit (c)
  46. || c == T('_')
  47. || c == T('-')
  48. || c == T(':')
  49. || c == T('.');
  50. }
  51. inline bool XmlDocument::isXmlIdentifierChar (const juce_wchar c) const throw()
  52. {
  53. return (c > 0 && c <= 127) ? identifierLookupTable [(int) c]
  54. : isXmlIdentifierCharSlow (c);
  55. }
  56. XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
  57. {
  58. String textToParse (originalText);
  59. if (textToParse.isEmpty() && inputSource != 0)
  60. {
  61. ScopedPointer <InputStream> in (inputSource->createInputStream());
  62. if (in != 0)
  63. {
  64. MemoryBlock data;
  65. in->readIntoMemoryBlock (data, onlyReadOuterDocumentElement ? 8192 : -1);
  66. if (data.getSize() >= 2
  67. && ((data[0] == (char)-2 && data[1] == (char)-1)
  68. || (data[0] == (char)-1 && data[1] == (char)-2)))
  69. {
  70. textToParse = String::createStringFromData ((const char*) data.getData(), (int) data.getSize());
  71. }
  72. else
  73. {
  74. textToParse = String::fromUTF8 ((const char*) data.getData(), (int) data.getSize());
  75. }
  76. if (! onlyReadOuterDocumentElement)
  77. originalText = textToParse;
  78. }
  79. }
  80. input = textToParse;
  81. lastError = String::empty;
  82. errorOccurred = false;
  83. outOfData = false;
  84. needToLoadDTD = true;
  85. for (int i = 0; i < 128; ++i)
  86. identifierLookupTable[i] = isXmlIdentifierCharSlow ((juce_wchar) i);
  87. if (textToParse.isEmpty())
  88. {
  89. lastError = "not enough input";
  90. }
  91. else
  92. {
  93. skipHeader();
  94. if (input != 0)
  95. {
  96. ScopedPointer <XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
  97. if (! errorOccurred)
  98. return result.release();
  99. }
  100. else
  101. {
  102. lastError = "incorrect xml header";
  103. }
  104. }
  105. return 0;
  106. }
  107. const String& XmlDocument::getLastParseError() const throw()
  108. {
  109. return lastError;
  110. }
  111. void XmlDocument::setLastError (const String& desc, const bool carryOn)
  112. {
  113. lastError = desc;
  114. errorOccurred = ! carryOn;
  115. }
  116. const String XmlDocument::getFileContents (const String& filename) const
  117. {
  118. if (inputSource != 0)
  119. {
  120. const ScopedPointer <InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
  121. if (in != 0)
  122. return in->readEntireStreamAsString();
  123. }
  124. return String::empty;
  125. }
  126. juce_wchar XmlDocument::readNextChar() throw()
  127. {
  128. if (*input != 0)
  129. {
  130. return *input++;
  131. }
  132. else
  133. {
  134. outOfData = true;
  135. return 0;
  136. }
  137. }
  138. int XmlDocument::findNextTokenLength() throw()
  139. {
  140. int len = 0;
  141. juce_wchar c = *input;
  142. while (isXmlIdentifierChar (c))
  143. c = input [++len];
  144. return len;
  145. }
  146. void XmlDocument::skipHeader()
  147. {
  148. const juce_wchar* const found = CharacterFunctions::find (input, T("<?xml"));
  149. if (found != 0)
  150. {
  151. input = found;
  152. input = CharacterFunctions::find (input, T("?>"));
  153. if (input == 0)
  154. return;
  155. input += 2;
  156. }
  157. skipNextWhiteSpace();
  158. const juce_wchar* docType = CharacterFunctions::find (input, T("<!DOCTYPE"));
  159. if (docType == 0)
  160. return;
  161. input = docType + 9;
  162. int n = 1;
  163. while (n > 0)
  164. {
  165. const juce_wchar c = readNextChar();
  166. if (outOfData)
  167. return;
  168. if (c == T('<'))
  169. ++n;
  170. else if (c == T('>'))
  171. --n;
  172. }
  173. docType += 9;
  174. dtdText = String (docType, (int) (input - (docType + 1))).trim();
  175. }
  176. void XmlDocument::skipNextWhiteSpace()
  177. {
  178. for (;;)
  179. {
  180. juce_wchar c = *input;
  181. while (CharacterFunctions::isWhitespace (c))
  182. c = *++input;
  183. if (c == 0)
  184. {
  185. outOfData = true;
  186. break;
  187. }
  188. else if (c == T('<'))
  189. {
  190. if (input[1] == T('!')
  191. && input[2] == T('-')
  192. && input[3] == T('-'))
  193. {
  194. const juce_wchar* const closeComment = CharacterFunctions::find (input, T("-->"));
  195. if (closeComment == 0)
  196. {
  197. outOfData = true;
  198. break;
  199. }
  200. input = closeComment + 3;
  201. continue;
  202. }
  203. else if (input[1] == T('?'))
  204. {
  205. const juce_wchar* const closeBracket = CharacterFunctions::find (input, T("?>"));
  206. if (closeBracket == 0)
  207. {
  208. outOfData = true;
  209. break;
  210. }
  211. input = closeBracket + 2;
  212. continue;
  213. }
  214. }
  215. break;
  216. }
  217. }
  218. void XmlDocument::readQuotedString (String& result)
  219. {
  220. const juce_wchar quote = readNextChar();
  221. while (! outOfData)
  222. {
  223. const juce_wchar c = readNextChar();
  224. if (c == quote)
  225. break;
  226. if (c == T('&'))
  227. {
  228. --input;
  229. readEntity (result);
  230. }
  231. else
  232. {
  233. --input;
  234. const juce_wchar* const start = input;
  235. for (;;)
  236. {
  237. const juce_wchar character = *input;
  238. if (character == quote)
  239. {
  240. result.append (start, (int) (input - start));
  241. ++input;
  242. return;
  243. }
  244. else if (character == T('&'))
  245. {
  246. result.append (start, (int) (input - start));
  247. break;
  248. }
  249. else if (character == 0)
  250. {
  251. outOfData = true;
  252. setLastError ("unmatched quotes", false);
  253. break;
  254. }
  255. ++input;
  256. }
  257. }
  258. }
  259. }
  260. XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
  261. {
  262. XmlElement* node = 0;
  263. skipNextWhiteSpace();
  264. if (outOfData)
  265. return 0;
  266. input = CharacterFunctions::find (input, T("<"));
  267. if (input != 0)
  268. {
  269. ++input;
  270. int tagLen = findNextTokenLength();
  271. if (tagLen == 0)
  272. {
  273. // no tag name - but allow for a gap after the '<' before giving an error
  274. skipNextWhiteSpace();
  275. tagLen = findNextTokenLength();
  276. if (tagLen == 0)
  277. {
  278. setLastError ("tag name missing", false);
  279. return node;
  280. }
  281. }
  282. node = new XmlElement (String (input, tagLen));
  283. input += tagLen;
  284. XmlElement::XmlAttributeNode* lastAttribute = 0;
  285. // look for attributes
  286. for (;;)
  287. {
  288. skipNextWhiteSpace();
  289. const juce_wchar c = *input;
  290. // empty tag..
  291. if (c == T('/') && input[1] == T('>'))
  292. {
  293. input += 2;
  294. break;
  295. }
  296. // parse the guts of the element..
  297. if (c == T('>'))
  298. {
  299. ++input;
  300. skipNextWhiteSpace();
  301. if (alsoParseSubElements)
  302. readChildElements (node);
  303. break;
  304. }
  305. // get an attribute..
  306. if (isXmlIdentifierChar (c))
  307. {
  308. const int attNameLen = findNextTokenLength();
  309. if (attNameLen > 0)
  310. {
  311. const juce_wchar* attNameStart = input;
  312. input += attNameLen;
  313. skipNextWhiteSpace();
  314. if (readNextChar() == T('='))
  315. {
  316. skipNextWhiteSpace();
  317. const juce_wchar nextChar = *input;
  318. if (nextChar == T('"') || nextChar == T('\''))
  319. {
  320. XmlElement::XmlAttributeNode* const newAtt
  321. = new XmlElement::XmlAttributeNode (String (attNameStart, attNameLen),
  322. String::empty);
  323. readQuotedString (newAtt->value);
  324. if (lastAttribute == 0)
  325. node->attributes = newAtt;
  326. else
  327. lastAttribute->next = newAtt;
  328. lastAttribute = newAtt;
  329. continue;
  330. }
  331. }
  332. }
  333. }
  334. else
  335. {
  336. if (! outOfData)
  337. setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
  338. }
  339. break;
  340. }
  341. }
  342. return node;
  343. }
  344. void XmlDocument::readChildElements (XmlElement* parent)
  345. {
  346. XmlElement* lastChildNode = 0;
  347. for (;;)
  348. {
  349. skipNextWhiteSpace();
  350. if (outOfData)
  351. {
  352. setLastError ("unmatched tags", false);
  353. break;
  354. }
  355. if (*input == T('<'))
  356. {
  357. if (input[1] == T('/'))
  358. {
  359. // our close tag..
  360. input = CharacterFunctions::find (input, T(">"));
  361. ++input;
  362. break;
  363. }
  364. else if (input[1] == T('!')
  365. && input[2] == T('[')
  366. && input[3] == T('C')
  367. && input[4] == T('D')
  368. && input[5] == T('A')
  369. && input[6] == T('T')
  370. && input[7] == T('A')
  371. && input[8] == T('['))
  372. {
  373. input += 9;
  374. const juce_wchar* const inputStart = input;
  375. int len = 0;
  376. for (;;)
  377. {
  378. if (*input == 0)
  379. {
  380. setLastError ("unterminated CDATA section", false);
  381. outOfData = true;
  382. break;
  383. }
  384. else if (input[0] == T(']')
  385. && input[1] == T(']')
  386. && input[2] == T('>'))
  387. {
  388. input += 3;
  389. break;
  390. }
  391. ++input;
  392. ++len;
  393. }
  394. XmlElement* const e = new XmlElement ((int) 0);
  395. e->setText (String (inputStart, len));
  396. if (lastChildNode != 0)
  397. lastChildNode->nextElement = e;
  398. else
  399. parent->addChildElement (e);
  400. lastChildNode = e;
  401. }
  402. else
  403. {
  404. // this is some other element, so parse and add it..
  405. XmlElement* const n = readNextElement (true);
  406. if (n != 0)
  407. {
  408. if (lastChildNode == 0)
  409. parent->addChildElement (n);
  410. else
  411. lastChildNode->nextElement = n;
  412. lastChildNode = n;
  413. }
  414. else
  415. {
  416. return;
  417. }
  418. }
  419. }
  420. else
  421. {
  422. // read character block..
  423. XmlElement* const e = new XmlElement ((int)0);
  424. if (lastChildNode != 0)
  425. lastChildNode->nextElement = e;
  426. else
  427. parent->addChildElement (e);
  428. lastChildNode = e;
  429. String textElementContent;
  430. for (;;)
  431. {
  432. const juce_wchar c = *input;
  433. if (c == T('<'))
  434. break;
  435. if (c == 0)
  436. {
  437. setLastError ("unmatched tags", false);
  438. outOfData = true;
  439. return;
  440. }
  441. if (c == T('&'))
  442. {
  443. String entity;
  444. readEntity (entity);
  445. if (entity.startsWithChar (T('<')) && entity [1] != 0)
  446. {
  447. const juce_wchar* const oldInput = input;
  448. const bool oldOutOfData = outOfData;
  449. input = entity;
  450. outOfData = false;
  451. for (;;)
  452. {
  453. XmlElement* const n = readNextElement (true);
  454. if (n == 0)
  455. break;
  456. if (lastChildNode == 0)
  457. parent->addChildElement (n);
  458. else
  459. lastChildNode->nextElement = n;
  460. lastChildNode = n;
  461. }
  462. input = oldInput;
  463. outOfData = oldOutOfData;
  464. }
  465. else
  466. {
  467. textElementContent += entity;
  468. }
  469. }
  470. else
  471. {
  472. const juce_wchar* start = input;
  473. int len = 0;
  474. for (;;)
  475. {
  476. const juce_wchar nextChar = *input;
  477. if (nextChar == T('<') || nextChar == T('&'))
  478. {
  479. break;
  480. }
  481. else if (nextChar == 0)
  482. {
  483. setLastError ("unmatched tags", false);
  484. outOfData = true;
  485. return;
  486. }
  487. ++input;
  488. ++len;
  489. }
  490. textElementContent.append (start, len);
  491. }
  492. }
  493. if (ignoreEmptyTextElements ? textElementContent.containsNonWhitespaceChars()
  494. : textElementContent.isNotEmpty())
  495. e->setText (textElementContent);
  496. }
  497. }
  498. }
  499. void XmlDocument::readEntity (String& result)
  500. {
  501. // skip over the ampersand
  502. ++input;
  503. if (CharacterFunctions::compareIgnoreCase (input, T("amp;"), 4) == 0)
  504. {
  505. input += 4;
  506. result += T("&");
  507. }
  508. else if (CharacterFunctions::compareIgnoreCase (input, T("quot;"), 5) == 0)
  509. {
  510. input += 5;
  511. result += T("\"");
  512. }
  513. else if (CharacterFunctions::compareIgnoreCase (input, T("apos;"), 5) == 0)
  514. {
  515. input += 5;
  516. result += T("\'");
  517. }
  518. else if (CharacterFunctions::compareIgnoreCase (input, T("lt;"), 3) == 0)
  519. {
  520. input += 3;
  521. result += T("<");
  522. }
  523. else if (CharacterFunctions::compareIgnoreCase (input, T("gt;"), 3) == 0)
  524. {
  525. input += 3;
  526. result += T(">");
  527. }
  528. else if (*input == T('#'))
  529. {
  530. int charCode = 0;
  531. ++input;
  532. if (*input == T('x') || *input == T('X'))
  533. {
  534. ++input;
  535. int numChars = 0;
  536. while (input[0] != T(';'))
  537. {
  538. const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
  539. if (hexValue < 0 || ++numChars > 8)
  540. {
  541. setLastError ("illegal escape sequence", true);
  542. break;
  543. }
  544. charCode = (charCode << 4) | hexValue;
  545. ++input;
  546. }
  547. ++input;
  548. }
  549. else if (input[0] >= T('0') && input[0] <= T('9'))
  550. {
  551. int numChars = 0;
  552. while (input[0] != T(';'))
  553. {
  554. if (++numChars > 12)
  555. {
  556. setLastError ("illegal escape sequence", true);
  557. break;
  558. }
  559. charCode = charCode * 10 + (input[0] - T('0'));
  560. ++input;
  561. }
  562. ++input;
  563. }
  564. else
  565. {
  566. setLastError ("illegal escape sequence", true);
  567. result += T("&");
  568. return;
  569. }
  570. result << (juce_wchar) charCode;
  571. }
  572. else
  573. {
  574. const juce_wchar* const entityNameStart = input;
  575. const juce_wchar* const closingSemiColon = CharacterFunctions::find (input, T(";"));
  576. if (closingSemiColon == 0)
  577. {
  578. outOfData = true;
  579. result += T("&");
  580. }
  581. else
  582. {
  583. input = closingSemiColon + 1;
  584. result += expandExternalEntity (String (entityNameStart,
  585. (int) (closingSemiColon - entityNameStart)));
  586. }
  587. }
  588. }
  589. const String XmlDocument::expandEntity (const String& ent)
  590. {
  591. if (ent.equalsIgnoreCase (T("amp")))
  592. return T("&");
  593. if (ent.equalsIgnoreCase (T("quot")))
  594. return T("\"");
  595. if (ent.equalsIgnoreCase (T("apos")))
  596. return T("\'");
  597. if (ent.equalsIgnoreCase (T("lt")))
  598. return T("<");
  599. if (ent.equalsIgnoreCase (T("gt")))
  600. return T(">");
  601. if (ent[0] == T('#'))
  602. {
  603. if (ent[1] == T('x') || ent[1] == T('X'))
  604. return String::charToString (static_cast <juce_wchar> (ent.substring (2).getHexValue32()));
  605. if (ent[1] >= T('0') && ent[1] <= T('9'))
  606. return String::charToString (static_cast <juce_wchar> (ent.substring (1).getIntValue()));
  607. setLastError ("illegal escape sequence", false);
  608. return T("&");
  609. }
  610. return expandExternalEntity (ent);
  611. }
  612. const String XmlDocument::expandExternalEntity (const String& entity)
  613. {
  614. if (needToLoadDTD)
  615. {
  616. if (dtdText.isNotEmpty())
  617. {
  618. while (dtdText.endsWithChar (T('>')))
  619. dtdText = dtdText.dropLastCharacters (1);
  620. tokenisedDTD.addTokens (dtdText, true);
  621. if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase (T("system"))
  622. && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
  623. {
  624. const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
  625. tokenisedDTD.clear();
  626. tokenisedDTD.addTokens (getFileContents (fn), true);
  627. }
  628. else
  629. {
  630. tokenisedDTD.clear();
  631. const int openBracket = dtdText.indexOfChar (T('['));
  632. if (openBracket > 0)
  633. {
  634. const int closeBracket = dtdText.lastIndexOfChar (T(']'));
  635. if (closeBracket > openBracket)
  636. tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
  637. closeBracket), true);
  638. }
  639. }
  640. for (int i = tokenisedDTD.size(); --i >= 0;)
  641. {
  642. if (tokenisedDTD[i].startsWithChar (T('%'))
  643. && tokenisedDTD[i].endsWithChar (T(';')))
  644. {
  645. const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
  646. StringArray newToks;
  647. newToks.addTokens (parsed, true);
  648. tokenisedDTD.remove (i);
  649. for (int j = newToks.size(); --j >= 0;)
  650. tokenisedDTD.insert (i, newToks[j]);
  651. }
  652. }
  653. }
  654. needToLoadDTD = false;
  655. }
  656. for (int i = 0; i < tokenisedDTD.size(); ++i)
  657. {
  658. if (tokenisedDTD[i] == entity)
  659. {
  660. if (tokenisedDTD[i - 1].equalsIgnoreCase (T("<!entity")))
  661. {
  662. String ent (tokenisedDTD [i + 1]);
  663. while (ent.endsWithChar (T('>')))
  664. ent = ent.dropLastCharacters (1);
  665. ent = ent.trim().unquoted();
  666. // check for sub-entities..
  667. int ampersand = ent.indexOfChar (T('&'));
  668. while (ampersand >= 0)
  669. {
  670. const int semiColon = ent.indexOf (i + 1, T(";"));
  671. if (semiColon < 0)
  672. {
  673. setLastError ("entity without terminating semi-colon", false);
  674. break;
  675. }
  676. const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
  677. ent = ent.substring (0, ampersand)
  678. + resolved
  679. + ent.substring (semiColon + 1);
  680. ampersand = ent.indexOfChar (semiColon + 1, T('&'));
  681. }
  682. return ent;
  683. }
  684. }
  685. }
  686. setLastError ("unknown entity", true);
  687. return entity;
  688. }
  689. const String XmlDocument::getParameterEntity (const String& entity)
  690. {
  691. for (int i = 0; i < tokenisedDTD.size(); ++i)
  692. {
  693. if (tokenisedDTD[i] == entity)
  694. {
  695. if (tokenisedDTD [i - 1] == T("%")
  696. && tokenisedDTD [i - 2].equalsIgnoreCase (T("<!entity")))
  697. {
  698. String ent (tokenisedDTD [i + 1]);
  699. while (ent.endsWithChar (T('>')))
  700. ent = ent.dropLastCharacters (1);
  701. if (ent.equalsIgnoreCase (T("system")))
  702. {
  703. String filename (tokenisedDTD [i + 2]);
  704. while (filename.endsWithChar (T('>')))
  705. filename = filename.dropLastCharacters (1);
  706. return getFileContents (filename);
  707. }
  708. else
  709. {
  710. return ent.trim().unquoted();
  711. }
  712. }
  713. }
  714. }
  715. return entity;
  716. }
  717. END_JUCE_NAMESPACE