The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

666 lines
21KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. By using JUCE, you agree to the terms of both the JUCE 5 End-User License
  8. Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
  9. 27th April 2017).
  10. End User License Agreement: www.juce.com/juce-5-licence
  11. Privacy Policy: www.juce.com/juce-5-privacy-policy
  12. Or: You may also use this code under the terms of the GPL v3 (see
  13. www.gnu.org/licenses).
  14. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  15. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  16. DISCLAIMED.
  17. ==============================================================================
  18. */
  19. namespace juce
  20. {
  21. //==============================================================================
  22. /** Class containing some basic functions for simple tokenising of C++ code.
  23. */
  24. struct CppTokeniserFunctions
  25. {
  26. static bool isIdentifierStart (const juce_wchar c) noexcept
  27. {
  28. return CharacterFunctions::isLetter (c)
  29. || c == '_' || c == '@';
  30. }
  31. static bool isIdentifierBody (const juce_wchar c) noexcept
  32. {
  33. return CharacterFunctions::isLetterOrDigit (c)
  34. || c == '_' || c == '@';
  35. }
  36. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  37. {
  38. static const char* const keywords2Char[] =
  39. { "if", "do", "or", nullptr };
  40. static const char* const keywords3Char[] =
  41. { "for", "int", "new", "try", "xor", "and", "asm", "not", nullptr };
  42. static const char* const keywords4Char[] =
  43. { "bool", "void", "this", "true", "long", "else", "char",
  44. "enum", "case", "goto", "auto", nullptr };
  45. static const char* const keywords5Char[] =
  46. { "float", "const", "while", "break", "false", "catch", "class", "bitor",
  47. "compl", "or_eq", "short", "throw", "union", "using", "final", nullptr };
  48. static const char* const keywords6Char[] =
  49. { "return", "and_eq", "bitand", "delete", "double", "export", "extern",
  50. "friend", "inline", "not_eq", "public", "signed", "sizeof", "static",
  51. "struct", "switch", "typeid", "xor_eq", nullptr };
  52. static const char* const keywords7Char[] =
  53. { "nullptr", "alignas", "alignof", "default", "mutable", "private",
  54. "typedef", "virtual", "wchar_t", "__cdecl", "_Pragma", "uint8_t", nullptr };
  55. static const char* const keywordsOther[] =
  56. { "char16_t", "char32_t", "const_cast", "constexpr", "continue", "decltype", "dynamic_cast",
  57. "explicit", "namespace", "noexcept", "operator", "protected", "register", "reinterpret_cast",
  58. "static_assert", "static_cast", "template", "thread_local", "typename", "unsigned", "volatile",
  59. "@class", "@dynamic", "@end", "@implementation", "@interface", "@public", "@private",
  60. "@protected", "@property", "@synthesize", "__fastcall", "__stdcall", nullptr };
  61. const char* const* k;
  62. switch (tokenLength)
  63. {
  64. case 2: k = keywords2Char; break;
  65. case 3: k = keywords3Char; break;
  66. case 4: k = keywords4Char; break;
  67. case 5: k = keywords5Char; break;
  68. case 6: k = keywords6Char; break;
  69. case 7: k = keywords7Char; break;
  70. default:
  71. if (tokenLength < 2 || tokenLength > 16)
  72. return false;
  73. k = keywordsOther;
  74. break;
  75. }
  76. for (int i = 0; k[i] != 0; ++i)
  77. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  78. return true;
  79. return false;
  80. }
  81. template <typename Iterator>
  82. static int parseIdentifier (Iterator& source) noexcept
  83. {
  84. int tokenLength = 0;
  85. String::CharPointerType::CharType possibleIdentifier [100];
  86. String::CharPointerType possible (possibleIdentifier);
  87. while (isIdentifierBody (source.peekNextChar()))
  88. {
  89. const juce_wchar c = source.nextChar();
  90. if (tokenLength < 20)
  91. possible.write (c);
  92. ++tokenLength;
  93. }
  94. if (tokenLength > 1 && tokenLength <= 16)
  95. {
  96. possible.writeNull();
  97. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  98. return CPlusPlusCodeTokeniser::tokenType_keyword;
  99. }
  100. return CPlusPlusCodeTokeniser::tokenType_identifier;
  101. }
  102. template <typename Iterator>
  103. static bool skipNumberSuffix (Iterator& source)
  104. {
  105. const juce_wchar c = source.peekNextChar();
  106. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  107. source.skip();
  108. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  109. return false;
  110. return true;
  111. }
  112. static bool isHexDigit (const juce_wchar c) noexcept
  113. {
  114. return (c >= '0' && c <= '9')
  115. || (c >= 'a' && c <= 'f')
  116. || (c >= 'A' && c <= 'F');
  117. }
  118. template <typename Iterator>
  119. static bool parseHexLiteral (Iterator& source) noexcept
  120. {
  121. if (source.peekNextChar() == '-')
  122. source.skip();
  123. if (source.nextChar() != '0')
  124. return false;
  125. juce_wchar c = source.nextChar();
  126. if (c != 'x' && c != 'X')
  127. return false;
  128. int numDigits = 0;
  129. while (isHexDigit (source.peekNextChar()))
  130. {
  131. ++numDigits;
  132. source.skip();
  133. }
  134. if (numDigits == 0)
  135. return false;
  136. return skipNumberSuffix (source);
  137. }
  138. static bool isOctalDigit (const juce_wchar c) noexcept
  139. {
  140. return c >= '0' && c <= '7';
  141. }
  142. template <typename Iterator>
  143. static bool parseOctalLiteral (Iterator& source) noexcept
  144. {
  145. if (source.peekNextChar() == '-')
  146. source.skip();
  147. if (source.nextChar() != '0')
  148. return false;
  149. if (! isOctalDigit (source.nextChar()))
  150. return false;
  151. while (isOctalDigit (source.peekNextChar()))
  152. source.skip();
  153. return skipNumberSuffix (source);
  154. }
  155. static bool isDecimalDigit (const juce_wchar c) noexcept
  156. {
  157. return c >= '0' && c <= '9';
  158. }
  /** Tries to consume a decimal integer literal: an optional '-', one or more
      decimal digits, and an optional number suffix.

      @returns true if a complete literal was consumed; on false the iterator
               may have been advanced part-way and should be reset by the caller
  */
  template <typename Iterator>
  static bool parseDecimalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      int digitCount = 0;

      for (; isDecimalDigit (source.peekNextChar()); source.skip())
          ++digitCount;

      return digitCount != 0 && skipNumberSuffix (source);
  }
  174. template <typename Iterator>
  175. static bool parseFloatLiteral (Iterator& source) noexcept
  176. {
  177. if (source.peekNextChar() == '-')
  178. source.skip();
  179. int numDigits = 0;
  180. while (isDecimalDigit (source.peekNextChar()))
  181. {
  182. source.skip();
  183. ++numDigits;
  184. }
  185. const bool hasPoint = (source.peekNextChar() == '.');
  186. if (hasPoint)
  187. {
  188. source.skip();
  189. while (isDecimalDigit (source.peekNextChar()))
  190. {
  191. source.skip();
  192. ++numDigits;
  193. }
  194. }
  195. if (numDigits == 0)
  196. return false;
  197. juce_wchar c = source.peekNextChar();
  198. const bool hasExponent = (c == 'e' || c == 'E');
  199. if (hasExponent)
  200. {
  201. source.skip();
  202. c = source.peekNextChar();
  203. if (c == '+' || c == '-')
  204. source.skip();
  205. int numExpDigits = 0;
  206. while (isDecimalDigit (source.peekNextChar()))
  207. {
  208. source.skip();
  209. ++numExpDigits;
  210. }
  211. if (numExpDigits == 0)
  212. return false;
  213. }
  214. c = source.peekNextChar();
  215. if (c == 'f' || c == 'F')
  216. source.skip();
  217. else if (! (hasExponent || hasPoint))
  218. return false;
  219. return true;
  220. }
  221. template <typename Iterator>
  222. static int parseNumber (Iterator& source)
  223. {
  224. const Iterator original (source);
  225. if (parseFloatLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_float;
  226. source = original;
  227. if (parseHexLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  228. source = original;
  229. if (parseOctalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  230. source = original;
  231. if (parseDecimalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  232. source = original;
  233. return CPlusPlusCodeTokeniser::tokenType_error;
  234. }
  235. template <typename Iterator>
  236. static void skipQuotedString (Iterator& source) noexcept
  237. {
  238. const juce_wchar quote = source.nextChar();
  239. for (;;)
  240. {
  241. const juce_wchar c = source.nextChar();
  242. if (c == quote || c == 0)
  243. break;
  244. if (c == '\\')
  245. source.skip();
  246. }
  247. }
  248. template <typename Iterator>
  249. static void skipComment (Iterator& source) noexcept
  250. {
  251. bool lastWasStar = false;
  252. for (;;)
  253. {
  254. const juce_wchar c = source.nextChar();
  255. if (c == 0 || (c == '/' && lastWasStar))
  256. break;
  257. lastWasStar = (c == '*');
  258. }
  259. }
  260. template <typename Iterator>
  261. static void skipPreprocessorLine (Iterator& source) noexcept
  262. {
  263. bool lastWasBackslash = false;
  264. for (;;)
  265. {
  266. const juce_wchar c = source.peekNextChar();
  267. if (c == '"')
  268. {
  269. skipQuotedString (source);
  270. continue;
  271. }
  272. if (c == '/')
  273. {
  274. Iterator next (source);
  275. next.skip();
  276. const juce_wchar c2 = next.peekNextChar();
  277. if (c2 == '/' || c2 == '*')
  278. return;
  279. }
  280. if (c == 0)
  281. break;
  282. if (c == '\n' || c == '\r')
  283. {
  284. source.skipToEndOfLine();
  285. if (lastWasBackslash)
  286. skipPreprocessorLine (source);
  287. break;
  288. }
  289. lastWasBackslash = (c == '\\');
  290. source.skip();
  291. }
  292. }
  293. template <typename Iterator>
  294. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  295. {
  296. if (source.peekNextChar() == c)
  297. source.skip();
  298. }
  299. template <typename Iterator>
  300. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  301. {
  302. const juce_wchar c = source.peekNextChar();
  303. if (c == c1 || c == c2)
  304. source.skip();
  305. }
  306. template <typename Iterator>
  307. static int readNextToken (Iterator& source)
  308. {
  309. source.skipWhitespace();
  310. const juce_wchar firstChar = source.peekNextChar();
  311. switch (firstChar)
  312. {
  313. case 0:
  314. break;
  315. case '0': case '1': case '2': case '3': case '4':
  316. case '5': case '6': case '7': case '8': case '9':
  317. case '.':
  318. {
  319. int result = parseNumber (source);
  320. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  321. {
  322. source.skip();
  323. if (firstChar == '.')
  324. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  325. }
  326. return result;
  327. }
  328. case ',':
  329. case ';':
  330. case ':':
  331. source.skip();
  332. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  333. case '(': case ')':
  334. case '{': case '}':
  335. case '[': case ']':
  336. source.skip();
  337. return CPlusPlusCodeTokeniser::tokenType_bracket;
  338. case '"':
  339. case '\'':
  340. skipQuotedString (source);
  341. return CPlusPlusCodeTokeniser::tokenType_string;
  342. case '+':
  343. source.skip();
  344. skipIfNextCharMatches (source, '+', '=');
  345. return CPlusPlusCodeTokeniser::tokenType_operator;
  346. case '-':
  347. {
  348. source.skip();
  349. int result = parseNumber (source);
  350. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  351. {
  352. skipIfNextCharMatches (source, '-', '=');
  353. return CPlusPlusCodeTokeniser::tokenType_operator;
  354. }
  355. return result;
  356. }
  357. case '*': case '%':
  358. case '=': case '!':
  359. source.skip();
  360. skipIfNextCharMatches (source, '=');
  361. return CPlusPlusCodeTokeniser::tokenType_operator;
  362. case '/':
  363. {
  364. source.skip();
  365. juce_wchar nextChar = source.peekNextChar();
  366. if (nextChar == '/')
  367. {
  368. source.skipToEndOfLine();
  369. return CPlusPlusCodeTokeniser::tokenType_comment;
  370. }
  371. if (nextChar == '*')
  372. {
  373. source.skip();
  374. skipComment (source);
  375. return CPlusPlusCodeTokeniser::tokenType_comment;
  376. }
  377. if (nextChar == '=')
  378. source.skip();
  379. return CPlusPlusCodeTokeniser::tokenType_operator;
  380. }
  381. case '?':
  382. case '~':
  383. source.skip();
  384. return CPlusPlusCodeTokeniser::tokenType_operator;
  385. case '<': case '>':
  386. case '|': case '&': case '^':
  387. source.skip();
  388. skipIfNextCharMatches (source, firstChar);
  389. skipIfNextCharMatches (source, '=');
  390. return CPlusPlusCodeTokeniser::tokenType_operator;
  391. case '#':
  392. skipPreprocessorLine (source);
  393. return CPlusPlusCodeTokeniser::tokenType_preprocessor;
  394. default:
  395. if (isIdentifierStart (firstChar))
  396. return parseIdentifier (source);
  397. source.skip();
  398. break;
  399. }
  400. return CPlusPlusCodeTokeniser::tokenType_error;
  401. }
  402. /** A class that can be passed to the CppTokeniserFunctions functions in order to
  403. parse a String.
  404. */
  405. struct StringIterator
  406. {
  407. StringIterator (const String& s) noexcept : t (s.getCharPointer()), numChars (0) {}
  408. StringIterator (String::CharPointerType s) noexcept : t (s), numChars (0) {}
  409. juce_wchar nextChar() noexcept { if (isEOF()) return 0; ++numChars; return t.getAndAdvance(); }
  410. juce_wchar peekNextChar()noexcept { return *t; }
  411. void skip() noexcept { if (! isEOF()) { ++t; ++numChars; } }
  412. void skipWhitespace() noexcept { while (t.isWhitespace()) skip(); }
  413. void skipToEndOfLine() noexcept { while (*t != '\r' && *t != '\n' && *t != 0) skip(); }
  414. bool isEOF() const noexcept { return t.isEmpty(); }
  415. String::CharPointerType t;
  416. int numChars;
  417. };
  418. //==============================================================================
  419. /** Takes a UTF8 string and writes it to a stream using standard C++ escape sequences for any
  420. non-ascii bytes.
  421. Although not strictly a tokenising function, this is still a function that often comes in
  422. handy when working with C++ code!
  423. Note that addEscapeChars() is easier to use than this function if you're working with Strings.
  424. @see addEscapeChars
  425. */
  426. static void writeEscapeChars (OutputStream& out, const char* utf8, const int numBytesToRead,
  427. const int maxCharsOnLine, const bool breakAtNewLines,
  428. const bool replaceSingleQuotes, const bool allowStringBreaks)
  429. {
  430. int charsOnLine = 0;
  431. bool lastWasHexEscapeCode = false;
  432. bool trigraphDetected = false;
  433. for (int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
  434. {
  435. const unsigned char c = (unsigned char) utf8[i];
  436. bool startNewLine = false;
  437. switch (c)
  438. {
  439. case '\t': out << "\\t"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  440. case '\r': out << "\\r"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  441. case '\n': out << "\\n"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; startNewLine = breakAtNewLines; break;
  442. case '\\': out << "\\\\"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  443. case '\"': out << "\\\""; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  444. case '?':
  445. if (trigraphDetected)
  446. {
  447. out << "\\?";
  448. charsOnLine++;
  449. trigraphDetected = false;
  450. }
  451. else
  452. {
  453. out << "?";
  454. trigraphDetected = true;
  455. }
  456. lastWasHexEscapeCode = false;
  457. charsOnLine++;
  458. break;
  459. case 0:
  460. if (numBytesToRead < 0)
  461. return;
  462. out << "\\0";
  463. lastWasHexEscapeCode = true;
  464. trigraphDetected = false;
  465. charsOnLine += 2;
  466. break;
  467. case '\'':
  468. if (replaceSingleQuotes)
  469. {
  470. out << "\\\'";
  471. lastWasHexEscapeCode = false;
  472. trigraphDetected = false;
  473. charsOnLine += 2;
  474. break;
  475. }
  476. // deliberate fall-through...
  477. default:
  478. if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode // (have to avoid following a hex escape sequence with a valid hex digit)
  479. && CharacterFunctions::getHexDigitValue (c) >= 0))
  480. {
  481. out << (char) c;
  482. lastWasHexEscapeCode = false;
  483. trigraphDetected = false;
  484. ++charsOnLine;
  485. }
  486. else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
  487. {
  488. out << "\"\"" << (char) c;
  489. lastWasHexEscapeCode = false;
  490. trigraphDetected = false;
  491. charsOnLine += 3;
  492. }
  493. else
  494. {
  495. out << (c < 16 ? "\\x0" : "\\x") << String::toHexString ((int) c);
  496. lastWasHexEscapeCode = true;
  497. trigraphDetected = false;
  498. charsOnLine += 4;
  499. }
  500. break;
  501. }
  502. if ((startNewLine || (maxCharsOnLine > 0 && charsOnLine >= maxCharsOnLine))
  503. && (numBytesToRead < 0 || i < numBytesToRead - 1))
  504. {
  505. charsOnLine = 0;
  506. out << "\"" << newLine << "\"";
  507. lastWasHexEscapeCode = false;
  508. }
  509. }
  510. }
  511. /** Takes a string and returns a version of it where standard C++ escape sequences have been
  512. used to replace any non-ascii bytes.
  513. Although not strictly a tokenising function, this is still a function that often comes in
  514. handy when working with C++ code!
  515. @see writeEscapeChars
  516. */
  517. static String addEscapeChars (const String& s)
  518. {
  519. MemoryOutputStream mo;
  520. writeEscapeChars (mo, s.toRawUTF8(), -1, -1, false, true, true);
  521. return mo.toString();
  522. }
  523. };
  524. } // namespace juce