The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

675 lines
21KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2020 - Raw Material Software Limited
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. By using JUCE, you agree to the terms of both the JUCE 6 End-User License
  8. Agreement and JUCE Privacy Policy (both effective as of the 16th June 2020).
  9. End User License Agreement: www.juce.com/juce-6-licence
  10. Privacy Policy: www.juce.com/juce-privacy-policy
  11. Or: You may also use this code under the terms of the GPL v3 (see
  12. www.gnu.org/licenses).
  13. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  14. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  15. DISCLAIMED.
  16. ==============================================================================
  17. */
  18. namespace juce
  19. {
  20. //==============================================================================
  21. /** Class containing some basic functions for simple tokenising of C++ code.
  22. @tags{GUI}
  23. */
  24. struct CppTokeniserFunctions
  25. {
  26. static bool isIdentifierStart (const juce_wchar c) noexcept
  27. {
  28. return CharacterFunctions::isLetter (c)
  29. || c == '_' || c == '@';
  30. }
  31. static bool isIdentifierBody (const juce_wchar c) noexcept
  32. {
  33. return CharacterFunctions::isLetterOrDigit (c)
  34. || c == '_' || c == '@';
  35. }
  36. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  37. {
  38. static const char* const keywords2Char[] =
  39. { "do", "if", "or", nullptr };
  40. static const char* const keywords3Char[] =
  41. { "and", "asm", "for", "int", "new", "not", "try", "xor", nullptr };
  42. static const char* const keywords4Char[] =
  43. { "auto", "bool", "case", "char", "else", "enum", "goto",
  44. "long", "this", "true", "void", nullptr };
  45. static const char* const keywords5Char[] =
  46. { "bitor", "break", "catch", "class", "compl", "const", "false", "final",
  47. "float", "or_eq", "short", "throw", "union", "using", "while", nullptr };
  48. static const char* const keywords6Char[] =
  49. { "and_eq", "bitand", "delete", "double", "export", "extern", "friend",
  50. "import", "inline", "module", "not_eq", "public", "return", "signed",
  51. "sizeof", "static", "struct", "switch", "typeid", "xor_eq", nullptr };
  52. static const char* const keywords7Char[] =
  53. { "__cdecl", "_Pragma", "alignas", "alignof", "concept", "default",
  54. "mutable", "nullptr", "private", "typedef", "uint8_t", "virtual",
  55. "wchar_t", nullptr };
  56. static const char* const keywordsOther[] =
  57. { "@class", "@dynamic", "@end", "@implementation", "@interface", "@public",
  58. "@private", "@protected", "@property", "@synthesize", "__fastcall", "__stdcall",
  59. "atomic_cancel", "atomic_commit", "atomic_noexcept", "char16_t", "char32_t",
  60. "co_await", "co_return", "co_yield", "const_cast", "constexpr", "continue",
  61. "decltype", "dynamic_cast", "explicit", "namespace", "noexcept", "operator", "override",
  62. "protected", "register", "reinterpret_cast", "requires", "static_assert",
  63. "static_cast", "synchronized", "template", "thread_local", "typename", "unsigned",
  64. "volatile", nullptr };
  65. const char* const* k;
  66. switch (tokenLength)
  67. {
  68. case 2: k = keywords2Char; break;
  69. case 3: k = keywords3Char; break;
  70. case 4: k = keywords4Char; break;
  71. case 5: k = keywords5Char; break;
  72. case 6: k = keywords6Char; break;
  73. case 7: k = keywords7Char; break;
  74. default:
  75. if (tokenLength < 2 || tokenLength > 16)
  76. return false;
  77. k = keywordsOther;
  78. break;
  79. }
  80. for (int i = 0; k[i] != nullptr; ++i)
  81. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  82. return true;
  83. return false;
  84. }
  85. template <typename Iterator>
  86. static int parseIdentifier (Iterator& source) noexcept
  87. {
  88. int tokenLength = 0;
  89. String::CharPointerType::CharType possibleIdentifier[100];
  90. String::CharPointerType possible (possibleIdentifier);
  91. while (isIdentifierBody (source.peekNextChar()))
  92. {
  93. auto c = source.nextChar();
  94. if (tokenLength < 20)
  95. possible.write (c);
  96. ++tokenLength;
  97. }
  98. if (tokenLength > 1 && tokenLength <= 16)
  99. {
  100. possible.writeNull();
  101. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  102. return CPlusPlusCodeTokeniser::tokenType_keyword;
  103. }
  104. return CPlusPlusCodeTokeniser::tokenType_identifier;
  105. }
  106. template <typename Iterator>
  107. static bool skipNumberSuffix (Iterator& source)
  108. {
  109. auto c = source.peekNextChar();
  110. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  111. source.skip();
  112. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  113. return false;
  114. return true;
  115. }
  116. static bool isHexDigit (const juce_wchar c) noexcept
  117. {
  118. return (c >= '0' && c <= '9')
  119. || (c >= 'a' && c <= 'f')
  120. || (c >= 'A' && c <= 'F');
  121. }
  /** Tries to consume a hexadecimal integer literal: an optional '-', then "0x" or
      "0X", at least one hex digit, and whatever skipNumberSuffix() accepts.

      The iterator is advanced even when parsing fails, so callers that need to
      retry must keep their own copy of the starting position.

      @returns  true if a complete hex literal was consumed
  */
  template <typename Iterator>
  static bool parseHexLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      if (source.nextChar() != '0')
          return false;

      const auto radixMarker = source.nextChar();

      if (! (radixMarker == 'x' || radixMarker == 'X'))
          return false;

      int digitCount = 0;

      for (; isHexDigit (source.peekNextChar()); source.skip())
          ++digitCount;

      return digitCount > 0 && skipNumberSuffix (source);
  }
  142. static bool isOctalDigit (const juce_wchar c) noexcept
  143. {
  144. return c >= '0' && c <= '7';
  145. }
  /** Tries to consume an octal integer literal: an optional '-', a leading '0', at
      least one octal digit, and whatever skipNumberSuffix() accepts.

      The iterator is advanced even when parsing fails.

      @returns  true if a complete octal literal was consumed
  */
  template <typename Iterator>
  static bool parseOctalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      const bool hasLeadingZero = (source.nextChar() == '0');

      // (the second character is only consumed when the first one was a zero)
      if (! hasLeadingZero || ! isOctalDigit (source.nextChar()))
          return false;

      for (; isOctalDigit (source.peekNextChar()); source.skip())
      {
      }

      return skipNumberSuffix (source);
  }
  159. static bool isDecimalDigit (const juce_wchar c) noexcept
  160. {
  161. return c >= '0' && c <= '9';
  162. }
  /** Tries to consume a decimal integer literal: an optional '-', at least one
      decimal digit, and whatever skipNumberSuffix() accepts.

      The iterator is advanced even when parsing fails.

      @returns  true if a complete decimal literal was consumed
  */
  template <typename Iterator>
  static bool parseDecimalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      int digitCount = 0;

      for (; isDecimalDigit (source.peekNextChar()); source.skip())
          ++digitCount;

      return digitCount > 0 && skipNumberSuffix (source);
  }
  /** Tries to consume a floating-point literal.

      The accepted form is: an optional '-', decimal digits with an optional '.'
      (at least one digit in total), an optional exponent ('e' or 'E', an optional
      sign, and at least one digit), and an optional suffix. The 'f'/'F' suffix is
      always accepted; the long-double suffix 'l'/'L' is only accepted when the
      number has a point or an exponent, so that integers such as "10L" are left
      for the integer parsers.

      The iterator is advanced even when parsing fails.

      @returns  true if the text was a floating-point literal, i.e. it had a point,
                an exponent or an 'f' suffix
  */
  template <typename Iterator>
  static bool parseFloatLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      int numDigits = 0;

      while (isDecimalDigit (source.peekNextChar()))
      {
          source.skip();
          ++numDigits;
      }

      const bool hasPoint = (source.peekNextChar() == '.');

      if (hasPoint)
      {
          source.skip();

          while (isDecimalDigit (source.peekNextChar()))
          {
              source.skip();
              ++numDigits;
          }
      }

      // digits may sit on either side of the point, but there must be at least one
      if (numDigits == 0)
          return false;

      auto c = source.peekNextChar();
      const bool hasExponent = (c == 'e' || c == 'E');

      if (hasExponent)
      {
          source.skip();
          c = source.peekNextChar();

          if (c == '+' || c == '-')
              source.skip();

          int numExpDigits = 0;

          while (isDecimalDigit (source.peekNextChar()))
          {
              source.skip();
              ++numExpDigits;
          }

          if (numExpDigits == 0)
              return false;
      }

      c = source.peekNextChar();
      const bool isFloatingPoint = hasExponent || hasPoint;

      const bool hasFloatSuffix      = (c == 'f' || c == 'F');
      const bool hasLongDoubleSuffix = isFloatingPoint && (c == 'l' || c == 'L');

      if (hasFloatSuffix || hasLongDoubleSuffix)
      {
          source.skip();
          return true;
      }

      // without a suffix, only a point or an exponent makes this a float
      return isFloatingPoint;
  }
  225. template <typename Iterator>
  226. static int parseNumber (Iterator& source)
  227. {
  228. const Iterator original (source);
  229. if (parseFloatLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_float;
  230. source = original;
  231. if (parseHexLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  232. source = original;
  233. if (parseOctalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  234. source = original;
  235. if (parseDecimalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  236. source = original;
  237. return CPlusPlusCodeTokeniser::tokenType_error;
  238. }
  /** Skips over a quoted string or character literal.

      The first character read is taken to be the opening quote. Characters are then
      consumed up to and including the next matching quote, or until the iterator
      returns 0. A backslash causes the character that follows it to be skipped, so
      an escaped quote doesn't terminate the literal.
  */
  template <typename Iterator>
  static void skipQuotedString (Iterator& source) noexcept
  {
      const auto openingQuote = source.nextChar();

      for (auto ch = source.nextChar(); ch != openingQuote && ch != 0; ch = source.nextChar())
          if (ch == '\\')
              source.skip();
  }
  /** Skips the remainder of a block comment.

      Characters are consumed up to and including the first '/' that directly
      follows a '*' (the closing star-slash pair), or until the iterator returns 0.
  */
  template <typename Iterator>
  static void skipComment (Iterator& source) noexcept
  {
      bool previousWasStar = false;

      for (auto ch = source.nextChar(); ch != 0; ch = source.nextChar())
      {
          if (previousWasStar && ch == '/')
              break;

          previousWasStar = (ch == '*');
      }
  }
  /** Skips over a preprocessor directive.

      Scanning stops:
      - just before a '/' that begins a comment, leaving the comment unconsumed;
      - when the iterator reports a 0 character;
      - at a line break, after calling skipToEndOfLine() on the iterator - unless
        the character before the line break was a backslash, in which case scanning
        carries on as if a new directive line had started.

      Double-quoted strings within the directive are skipped as a unit.
  */
  template <typename Iterator>
  static void skipPreprocessorLine (Iterator& source) noexcept
  {
      bool continuationPending = false;

      for (;;)
      {
          const auto current = source.peekNextChar();

          if (current == '"')
          {
              skipQuotedString (source);
              continue;
          }

          if (current == '/')
          {
              // look one character ahead without disturbing the real iterator
              Iterator lookahead (source);
              lookahead.skip();
              const auto following = lookahead.peekNextChar();

              if (following == '/' || following == '*')
                  return;
          }

          if (current == 0)
              return;

          if (current == '\n' || current == '\r')
          {
              source.skipToEndOfLine();

              if (! continuationPending)
                  return;

              // the line ended with a backslash, so start afresh on what follows
              continuationPending = false;
              continue;
          }

          continuationPending = (current == '\\');
          source.skip();
      }
  }
  297. template <typename Iterator>
  298. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  299. {
  300. if (source.peekNextChar() == c)
  301. source.skip();
  302. }
  303. template <typename Iterator>
  304. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  305. {
  306. auto c = source.peekNextChar();
  307. if (c == c1 || c == c2)
  308. source.skip();
  309. }
  310. template <typename Iterator>
  311. static int readNextToken (Iterator& source)
  312. {
  313. source.skipWhitespace();
  314. auto firstChar = source.peekNextChar();
  315. switch (firstChar)
  316. {
  317. case 0:
  318. break;
  319. case '0': case '1': case '2': case '3': case '4':
  320. case '5': case '6': case '7': case '8': case '9':
  321. case '.':
  322. {
  323. auto result = parseNumber (source);
  324. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  325. {
  326. source.skip();
  327. if (firstChar == '.')
  328. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  329. }
  330. return result;
  331. }
  332. case ',':
  333. case ';':
  334. case ':':
  335. source.skip();
  336. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  337. case '(': case ')':
  338. case '{': case '}':
  339. case '[': case ']':
  340. source.skip();
  341. return CPlusPlusCodeTokeniser::tokenType_bracket;
  342. case '"':
  343. case '\'':
  344. skipQuotedString (source);
  345. return CPlusPlusCodeTokeniser::tokenType_string;
  346. case '+':
  347. source.skip();
  348. skipIfNextCharMatches (source, '+', '=');
  349. return CPlusPlusCodeTokeniser::tokenType_operator;
  350. case '-':
  351. {
  352. source.skip();
  353. auto result = parseNumber (source);
  354. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  355. {
  356. skipIfNextCharMatches (source, '-', '=');
  357. return CPlusPlusCodeTokeniser::tokenType_operator;
  358. }
  359. return result;
  360. }
  361. case '*': case '%':
  362. case '=': case '!':
  363. source.skip();
  364. skipIfNextCharMatches (source, '=');
  365. return CPlusPlusCodeTokeniser::tokenType_operator;
  366. case '/':
  367. {
  368. source.skip();
  369. auto nextChar = source.peekNextChar();
  370. if (nextChar == '/')
  371. {
  372. source.skipToEndOfLine();
  373. return CPlusPlusCodeTokeniser::tokenType_comment;
  374. }
  375. if (nextChar == '*')
  376. {
  377. source.skip();
  378. skipComment (source);
  379. return CPlusPlusCodeTokeniser::tokenType_comment;
  380. }
  381. if (nextChar == '=')
  382. source.skip();
  383. return CPlusPlusCodeTokeniser::tokenType_operator;
  384. }
  385. case '?':
  386. case '~':
  387. source.skip();
  388. return CPlusPlusCodeTokeniser::tokenType_operator;
  389. case '<': case '>':
  390. case '|': case '&': case '^':
  391. source.skip();
  392. skipIfNextCharMatches (source, firstChar);
  393. skipIfNextCharMatches (source, '=');
  394. return CPlusPlusCodeTokeniser::tokenType_operator;
  395. case '#':
  396. skipPreprocessorLine (source);
  397. return CPlusPlusCodeTokeniser::tokenType_preprocessor;
  398. default:
  399. if (isIdentifierStart (firstChar))
  400. return parseIdentifier (source);
  401. source.skip();
  402. break;
  403. }
  404. return CPlusPlusCodeTokeniser::tokenType_error;
  405. }
  406. /** A class that can be passed to the CppTokeniserFunctions functions in order to
  407. parse a String.
  408. */
  409. struct StringIterator
  410. {
  411. StringIterator (const String& s) noexcept : t (s.getCharPointer()) {}
  412. StringIterator (String::CharPointerType s) noexcept : t (s) {}
  413. juce_wchar nextChar() noexcept { if (isEOF()) return 0; ++numChars; return t.getAndAdvance(); }
  414. juce_wchar peekNextChar()noexcept { return *t; }
  415. void skip() noexcept { if (! isEOF()) { ++t; ++numChars; } }
  416. void skipWhitespace() noexcept { while (t.isWhitespace()) skip(); }
  417. void skipToEndOfLine() noexcept { while (*t != '\r' && *t != '\n' && *t != 0) skip(); }
  418. bool isEOF() const noexcept { return t.isEmpty(); }
  419. String::CharPointerType t;
  420. int numChars = 0;
  421. };
  422. //==============================================================================
  423. /** Takes a UTF8 string and writes it to a stream using standard C++ escape sequences for any
  424. non-ascii bytes.
  425. Although not strictly a tokenising function, this is still a function that often comes in
  426. handy when working with C++ code!
  427. Note that addEscapeChars() is easier to use than this function if you're working with Strings.
  428. @see addEscapeChars
  429. */
  430. static void writeEscapeChars (OutputStream& out, const char* utf8, const int numBytesToRead,
  431. const int maxCharsOnLine, const bool breakAtNewLines,
  432. const bool replaceSingleQuotes, const bool allowStringBreaks)
  433. {
  434. int charsOnLine = 0;
  435. bool lastWasHexEscapeCode = false;
  436. bool trigraphDetected = false;
  437. for (int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
  438. {
  439. auto c = (unsigned char) utf8[i];
  440. bool startNewLine = false;
  441. switch (c)
  442. {
  443. case '\t': out << "\\t"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  444. case '\r': out << "\\r"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  445. case '\n': out << "\\n"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; startNewLine = breakAtNewLines; break;
  446. case '\\': out << "\\\\"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  447. case '\"': out << "\\\""; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  448. case '?':
  449. if (trigraphDetected)
  450. {
  451. out << "\\?";
  452. charsOnLine++;
  453. trigraphDetected = false;
  454. }
  455. else
  456. {
  457. out << "?";
  458. trigraphDetected = true;
  459. }
  460. lastWasHexEscapeCode = false;
  461. charsOnLine++;
  462. break;
  463. case 0:
  464. if (numBytesToRead < 0)
  465. return;
  466. out << "\\0";
  467. lastWasHexEscapeCode = true;
  468. trigraphDetected = false;
  469. charsOnLine += 2;
  470. break;
  471. case '\'':
  472. if (replaceSingleQuotes)
  473. {
  474. out << "\\\'";
  475. lastWasHexEscapeCode = false;
  476. trigraphDetected = false;
  477. charsOnLine += 2;
  478. break;
  479. }
  480. // deliberate fall-through...
  481. JUCE_FALLTHROUGH
  482. default:
  483. if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode // (have to avoid following a hex escape sequence with a valid hex digit)
  484. && CharacterFunctions::getHexDigitValue (c) >= 0))
  485. {
  486. out << (char) c;
  487. lastWasHexEscapeCode = false;
  488. trigraphDetected = false;
  489. ++charsOnLine;
  490. }
  491. else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
  492. {
  493. out << "\"\"" << (char) c;
  494. lastWasHexEscapeCode = false;
  495. trigraphDetected = false;
  496. charsOnLine += 3;
  497. }
  498. else
  499. {
  500. out << (c < 16 ? "\\x0" : "\\x") << String::toHexString ((int) c);
  501. lastWasHexEscapeCode = true;
  502. trigraphDetected = false;
  503. charsOnLine += 4;
  504. }
  505. break;
  506. }
  507. if ((startNewLine || (maxCharsOnLine > 0 && charsOnLine >= maxCharsOnLine))
  508. && (numBytesToRead < 0 || i < numBytesToRead - 1))
  509. {
  510. charsOnLine = 0;
  511. out << "\"" << newLine << "\"";
  512. lastWasHexEscapeCode = false;
  513. }
  514. }
  515. }
  516. /** Takes a string and returns a version of it where standard C++ escape sequences have been
  517. used to replace any non-ascii bytes.
  518. Although not strictly a tokenising function, this is still a function that often comes in
  519. handy when working with C++ code!
  520. @see writeEscapeChars
  521. */
  522. static String addEscapeChars (const String& s)
  523. {
  524. MemoryOutputStream mo;
  525. writeEscapeChars (mo, s.toRawUTF8(), -1, -1, false, true, true);
  526. return mo.toString();
  527. }
  528. };
  529. } // namespace juce