The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

668 lines
20KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE 6 technical preview.
  4. Copyright (c) 2020 - Raw Material Software Limited
  5. You may use this code under the terms of the GPL v3
  6. (see www.gnu.org/licenses).
  7. For this technical preview, this file is not subject to commercial licensing.
  8. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  9. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  10. DISCLAIMED.
  11. ==============================================================================
  12. */
  13. namespace juce
  14. {
  15. //==============================================================================
  16. /** Class containing some basic functions for simple tokenising of C++ code.
  17. @tags{GUI}
  18. */
  19. struct CppTokeniserFunctions
  20. {
  21. static bool isIdentifierStart (const juce_wchar c) noexcept
  22. {
  23. return CharacterFunctions::isLetter (c)
  24. || c == '_' || c == '@';
  25. }
  26. static bool isIdentifierBody (const juce_wchar c) noexcept
  27. {
  28. return CharacterFunctions::isLetterOrDigit (c)
  29. || c == '_' || c == '@';
  30. }
  31. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  32. {
  33. static const char* const keywords2Char[] =
  34. { "do", "if", "or", nullptr };
  35. static const char* const keywords3Char[] =
  36. { "and", "asm", "for", "int", "new", "not", "try", "xor", nullptr };
  37. static const char* const keywords4Char[] =
  38. { "auto", "bool", "case", "char", "else", "enum", "goto",
  39. "long", "this", "true", "void", nullptr };
  40. static const char* const keywords5Char[] =
  41. { "bitor", "break", "catch", "class", "compl", "const", "false", "final",
  42. "float", "or_eq", "short", "throw", "union", "using", "while", nullptr };
  43. static const char* const keywords6Char[] =
  44. { "and_eq", "bitand", "delete", "double", "export", "extern", "friend",
  45. "import", "inline", "module", "not_eq", "public", "return", "signed",
  46. "sizeof", "static", "struct", "switch", "typeid", "xor_eq", nullptr };
  47. static const char* const keywords7Char[] =
  48. { "__cdecl", "_Pragma", "alignas", "alignof", "concept", "default",
  49. "mutable", "nullptr", "private", "typedef", "uint8_t", "virtual",
  50. "wchar_t", nullptr };
  51. static const char* const keywordsOther[] =
  52. { "@class", "@dynamic", "@end", "@implementation", "@interface", "@public",
  53. "@private", "@protected", "@property", "@synthesize", "__fastcall", "__stdcall",
  54. "atomic_cancel", "atomic_commit", "atomic_noexcept", "char16_t", "char32_t",
  55. "co_await", "co_return", "co_yield", "const_cast", "constexpr", "continue",
  56. "decltype", "dynamic_cast", "explicit", "namespace", "noexcept", "operator", "override",
  57. "protected", "register", "reinterpret_cast", "requires", "static_assert",
  58. "static_cast", "synchronized", "template", "thread_local", "typename", "unsigned",
  59. "volatile", nullptr };
  60. const char* const* k;
  61. switch (tokenLength)
  62. {
  63. case 2: k = keywords2Char; break;
  64. case 3: k = keywords3Char; break;
  65. case 4: k = keywords4Char; break;
  66. case 5: k = keywords5Char; break;
  67. case 6: k = keywords6Char; break;
  68. case 7: k = keywords7Char; break;
  69. default:
  70. if (tokenLength < 2 || tokenLength > 16)
  71. return false;
  72. k = keywordsOther;
  73. break;
  74. }
  75. for (int i = 0; k[i] != nullptr; ++i)
  76. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  77. return true;
  78. return false;
  79. }
  80. template <typename Iterator>
  81. static int parseIdentifier (Iterator& source) noexcept
  82. {
  83. int tokenLength = 0;
  84. String::CharPointerType::CharType possibleIdentifier[100];
  85. String::CharPointerType possible (possibleIdentifier);
  86. while (isIdentifierBody (source.peekNextChar()))
  87. {
  88. auto c = source.nextChar();
  89. if (tokenLength < 20)
  90. possible.write (c);
  91. ++tokenLength;
  92. }
  93. if (tokenLength > 1 && tokenLength <= 16)
  94. {
  95. possible.writeNull();
  96. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  97. return CPlusPlusCodeTokeniser::tokenType_keyword;
  98. }
  99. return CPlusPlusCodeTokeniser::tokenType_identifier;
  100. }
  101. template <typename Iterator>
  102. static bool skipNumberSuffix (Iterator& source)
  103. {
  104. auto c = source.peekNextChar();
  105. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  106. source.skip();
  107. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  108. return false;
  109. return true;
  110. }
  111. static bool isHexDigit (const juce_wchar c) noexcept
  112. {
  113. return (c >= '0' && c <= '9')
  114. || (c >= 'a' && c <= 'f')
  115. || (c >= 'A' && c <= 'F');
  116. }
  /** Tries to consume a hexadecimal integer literal: an optional '-', then "0x"
      or "0X", at least one hex digit, and an optional suffix.

      @returns true if a complete literal was consumed. On failure the iterator
               may have been advanced, so callers that need to retry must keep
               their own copy of the starting position.
  */
  template <typename Iterator>
  static bool parseHexLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      if (source.nextChar() != '0')
          return false;

      const auto prefix = source.nextChar();

      if (! (prefix == 'x' || prefix == 'X'))
          return false;

      // at least one digit is required after the prefix
      if (! isHexDigit (source.peekNextChar()))
          return false;

      do
      {
          source.skip();
      }
      while (isHexDigit (source.peekNextChar()));

      return skipNumberSuffix (source);
  }
  137. static bool isOctalDigit (const juce_wchar c) noexcept
  138. {
  139. return c >= '0' && c <= '7';
  140. }
  /** Tries to consume an octal integer literal: an optional '-', a leading '0',
      at least one further octal digit, and an optional suffix.

      @returns true if a complete literal was consumed. On failure the iterator
               may have been advanced.
  */
  template <typename Iterator>
  static bool parseOctalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      const auto leadingZero = source.nextChar();

      if (leadingZero != '0')
          return false;

      const auto firstDigit = source.nextChar();

      if (! isOctalDigit (firstDigit))
          return false;

      for (;;)
      {
          if (! isOctalDigit (source.peekNextChar()))
              break;

          source.skip();
      }

      return skipNumberSuffix (source);
  }
  154. static bool isDecimalDigit (const juce_wchar c) noexcept
  155. {
  156. return c >= '0' && c <= '9';
  157. }
  /** Tries to consume a decimal integer literal: an optional '-', at least one
      decimal digit, and an optional suffix.

      @returns true if a complete literal was consumed. On failure the iterator
               may have been advanced.
  */
  template <typename Iterator>
  static bool parseDecimalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      // at least one digit is required
      if (! isDecimalDigit (source.peekNextChar()))
          return false;

      do
      {
          source.skip();
      }
      while (isDecimalDigit (source.peekNextChar()));

      return skipNumberSuffix (source);
  }
  /** Tries to consume a floating-point literal.

      The accepted form is: an optional '-', digits with an optional decimal point
      (at least one digit in total), an optional exponent ('e' or 'E', optional
      sign, at least one digit) and an optional 'f' or 'F' suffix. A run of plain
      digits only counts as a float if it has a point, an exponent or the suffix.

      @returns true if a float literal was consumed. On failure the iterator may
               have been advanced.
  */
  template <typename Iterator>
  static bool parseFloatLiteral (Iterator& source) noexcept
  {
      // Consumes a run of decimal digits and reports how many there were.
      auto skipDigits = [&source]
      {
          int count = 0;

          while (isDecimalDigit (source.peekNextChar()))
          {
              source.skip();
              ++count;
          }

          return count;
      };

      if (source.peekNextChar() == '-')
          source.skip();

      int mantissaDigits = skipDigits();

      const bool hasPoint = (source.peekNextChar() == '.');

      if (hasPoint)
      {
          source.skip();
          mantissaDigits += skipDigits();
      }

      if (mantissaDigits == 0)
          return false;

      const auto exponentMarker = source.peekNextChar();
      const bool hasExponent = (exponentMarker == 'e' || exponentMarker == 'E');

      if (hasExponent)
      {
          source.skip();

          const auto sign = source.peekNextChar();

          if (sign == '+' || sign == '-')
              source.skip();

          if (skipDigits() == 0)
              return false;
      }

      const auto suffix = source.peekNextChar();

      if (suffix == 'f' || suffix == 'F')
      {
          source.skip();
          return true;
      }

      // without the suffix, something must distinguish this from an integer
      return hasExponent || hasPoint;
  }
  220. template <typename Iterator>
  221. static int parseNumber (Iterator& source)
  222. {
  223. const Iterator original (source);
  224. if (parseFloatLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_float;
  225. source = original;
  226. if (parseHexLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  227. source = original;
  228. if (parseOctalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  229. source = original;
  230. if (parseDecimalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  231. source = original;
  232. return CPlusPlusCodeTokeniser::tokenType_error;
  233. }
  /** Skips over a quoted string or character literal.

      The first character read is taken as the quote character. Reading then
      continues until the same character is seen again or the iterator returns 0.
      The character after a backslash is skipped, so an escaped quote does not
      end the literal.
  */
  template <typename Iterator>
  static void skipQuotedString (Iterator& source) noexcept
  {
      const auto openingQuote = source.nextChar();

      for (auto c = source.nextChar(); c != openingQuote && c != 0; c = source.nextChar())
          if (c == '\\')
              source.skip();
  }
  /** Skips the remainder of a block comment.

      Characters are consumed up to and including the first '/' that directly
      follows a '*', or until the iterator returns 0.
  */
  template <typename Iterator>
  static void skipComment (Iterator& source) noexcept
  {
      bool previousWasStar = false;

      for (auto c = source.nextChar(); c != 0; c = source.nextChar())
      {
          if (previousWasStar && c == '/')
              return;

          previousWasStar = (c == '*');
      }
  }
  /** Skips over a preprocessor directive.

      Scanning stops without consuming anything further when a comment opener
      ("//" or a slash followed by '*') is found, or at the end of the input.
      Double-quoted strings are skipped as a unit. At a line break,
      skipToEndOfLine() is called, and if the character before the break was a
      backslash the function calls itself to handle the continuation line.
  */
  template <typename Iterator>
  static void skipPreprocessorLine (Iterator& source) noexcept
  {
      bool continuationPending = false;

      for (;;)
      {
          const auto c = source.peekNextChar();

          if (c == 0)
              return;

          if (c == '"')
          {
              // leaves continuationPending untouched
              skipQuotedString (source);
              continue;
          }

          if (c == '\r' || c == '\n')
          {
              source.skipToEndOfLine();

              if (continuationPending)
                  skipPreprocessorLine (source);

              return;
          }

          if (c == '/')
          {
              // peek one character further on a copy, leaving source untouched
              Iterator lookahead (source);
              lookahead.skip();
              const auto following = lookahead.peekNextChar();

              if (following == '/' || following == '*')
                  return;
          }

          continuationPending = (c == '\\');
          source.skip();
      }
  }
  292. template <typename Iterator>
  293. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  294. {
  295. if (source.peekNextChar() == c)
  296. source.skip();
  297. }
  298. template <typename Iterator>
  299. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  300. {
  301. auto c = source.peekNextChar();
  302. if (c == c1 || c == c2)
  303. source.skip();
  304. }
  305. template <typename Iterator>
  306. static int readNextToken (Iterator& source)
  307. {
  308. source.skipWhitespace();
  309. auto firstChar = source.peekNextChar();
  310. switch (firstChar)
  311. {
  312. case 0:
  313. break;
  314. case '0': case '1': case '2': case '3': case '4':
  315. case '5': case '6': case '7': case '8': case '9':
  316. case '.':
  317. {
  318. auto result = parseNumber (source);
  319. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  320. {
  321. source.skip();
  322. if (firstChar == '.')
  323. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  324. }
  325. return result;
  326. }
  327. case ',':
  328. case ';':
  329. case ':':
  330. source.skip();
  331. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  332. case '(': case ')':
  333. case '{': case '}':
  334. case '[': case ']':
  335. source.skip();
  336. return CPlusPlusCodeTokeniser::tokenType_bracket;
  337. case '"':
  338. case '\'':
  339. skipQuotedString (source);
  340. return CPlusPlusCodeTokeniser::tokenType_string;
  341. case '+':
  342. source.skip();
  343. skipIfNextCharMatches (source, '+', '=');
  344. return CPlusPlusCodeTokeniser::tokenType_operator;
  345. case '-':
  346. {
  347. source.skip();
  348. auto result = parseNumber (source);
  349. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  350. {
  351. skipIfNextCharMatches (source, '-', '=');
  352. return CPlusPlusCodeTokeniser::tokenType_operator;
  353. }
  354. return result;
  355. }
  356. case '*': case '%':
  357. case '=': case '!':
  358. source.skip();
  359. skipIfNextCharMatches (source, '=');
  360. return CPlusPlusCodeTokeniser::tokenType_operator;
  361. case '/':
  362. {
  363. source.skip();
  364. auto nextChar = source.peekNextChar();
  365. if (nextChar == '/')
  366. {
  367. source.skipToEndOfLine();
  368. return CPlusPlusCodeTokeniser::tokenType_comment;
  369. }
  370. if (nextChar == '*')
  371. {
  372. source.skip();
  373. skipComment (source);
  374. return CPlusPlusCodeTokeniser::tokenType_comment;
  375. }
  376. if (nextChar == '=')
  377. source.skip();
  378. return CPlusPlusCodeTokeniser::tokenType_operator;
  379. }
  380. case '?':
  381. case '~':
  382. source.skip();
  383. return CPlusPlusCodeTokeniser::tokenType_operator;
  384. case '<': case '>':
  385. case '|': case '&': case '^':
  386. source.skip();
  387. skipIfNextCharMatches (source, firstChar);
  388. skipIfNextCharMatches (source, '=');
  389. return CPlusPlusCodeTokeniser::tokenType_operator;
  390. case '#':
  391. skipPreprocessorLine (source);
  392. return CPlusPlusCodeTokeniser::tokenType_preprocessor;
  393. default:
  394. if (isIdentifierStart (firstChar))
  395. return parseIdentifier (source);
  396. source.skip();
  397. break;
  398. }
  399. return CPlusPlusCodeTokeniser::tokenType_error;
  400. }
  401. /** A class that can be passed to the CppTokeniserFunctions functions in order to
  402. parse a String.
  403. */
  404. struct StringIterator
  405. {
  406. StringIterator (const String& s) noexcept : t (s.getCharPointer()) {}
  407. StringIterator (String::CharPointerType s) noexcept : t (s) {}
  408. juce_wchar nextChar() noexcept { if (isEOF()) return 0; ++numChars; return t.getAndAdvance(); }
  409. juce_wchar peekNextChar()noexcept { return *t; }
  410. void skip() noexcept { if (! isEOF()) { ++t; ++numChars; } }
  411. void skipWhitespace() noexcept { while (t.isWhitespace()) skip(); }
  412. void skipToEndOfLine() noexcept { while (*t != '\r' && *t != '\n' && *t != 0) skip(); }
  413. bool isEOF() const noexcept { return t.isEmpty(); }
  414. String::CharPointerType t;
  415. int numChars = 0;
  416. };
  417. //==============================================================================
  418. /** Takes a UTF8 string and writes it to a stream using standard C++ escape sequences for any
  419. non-ascii bytes.
  420. Although not strictly a tokenising function, this is still a function that often comes in
  421. handy when working with C++ code!
  422. Note that addEscapeChars() is easier to use than this function if you're working with Strings.
  423. @see addEscapeChars
  424. */
  425. static void writeEscapeChars (OutputStream& out, const char* utf8, const int numBytesToRead,
  426. const int maxCharsOnLine, const bool breakAtNewLines,
  427. const bool replaceSingleQuotes, const bool allowStringBreaks)
  428. {
  429. int charsOnLine = 0;
  430. bool lastWasHexEscapeCode = false;
  431. bool trigraphDetected = false;
  432. for (int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
  433. {
  434. auto c = (unsigned char) utf8[i];
  435. bool startNewLine = false;
  436. switch (c)
  437. {
  438. case '\t': out << "\\t"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  439. case '\r': out << "\\r"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  440. case '\n': out << "\\n"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; startNewLine = breakAtNewLines; break;
  441. case '\\': out << "\\\\"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  442. case '\"': out << "\\\""; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  443. case '?':
  444. if (trigraphDetected)
  445. {
  446. out << "\\?";
  447. charsOnLine++;
  448. trigraphDetected = false;
  449. }
  450. else
  451. {
  452. out << "?";
  453. trigraphDetected = true;
  454. }
  455. lastWasHexEscapeCode = false;
  456. charsOnLine++;
  457. break;
  458. case 0:
  459. if (numBytesToRead < 0)
  460. return;
  461. out << "\\0";
  462. lastWasHexEscapeCode = true;
  463. trigraphDetected = false;
  464. charsOnLine += 2;
  465. break;
  466. case '\'':
  467. if (replaceSingleQuotes)
  468. {
  469. out << "\\\'";
  470. lastWasHexEscapeCode = false;
  471. trigraphDetected = false;
  472. charsOnLine += 2;
  473. break;
  474. }
  475. // deliberate fall-through...
  476. JUCE_FALLTHROUGH
  477. default:
  478. if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode // (have to avoid following a hex escape sequence with a valid hex digit)
  479. && CharacterFunctions::getHexDigitValue (c) >= 0))
  480. {
  481. out << (char) c;
  482. lastWasHexEscapeCode = false;
  483. trigraphDetected = false;
  484. ++charsOnLine;
  485. }
  486. else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
  487. {
  488. out << "\"\"" << (char) c;
  489. lastWasHexEscapeCode = false;
  490. trigraphDetected = false;
  491. charsOnLine += 3;
  492. }
  493. else
  494. {
  495. out << (c < 16 ? "\\x0" : "\\x") << String::toHexString ((int) c);
  496. lastWasHexEscapeCode = true;
  497. trigraphDetected = false;
  498. charsOnLine += 4;
  499. }
  500. break;
  501. }
  502. if ((startNewLine || (maxCharsOnLine > 0 && charsOnLine >= maxCharsOnLine))
  503. && (numBytesToRead < 0 || i < numBytesToRead - 1))
  504. {
  505. charsOnLine = 0;
  506. out << "\"" << newLine << "\"";
  507. lastWasHexEscapeCode = false;
  508. }
  509. }
  510. }
  511. /** Takes a string and returns a version of it where standard C++ escape sequences have been
  512. used to replace any non-ascii bytes.
  513. Although not strictly a tokenising function, this is still a function that often comes in
  514. handy when working with C++ code!
  515. @see writeEscapeChars
  516. */
  517. static String addEscapeChars (const String& s)
  518. {
  519. MemoryOutputStream mo;
  520. writeEscapeChars (mo, s.toRawUTF8(), -1, -1, false, true, true);
  521. return mo.toString();
  522. }
  523. };
  524. } // namespace juce