The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

676 lines
21KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. By using JUCE, you agree to the terms of both the JUCE 5 End-User License
  8. Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
  9. 27th April 2017).
  10. End User License Agreement: www.juce.com/juce-5-licence
  11. Privacy Policy: www.juce.com/juce-5-privacy-policy
  12. Or: You may also use this code under the terms of the GPL v3 (see
  13. www.gnu.org/licenses).
  14. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  15. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  16. DISCLAIMED.
  17. ==============================================================================
  18. */
  19. namespace juce
  20. {
  21. //==============================================================================
  22. /** Class containing some basic functions for simple tokenising of C++ code.
  23. @tags{GUI}
  24. */
  25. struct CppTokeniserFunctions
  26. {
  27. static bool isIdentifierStart (const juce_wchar c) noexcept
  28. {
  29. return CharacterFunctions::isLetter (c)
  30. || c == '_' || c == '@';
  31. }
  32. static bool isIdentifierBody (const juce_wchar c) noexcept
  33. {
  34. return CharacterFunctions::isLetterOrDigit (c)
  35. || c == '_' || c == '@';
  36. }
  37. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  38. {
  39. static const char* const keywords2Char[] =
  40. { "do", "if", "or", nullptr };
  41. static const char* const keywords3Char[] =
  42. { "and", "asm", "for", "int", "new", "not", "try", "xor", nullptr };
  43. static const char* const keywords4Char[] =
  44. { "auto", "bool", "case", "char", "else", "enum", "goto",
  45. "long", "this", "true", "void", nullptr };
  46. static const char* const keywords5Char[] =
  47. { "bitor", "break", "catch", "class", "compl", "const", "false", "final",
  48. "float", "or_eq", "short", "throw", "union", "using", "while", nullptr };
  49. static const char* const keywords6Char[] =
  50. { "and_eq", "bitand", "delete", "double", "export", "extern", "friend",
  51. "import", "inline", "module", "not_eq", "public", "return", "signed",
  52. "sizeof", "static", "struct", "switch", "typeid", "xor_eq", nullptr };
  53. static const char* const keywords7Char[] =
  54. { "__cdecl", "_Pragma", "alignas", "alignof", "concept", "default",
  55. "mutable", "nullptr", "private", "typedef", "uint8_t", "virtual",
  56. "wchar_t", nullptr };
  57. static const char* const keywordsOther[] =
  58. { "@class", "@dynamic", "@end", "@implementation", "@interface", "@public",
  59. "@private", "@protected", "@property", "@synthesize", "__fastcall", "__stdcall",
  60. "atomic_cancel", "atomic_commit", "atomic_noexcept", "char16_t", "char32_t",
  61. "co_await", "co_return", "co_yield", "const_cast", "constexpr", "continue",
  62. "decltype", "dynamic_cast", "explicit", "namespace", "noexcept", "operator", "override",
  63. "protected", "register", "reinterpret_cast", "requires", "static_assert",
  64. "static_cast", "synchronized", "template", "thread_local", "typename", "unsigned",
  65. "volatile", nullptr };
  66. const char* const* k;
  67. switch (tokenLength)
  68. {
  69. case 2: k = keywords2Char; break;
  70. case 3: k = keywords3Char; break;
  71. case 4: k = keywords4Char; break;
  72. case 5: k = keywords5Char; break;
  73. case 6: k = keywords6Char; break;
  74. case 7: k = keywords7Char; break;
  75. default:
  76. if (tokenLength < 2 || tokenLength > 16)
  77. return false;
  78. k = keywordsOther;
  79. break;
  80. }
  81. for (int i = 0; k[i] != nullptr; ++i)
  82. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  83. return true;
  84. return false;
  85. }
  86. template <typename Iterator>
  87. static int parseIdentifier (Iterator& source) noexcept
  88. {
  89. int tokenLength = 0;
  90. String::CharPointerType::CharType possibleIdentifier[100];
  91. String::CharPointerType possible (possibleIdentifier);
  92. while (isIdentifierBody (source.peekNextChar()))
  93. {
  94. auto c = source.nextChar();
  95. if (tokenLength < 20)
  96. possible.write (c);
  97. ++tokenLength;
  98. }
  99. if (tokenLength > 1 && tokenLength <= 16)
  100. {
  101. possible.writeNull();
  102. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  103. return CPlusPlusCodeTokeniser::tokenType_keyword;
  104. }
  105. return CPlusPlusCodeTokeniser::tokenType_identifier;
  106. }
  107. template <typename Iterator>
  108. static bool skipNumberSuffix (Iterator& source)
  109. {
  110. auto c = source.peekNextChar();
  111. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  112. source.skip();
  113. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  114. return false;
  115. return true;
  116. }
  117. static bool isHexDigit (const juce_wchar c) noexcept
  118. {
  119. return (c >= '0' && c <= '9')
  120. || (c >= 'a' && c <= 'f')
  121. || (c >= 'A' && c <= 'F');
  122. }
  /** Tries to consume a hexadecimal integer literal: an optional '-', then "0x" or
      "0X", at least one hex digit, and an optional suffix.

      @returns true if a complete literal was consumed. On failure the source may have
               been partially advanced, so callers need to restore it themselves.
  */
  template <typename Iterator>
  static bool parseHexLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      if (source.nextChar() != '0')
          return false;

      const auto prefix = source.nextChar();

      if (! (prefix == 'x' || prefix == 'X'))
          return false;

      bool sawDigit = false;

      for (; isHexDigit (source.peekNextChar()); source.skip())
          sawDigit = true;

      return sawDigit && skipNumberSuffix (source);
  }
  143. static bool isOctalDigit (const juce_wchar c) noexcept
  144. {
  145. return c >= '0' && c <= '7';
  146. }
  /** Tries to consume an octal integer literal: an optional '-', a leading '0', at
      least one further octal digit, and an optional suffix.

      @returns true if a complete literal was consumed. On failure the source may have
               been partially advanced, so callers need to restore it themselves.
  */
  template <typename Iterator>
  static bool parseOctalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      // The second nextChar() is only evaluated when the leading '0' was found.
      if (source.nextChar() != '0' || ! isOctalDigit (source.nextChar()))
          return false;

      while (isOctalDigit (source.peekNextChar()))
          source.skip();

      return skipNumberSuffix (source);
  }
  160. static bool isDecimalDigit (const juce_wchar c) noexcept
  161. {
  162. return c >= '0' && c <= '9';
  163. }
  /** Tries to consume a decimal integer literal: an optional '-', at least one
      decimal digit, and an optional suffix.

      @returns true if a complete literal was consumed. On failure the source may have
               been partially advanced, so callers need to restore it themselves.
  */
  template <typename Iterator>
  static bool parseDecimalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      bool sawDigit = false;

      for (; isDecimalDigit (source.peekNextChar()); source.skip())
          sawDigit = true;

      return sawDigit && skipNumberSuffix (source);
  }
  /** Tries to consume a floating-point literal.

      The accepted shape is: an optional '-', digits with an optional '.' (at least one
      digit in total), an optional exponent ('e' or 'E', optional sign, at least one
      digit), and an optional 'f' or 'F' suffix. A run of digits on its own is not
      treated as a float: it needs a point, an exponent or the suffix.

      @returns true if a complete literal was consumed. On failure the source may have
               been partially advanced, so callers need to restore it themselves.
  */
  template <typename Iterator>
  static bool parseFloatLiteral (Iterator& source) noexcept
  {
      // Consumes a run of decimal digits and reports how many there were.
      auto skipDigits = [&source]() -> int
      {
          int count = 0;

          while (isDecimalDigit (source.peekNextChar()))
          {
              source.skip();
              ++count;
          }

          return count;
      };

      if (source.peekNextChar() == '-')
          source.skip();

      int mantissaDigits = skipDigits();

      const bool hasPoint = (source.peekNextChar() == '.');

      if (hasPoint)
      {
          source.skip();
          mantissaDigits += skipDigits();
      }

      if (mantissaDigits == 0)
          return false;

      const auto exponentChar = source.peekNextChar();
      const bool hasExponent = (exponentChar == 'e' || exponentChar == 'E');

      if (hasExponent)
      {
          source.skip();

          const auto sign = source.peekNextChar();

          if (sign == '+' || sign == '-')
              source.skip();

          if (skipDigits() == 0)
              return false;
      }

      const auto suffix = source.peekNextChar();

      if (suffix == 'f' || suffix == 'F')
      {
          source.skip();
          return true;
      }

      return hasExponent || hasPoint;
  }
  226. template <typename Iterator>
  227. static int parseNumber (Iterator& source)
  228. {
  229. const Iterator original (source);
  230. if (parseFloatLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_float;
  231. source = original;
  232. if (parseHexLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  233. source = original;
  234. if (parseOctalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  235. source = original;
  236. if (parseDecimalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  237. source = original;
  238. return CPlusPlusCodeTokeniser::tokenType_error;
  239. }
  /** Skips over a quoted string or character literal.

      The first character read is taken as the quote character. Reading then continues
      until the matching quote or the end of the input (a zero character) has been
      consumed. The character following a backslash is skipped, so an escaped quote
      doesn't end the literal.
  */
  template <typename Iterator>
  static void skipQuotedString (Iterator& source) noexcept
  {
      const auto quote = source.nextChar();

      for (auto c = source.nextChar(); c != quote && c != 0; c = source.nextChar())
          if (c == '\\')
              source.skip();
  }
  /** Skips the remainder of a block comment.

      Characters are consumed up to and including the first '/' that directly follows
      a '*', or until the end of the input (a zero character) is reached.
  */
  template <typename Iterator>
  static void skipComment (Iterator& source) noexcept
  {
      bool previousWasStar = false;

      for (auto c = source.nextChar(); c != 0; c = source.nextChar())
      {
          if (previousWasStar && c == '/')
              return;

          previousWasStar = (c == '*');
      }
  }
  /** Skips over a preprocessor directive.

      Scanning stops, without consuming anything further, at the end of the input or
      at the start of a comment ("//" or a block comment opener). Double-quoted
      strings are skipped as a unit. When a line break is reached,
      skipToEndOfLine() is called and, if the last character before the break was a
      backslash, scanning carries on recursively.

      NOTE(review): continuing onto the next line relies on the iterator's
      skipToEndOfLine() moving past the line break. An iterator whose
      skipToEndOfLine() stops on the newline character will make the recursive call
      return immediately - confirm against the iterator types in use.
  */
  template <typename Iterator>
  static void skipPreprocessorLine (Iterator& source) noexcept
  {
      bool continuesOnNextLine = false;

      for (;;)
      {
          const auto c = source.peekNextChar();

          switch (c)
          {
              case 0:
                  return;

              case '"':
                  skipQuotedString (source);
                  continue; // restarts the scanning loop, leaving the backslash flag untouched

              case '\n':
              case '\r':
                  source.skipToEndOfLine();

                  if (continuesOnNextLine)
                      skipPreprocessorLine (source);

                  return;

              case '/':
              {
                  // Look one character ahead on a copy, so nothing is consumed.
                  Iterator lookahead (source);
                  lookahead.skip();
                  const auto following = lookahead.peekNextChar();

                  if (following == '/' || following == '*')
                      return;

                  break;
              }

              default:
                  break;
          }

          continuesOnNextLine = (c == '\\');
          source.skip();
      }
  }
  298. template <typename Iterator>
  299. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  300. {
  301. if (source.peekNextChar() == c)
  302. source.skip();
  303. }
  304. template <typename Iterator>
  305. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  306. {
  307. auto c = source.peekNextChar();
  308. if (c == c1 || c == c2)
  309. source.skip();
  310. }
  311. template <typename Iterator>
  312. static int readNextToken (Iterator& source)
  313. {
  314. source.skipWhitespace();
  315. auto firstChar = source.peekNextChar();
  316. switch (firstChar)
  317. {
  318. case 0:
  319. break;
  320. case '0': case '1': case '2': case '3': case '4':
  321. case '5': case '6': case '7': case '8': case '9':
  322. case '.':
  323. {
  324. auto result = parseNumber (source);
  325. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  326. {
  327. source.skip();
  328. if (firstChar == '.')
  329. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  330. }
  331. return result;
  332. }
  333. case ',':
  334. case ';':
  335. case ':':
  336. source.skip();
  337. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  338. case '(': case ')':
  339. case '{': case '}':
  340. case '[': case ']':
  341. source.skip();
  342. return CPlusPlusCodeTokeniser::tokenType_bracket;
  343. case '"':
  344. case '\'':
  345. skipQuotedString (source);
  346. return CPlusPlusCodeTokeniser::tokenType_string;
  347. case '+':
  348. source.skip();
  349. skipIfNextCharMatches (source, '+', '=');
  350. return CPlusPlusCodeTokeniser::tokenType_operator;
  351. case '-':
  352. {
  353. source.skip();
  354. auto result = parseNumber (source);
  355. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  356. {
  357. skipIfNextCharMatches (source, '-', '=');
  358. return CPlusPlusCodeTokeniser::tokenType_operator;
  359. }
  360. return result;
  361. }
  362. case '*': case '%':
  363. case '=': case '!':
  364. source.skip();
  365. skipIfNextCharMatches (source, '=');
  366. return CPlusPlusCodeTokeniser::tokenType_operator;
  367. case '/':
  368. {
  369. source.skip();
  370. auto nextChar = source.peekNextChar();
  371. if (nextChar == '/')
  372. {
  373. source.skipToEndOfLine();
  374. return CPlusPlusCodeTokeniser::tokenType_comment;
  375. }
  376. if (nextChar == '*')
  377. {
  378. source.skip();
  379. skipComment (source);
  380. return CPlusPlusCodeTokeniser::tokenType_comment;
  381. }
  382. if (nextChar == '=')
  383. source.skip();
  384. return CPlusPlusCodeTokeniser::tokenType_operator;
  385. }
  386. case '?':
  387. case '~':
  388. source.skip();
  389. return CPlusPlusCodeTokeniser::tokenType_operator;
  390. case '<': case '>':
  391. case '|': case '&': case '^':
  392. source.skip();
  393. skipIfNextCharMatches (source, firstChar);
  394. skipIfNextCharMatches (source, '=');
  395. return CPlusPlusCodeTokeniser::tokenType_operator;
  396. case '#':
  397. skipPreprocessorLine (source);
  398. return CPlusPlusCodeTokeniser::tokenType_preprocessor;
  399. default:
  400. if (isIdentifierStart (firstChar))
  401. return parseIdentifier (source);
  402. source.skip();
  403. break;
  404. }
  405. return CPlusPlusCodeTokeniser::tokenType_error;
  406. }
  407. /** A class that can be passed to the CppTokeniserFunctions functions in order to
  408. parse a String.
  409. */
  410. struct StringIterator
  411. {
  412. StringIterator (const String& s) noexcept : t (s.getCharPointer()) {}
  413. StringIterator (String::CharPointerType s) noexcept : t (s) {}
  414. juce_wchar nextChar() noexcept { if (isEOF()) return 0; ++numChars; return t.getAndAdvance(); }
  415. juce_wchar peekNextChar()noexcept { return *t; }
  416. void skip() noexcept { if (! isEOF()) { ++t; ++numChars; } }
  417. void skipWhitespace() noexcept { while (t.isWhitespace()) skip(); }
  418. void skipToEndOfLine() noexcept { while (*t != '\r' && *t != '\n' && *t != 0) skip(); }
  419. bool isEOF() const noexcept { return t.isEmpty(); }
  420. String::CharPointerType t;
  421. int numChars = 0;
  422. };
  423. //==============================================================================
  424. /** Takes a UTF8 string and writes it to a stream using standard C++ escape sequences for any
  425. non-ascii bytes.
  426. Although not strictly a tokenising function, this is still a function that often comes in
  427. handy when working with C++ code!
  428. Note that addEscapeChars() is easier to use than this function if you're working with Strings.
  429. @see addEscapeChars
  430. */
  431. static void writeEscapeChars (OutputStream& out, const char* utf8, const int numBytesToRead,
  432. const int maxCharsOnLine, const bool breakAtNewLines,
  433. const bool replaceSingleQuotes, const bool allowStringBreaks)
  434. {
  435. int charsOnLine = 0;
  436. bool lastWasHexEscapeCode = false;
  437. bool trigraphDetected = false;
  438. for (int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
  439. {
  440. auto c = (unsigned char) utf8[i];
  441. bool startNewLine = false;
  442. switch (c)
  443. {
  444. case '\t': out << "\\t"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  445. case '\r': out << "\\r"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  446. case '\n': out << "\\n"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; startNewLine = breakAtNewLines; break;
  447. case '\\': out << "\\\\"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  448. case '\"': out << "\\\""; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  449. case '?':
  450. if (trigraphDetected)
  451. {
  452. out << "\\?";
  453. charsOnLine++;
  454. trigraphDetected = false;
  455. }
  456. else
  457. {
  458. out << "?";
  459. trigraphDetected = true;
  460. }
  461. lastWasHexEscapeCode = false;
  462. charsOnLine++;
  463. break;
  464. case 0:
  465. if (numBytesToRead < 0)
  466. return;
  467. out << "\\0";
  468. lastWasHexEscapeCode = true;
  469. trigraphDetected = false;
  470. charsOnLine += 2;
  471. break;
  472. case '\'':
  473. if (replaceSingleQuotes)
  474. {
  475. out << "\\\'";
  476. lastWasHexEscapeCode = false;
  477. trigraphDetected = false;
  478. charsOnLine += 2;
  479. break;
  480. }
  481. // deliberate fall-through...
  482. default:
  483. if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode // (have to avoid following a hex escape sequence with a valid hex digit)
  484. && CharacterFunctions::getHexDigitValue (c) >= 0))
  485. {
  486. out << (char) c;
  487. lastWasHexEscapeCode = false;
  488. trigraphDetected = false;
  489. ++charsOnLine;
  490. }
  491. else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
  492. {
  493. out << "\"\"" << (char) c;
  494. lastWasHexEscapeCode = false;
  495. trigraphDetected = false;
  496. charsOnLine += 3;
  497. }
  498. else
  499. {
  500. out << (c < 16 ? "\\x0" : "\\x") << String::toHexString ((int) c);
  501. lastWasHexEscapeCode = true;
  502. trigraphDetected = false;
  503. charsOnLine += 4;
  504. }
  505. break;
  506. }
  507. if ((startNewLine || (maxCharsOnLine > 0 && charsOnLine >= maxCharsOnLine))
  508. && (numBytesToRead < 0 || i < numBytesToRead - 1))
  509. {
  510. charsOnLine = 0;
  511. out << "\"" << newLine << "\"";
  512. lastWasHexEscapeCode = false;
  513. }
  514. }
  515. }
  516. /** Takes a string and returns a version of it where standard C++ escape sequences have been
  517. used to replace any non-ascii bytes.
  518. Although not strictly a tokenising function, this is still a function that often comes in
  519. handy when working with C++ code!
  520. @see writeEscapeChars
  521. */
  522. static String addEscapeChars (const String& s)
  523. {
  524. MemoryOutputStream mo;
  525. writeEscapeChars (mo, s.toRawUTF8(), -1, -1, false, true, true);
  526. return mo.toString();
  527. }
  528. };
  529. } // namespace juce