The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

664 lines
21KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2017 - ROLI Ltd.
  5. JUCE is an open source library subject to commercial or open-source
  6. licensing.
  7. By using JUCE, you agree to the terms of both the JUCE 5 End-User License
  8. Agreement and JUCE 5 Privacy Policy (both updated and effective as of the
  9. 27th April 2017).
  10. End User License Agreement: www.juce.com/juce-5-licence
  11. Privacy Policy: www.juce.com/juce-5-privacy-policy
  12. Or: You may also use this code under the terms of the GPL v3 (see
  13. www.gnu.org/licenses).
  14. JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  15. EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  16. DISCLAIMED.
  17. ==============================================================================
  18. */
  19. #pragma once
  20. //==============================================================================
  21. /** Class containing some basic functions for simple tokenising of C++ code.
  22. */
  23. struct CppTokeniserFunctions
  24. {
  25. static bool isIdentifierStart (const juce_wchar c) noexcept
  26. {
  27. return CharacterFunctions::isLetter (c)
  28. || c == '_' || c == '@';
  29. }
  30. static bool isIdentifierBody (const juce_wchar c) noexcept
  31. {
  32. return CharacterFunctions::isLetterOrDigit (c)
  33. || c == '_' || c == '@';
  34. }
  35. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  36. {
  37. static const char* const keywords2Char[] =
  38. { "if", "do", "or", nullptr };
  39. static const char* const keywords3Char[] =
  40. { "for", "int", "new", "try", "xor", "and", "asm", "not", nullptr };
  41. static const char* const keywords4Char[] =
  42. { "bool", "void", "this", "true", "long", "else", "char",
  43. "enum", "case", "goto", "auto", nullptr };
  44. static const char* const keywords5Char[] =
  45. { "float", "const", "while", "break", "false", "catch", "class", "bitor",
  46. "compl", "or_eq", "short", "throw", "union", "using", "final", nullptr };
  47. static const char* const keywords6Char[] =
  48. { "return", "and_eq", "bitand", "delete", "double", "export", "extern",
  49. "friend", "inline", "not_eq", "public", "signed", "sizeof", "static",
  50. "struct", "switch", "typeid", "xor_eq", nullptr };
  51. static const char* const keywords7Char[] =
  52. { "nullptr", "alignas", "alignof", "default", "mutable", "private",
  53. "typedef", "virtual", "wchar_t", "__cdecl", "_Pragma", "uint8_t", nullptr };
  54. static const char* const keywordsOther[] =
  55. { "char16_t", "char32_t", "const_cast", "constexpr", "continue", "decltype", "dynamic_cast",
  56. "explicit", "namespace", "noexcept", "operator", "protected", "register", "reinterpret_cast",
  57. "static_assert", "static_cast", "template", "thread_local", "typename", "unsigned", "volatile",
  58. "@class", "@dynamic", "@end", "@implementation", "@interface", "@public", "@private",
  59. "@protected", "@property", "@synthesize", "__fastcall", "__stdcall", nullptr };
  60. const char* const* k;
  61. switch (tokenLength)
  62. {
  63. case 2: k = keywords2Char; break;
  64. case 3: k = keywords3Char; break;
  65. case 4: k = keywords4Char; break;
  66. case 5: k = keywords5Char; break;
  67. case 6: k = keywords6Char; break;
  68. case 7: k = keywords7Char; break;
  69. default:
  70. if (tokenLength < 2 || tokenLength > 16)
  71. return false;
  72. k = keywordsOther;
  73. break;
  74. }
  75. for (int i = 0; k[i] != 0; ++i)
  76. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  77. return true;
  78. return false;
  79. }
  80. template <typename Iterator>
  81. static int parseIdentifier (Iterator& source) noexcept
  82. {
  83. int tokenLength = 0;
  84. String::CharPointerType::CharType possibleIdentifier [100];
  85. String::CharPointerType possible (possibleIdentifier);
  86. while (isIdentifierBody (source.peekNextChar()))
  87. {
  88. const juce_wchar c = source.nextChar();
  89. if (tokenLength < 20)
  90. possible.write (c);
  91. ++tokenLength;
  92. }
  93. if (tokenLength > 1 && tokenLength <= 16)
  94. {
  95. possible.writeNull();
  96. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  97. return CPlusPlusCodeTokeniser::tokenType_keyword;
  98. }
  99. return CPlusPlusCodeTokeniser::tokenType_identifier;
  100. }
  101. template <typename Iterator>
  102. static bool skipNumberSuffix (Iterator& source)
  103. {
  104. const juce_wchar c = source.peekNextChar();
  105. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  106. source.skip();
  107. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  108. return false;
  109. return true;
  110. }
  111. static bool isHexDigit (const juce_wchar c) noexcept
  112. {
  113. return (c >= '0' && c <= '9')
  114. || (c >= 'a' && c <= 'f')
  115. || (c >= 'A' && c <= 'F');
  116. }
  117. template <typename Iterator>
  118. static bool parseHexLiteral (Iterator& source) noexcept
  119. {
  120. if (source.peekNextChar() == '-')
  121. source.skip();
  122. if (source.nextChar() != '0')
  123. return false;
  124. juce_wchar c = source.nextChar();
  125. if (c != 'x' && c != 'X')
  126. return false;
  127. int numDigits = 0;
  128. while (isHexDigit (source.peekNextChar()))
  129. {
  130. ++numDigits;
  131. source.skip();
  132. }
  133. if (numDigits == 0)
  134. return false;
  135. return skipNumberSuffix (source);
  136. }
  137. static bool isOctalDigit (const juce_wchar c) noexcept
  138. {
  139. return c >= '0' && c <= '7';
  140. }
  /** Tries to consume an octal integer literal: an optional '-', a leading '0', at least
      one further octal digit, and an optional suffix handled by skipNumberSuffix().

      @returns true if a complete literal was consumed. On failure the source is left
               wherever parsing stopped; it is not rewound here.
  */
  template <typename Iterator>
  static bool parseOctalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      // The second nextChar() is only evaluated once the leading '0' has been consumed.
      if (source.nextChar() != '0' || ! isOctalDigit (source.nextChar()))
          return false;

      for (; isOctalDigit (source.peekNextChar()); source.skip())
      {
      }

      return skipNumberSuffix (source);
  }
  154. static bool isDecimalDigit (const juce_wchar c) noexcept
  155. {
  156. return c >= '0' && c <= '9';
  157. }
  /** Tries to consume a decimal integer literal: an optional '-', at least one decimal
      digit, and an optional suffix handled by skipNumberSuffix().

      @returns true if a complete literal was consumed. On failure the source is left
               wherever parsing stopped; it is not rewound here.
  */
  template <typename Iterator>
  static bool parseDecimalLiteral (Iterator& source) noexcept
  {
      if (source.peekNextChar() == '-')
          source.skip();

      bool foundDigit = false;

      for (; isDecimalDigit (source.peekNextChar()); source.skip())
          foundDigit = true;

      return foundDigit && skipNumberSuffix (source);
  }
  173. template <typename Iterator>
  174. static bool parseFloatLiteral (Iterator& source) noexcept
  175. {
  176. if (source.peekNextChar() == '-')
  177. source.skip();
  178. int numDigits = 0;
  179. while (isDecimalDigit (source.peekNextChar()))
  180. {
  181. source.skip();
  182. ++numDigits;
  183. }
  184. const bool hasPoint = (source.peekNextChar() == '.');
  185. if (hasPoint)
  186. {
  187. source.skip();
  188. while (isDecimalDigit (source.peekNextChar()))
  189. {
  190. source.skip();
  191. ++numDigits;
  192. }
  193. }
  194. if (numDigits == 0)
  195. return false;
  196. juce_wchar c = source.peekNextChar();
  197. const bool hasExponent = (c == 'e' || c == 'E');
  198. if (hasExponent)
  199. {
  200. source.skip();
  201. c = source.peekNextChar();
  202. if (c == '+' || c == '-')
  203. source.skip();
  204. int numExpDigits = 0;
  205. while (isDecimalDigit (source.peekNextChar()))
  206. {
  207. source.skip();
  208. ++numExpDigits;
  209. }
  210. if (numExpDigits == 0)
  211. return false;
  212. }
  213. c = source.peekNextChar();
  214. if (c == 'f' || c == 'F')
  215. source.skip();
  216. else if (! (hasExponent || hasPoint))
  217. return false;
  218. return true;
  219. }
  220. template <typename Iterator>
  221. static int parseNumber (Iterator& source)
  222. {
  223. const Iterator original (source);
  224. if (parseFloatLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_float;
  225. source = original;
  226. if (parseHexLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  227. source = original;
  228. if (parseOctalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  229. source = original;
  230. if (parseDecimalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  231. source = original;
  232. return CPlusPlusCodeTokeniser::tokenType_error;
  233. }
  234. template <typename Iterator>
  235. static void skipQuotedString (Iterator& source) noexcept
  236. {
  237. const juce_wchar quote = source.nextChar();
  238. for (;;)
  239. {
  240. const juce_wchar c = source.nextChar();
  241. if (c == quote || c == 0)
  242. break;
  243. if (c == '\\')
  244. source.skip();
  245. }
  246. }
  247. template <typename Iterator>
  248. static void skipComment (Iterator& source) noexcept
  249. {
  250. bool lastWasStar = false;
  251. for (;;)
  252. {
  253. const juce_wchar c = source.nextChar();
  254. if (c == 0 || (c == '/' && lastWasStar))
  255. break;
  256. lastWasStar = (c == '*');
  257. }
  258. }
  259. template <typename Iterator>
  260. static void skipPreprocessorLine (Iterator& source) noexcept
  261. {
  262. bool lastWasBackslash = false;
  263. for (;;)
  264. {
  265. const juce_wchar c = source.peekNextChar();
  266. if (c == '"')
  267. {
  268. skipQuotedString (source);
  269. continue;
  270. }
  271. if (c == '/')
  272. {
  273. Iterator next (source);
  274. next.skip();
  275. const juce_wchar c2 = next.peekNextChar();
  276. if (c2 == '/' || c2 == '*')
  277. return;
  278. }
  279. if (c == 0)
  280. break;
  281. if (c == '\n' || c == '\r')
  282. {
  283. source.skipToEndOfLine();
  284. if (lastWasBackslash)
  285. skipPreprocessorLine (source);
  286. break;
  287. }
  288. lastWasBackslash = (c == '\\');
  289. source.skip();
  290. }
  291. }
  292. template <typename Iterator>
  293. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  294. {
  295. if (source.peekNextChar() == c)
  296. source.skip();
  297. }
  298. template <typename Iterator>
  299. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  300. {
  301. const juce_wchar c = source.peekNextChar();
  302. if (c == c1 || c == c2)
  303. source.skip();
  304. }
  305. template <typename Iterator>
  306. static int readNextToken (Iterator& source)
  307. {
  308. source.skipWhitespace();
  309. const juce_wchar firstChar = source.peekNextChar();
  310. switch (firstChar)
  311. {
  312. case 0:
  313. break;
  314. case '0': case '1': case '2': case '3': case '4':
  315. case '5': case '6': case '7': case '8': case '9':
  316. case '.':
  317. {
  318. int result = parseNumber (source);
  319. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  320. {
  321. source.skip();
  322. if (firstChar == '.')
  323. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  324. }
  325. return result;
  326. }
  327. case ',':
  328. case ';':
  329. case ':':
  330. source.skip();
  331. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  332. case '(': case ')':
  333. case '{': case '}':
  334. case '[': case ']':
  335. source.skip();
  336. return CPlusPlusCodeTokeniser::tokenType_bracket;
  337. case '"':
  338. case '\'':
  339. skipQuotedString (source);
  340. return CPlusPlusCodeTokeniser::tokenType_string;
  341. case '+':
  342. source.skip();
  343. skipIfNextCharMatches (source, '+', '=');
  344. return CPlusPlusCodeTokeniser::tokenType_operator;
  345. case '-':
  346. {
  347. source.skip();
  348. int result = parseNumber (source);
  349. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  350. {
  351. skipIfNextCharMatches (source, '-', '=');
  352. return CPlusPlusCodeTokeniser::tokenType_operator;
  353. }
  354. return result;
  355. }
  356. case '*': case '%':
  357. case '=': case '!':
  358. source.skip();
  359. skipIfNextCharMatches (source, '=');
  360. return CPlusPlusCodeTokeniser::tokenType_operator;
  361. case '/':
  362. {
  363. source.skip();
  364. juce_wchar nextChar = source.peekNextChar();
  365. if (nextChar == '/')
  366. {
  367. source.skipToEndOfLine();
  368. return CPlusPlusCodeTokeniser::tokenType_comment;
  369. }
  370. if (nextChar == '*')
  371. {
  372. source.skip();
  373. skipComment (source);
  374. return CPlusPlusCodeTokeniser::tokenType_comment;
  375. }
  376. if (nextChar == '=')
  377. source.skip();
  378. return CPlusPlusCodeTokeniser::tokenType_operator;
  379. }
  380. case '?':
  381. case '~':
  382. source.skip();
  383. return CPlusPlusCodeTokeniser::tokenType_operator;
  384. case '<': case '>':
  385. case '|': case '&': case '^':
  386. source.skip();
  387. skipIfNextCharMatches (source, firstChar);
  388. skipIfNextCharMatches (source, '=');
  389. return CPlusPlusCodeTokeniser::tokenType_operator;
  390. case '#':
  391. skipPreprocessorLine (source);
  392. return CPlusPlusCodeTokeniser::tokenType_preprocessor;
  393. default:
  394. if (isIdentifierStart (firstChar))
  395. return parseIdentifier (source);
  396. source.skip();
  397. break;
  398. }
  399. return CPlusPlusCodeTokeniser::tokenType_error;
  400. }
  401. /** A class that can be passed to the CppTokeniserFunctions functions in order to
  402. parse a String.
  403. */
  404. struct StringIterator
  405. {
  406. StringIterator (const String& s) noexcept : t (s.getCharPointer()), numChars (0) {}
  407. StringIterator (String::CharPointerType s) noexcept : t (s), numChars (0) {}
  408. juce_wchar nextChar() noexcept { if (isEOF()) return 0; ++numChars; return t.getAndAdvance(); }
  409. juce_wchar peekNextChar()noexcept { return *t; }
  410. void skip() noexcept { if (! isEOF()) { ++t; ++numChars; } }
  411. void skipWhitespace() noexcept { while (t.isWhitespace()) skip(); }
  412. void skipToEndOfLine() noexcept { while (*t != '\r' && *t != '\n' && *t != 0) skip(); }
  413. bool isEOF() const noexcept { return t.isEmpty(); }
  414. String::CharPointerType t;
  415. int numChars;
  416. };
  417. //==============================================================================
  418. /** Takes a UTF8 string and writes it to a stream using standard C++ escape sequences for any
  419. non-ascii bytes.
  420. Although not strictly a tokenising function, this is still a function that often comes in
  421. handy when working with C++ code!
  422. Note that addEscapeChars() is easier to use than this function if you're working with Strings.
  423. @see addEscapeChars
  424. */
  425. static void writeEscapeChars (OutputStream& out, const char* utf8, const int numBytesToRead,
  426. const int maxCharsOnLine, const bool breakAtNewLines,
  427. const bool replaceSingleQuotes, const bool allowStringBreaks)
  428. {
  429. int charsOnLine = 0;
  430. bool lastWasHexEscapeCode = false;
  431. bool trigraphDetected = false;
  432. for (int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
  433. {
  434. const unsigned char c = (unsigned char) utf8[i];
  435. bool startNewLine = false;
  436. switch (c)
  437. {
  438. case '\t': out << "\\t"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  439. case '\r': out << "\\r"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  440. case '\n': out << "\\n"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; startNewLine = breakAtNewLines; break;
  441. case '\\': out << "\\\\"; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  442. case '\"': out << "\\\""; trigraphDetected = false; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  443. case '?':
  444. if (trigraphDetected)
  445. {
  446. out << "\\?";
  447. charsOnLine++;
  448. trigraphDetected = false;
  449. }
  450. else
  451. {
  452. out << "?";
  453. trigraphDetected = true;
  454. }
  455. lastWasHexEscapeCode = false;
  456. charsOnLine++;
  457. break;
  458. case 0:
  459. if (numBytesToRead < 0)
  460. return;
  461. out << "\\0";
  462. lastWasHexEscapeCode = true;
  463. trigraphDetected = false;
  464. charsOnLine += 2;
  465. break;
  466. case '\'':
  467. if (replaceSingleQuotes)
  468. {
  469. out << "\\\'";
  470. lastWasHexEscapeCode = false;
  471. trigraphDetected = false;
  472. charsOnLine += 2;
  473. break;
  474. }
  475. // deliberate fall-through...
  476. default:
  477. if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode // (have to avoid following a hex escape sequence with a valid hex digit)
  478. && CharacterFunctions::getHexDigitValue (c) >= 0))
  479. {
  480. out << (char) c;
  481. lastWasHexEscapeCode = false;
  482. trigraphDetected = false;
  483. ++charsOnLine;
  484. }
  485. else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
  486. {
  487. out << "\"\"" << (char) c;
  488. lastWasHexEscapeCode = false;
  489. trigraphDetected = false;
  490. charsOnLine += 3;
  491. }
  492. else
  493. {
  494. out << (c < 16 ? "\\x0" : "\\x") << String::toHexString ((int) c);
  495. lastWasHexEscapeCode = true;
  496. trigraphDetected = false;
  497. charsOnLine += 4;
  498. }
  499. break;
  500. }
  501. if ((startNewLine || (maxCharsOnLine > 0 && charsOnLine >= maxCharsOnLine))
  502. && (numBytesToRead < 0 || i < numBytesToRead - 1))
  503. {
  504. charsOnLine = 0;
  505. out << "\"" << newLine << "\"";
  506. lastWasHexEscapeCode = false;
  507. }
  508. }
  509. }
  510. /** Takes a string and returns a version of it where standard C++ escape sequences have been
  511. used to replace any non-ascii bytes.
  512. Although not strictly a tokenising function, this is still a function that often comes in
  513. handy when working with C++ code!
  514. @see writeEscapeChars
  515. */
  516. static String addEscapeChars (const String& s)
  517. {
  518. MemoryOutputStream mo;
  519. writeEscapeChars (mo, s.toRawUTF8(), -1, -1, false, true, true);
  520. return mo.toString();
  521. }
  522. };