The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

642 lines
20KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library.
  4. Copyright (c) 2015 - ROLI Ltd.
  5. Permission is granted to use this software under the terms of either:
  6. a) the GPL v2 (or any later version)
  7. b) the Affero GPL v3
  8. Details of these licenses can be found at: www.gnu.org/licenses
  9. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  11. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. ------------------------------------------------------------------------------
  13. To release a closed-source product which uses JUCE, commercial licenses are
  14. available: visit www.juce.com for more information.
  15. ==============================================================================
  16. */
  17. #ifndef JUCE_CPLUSPLUSCODETOKENISERFUNCTIONS_H_INCLUDED
  18. #define JUCE_CPLUSPLUSCODETOKENISERFUNCTIONS_H_INCLUDED
  19. //==============================================================================
  20. /** Class containing some basic functions for simple tokenising of C++ code.
  21. */
  22. struct CppTokeniserFunctions
  23. {
  24. static bool isIdentifierStart (const juce_wchar c) noexcept
  25. {
  26. return CharacterFunctions::isLetter (c)
  27. || c == '_' || c == '@';
  28. }
  29. static bool isIdentifierBody (const juce_wchar c) noexcept
  30. {
  31. return CharacterFunctions::isLetterOrDigit (c)
  32. || c == '_' || c == '@';
  33. }
  34. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  35. {
  36. static const char* const keywords2Char[] =
  37. { "if", "do", "or", nullptr };
  38. static const char* const keywords3Char[] =
  39. { "for", "int", "new", "try", "xor", "and", "asm", "not", nullptr };
  40. static const char* const keywords4Char[] =
  41. { "bool", "void", "this", "true", "long", "else", "char",
  42. "enum", "case", "goto", "auto", nullptr };
  43. static const char* const keywords5Char[] =
  44. { "float", "const", "while", "break", "false", "catch", "class", "bitor",
  45. "compl", "or_eq", "short", "throw", "union", "using", "final", nullptr };
  46. static const char* const keywords6Char[] =
  47. { "return", "and_eq", "bitand", "delete", "double", "export", "extern",
  48. "friend", "inline", "not_eq", "public", "signed", "sizeof", "static",
  49. "struct", "switch", "typeid", "xor_eq", nullptr };
  50. static const char* const keywords7Char[] =
  51. { "nullptr", "alignas", "alignof", "default", "mutable", "private",
  52. "typedef", "virtual", "wchar_t", "__cdecl", "_Pragma", "uint8_t", nullptr };
  53. static const char* const keywordsOther[] =
  54. { "char16_t", "char32_t", "const_cast", "constexpr", "continue", "decltype", "dynamic_cast",
  55. "explicit", "namespace", "noexcept", "operator", "protected", "register", "reinterpret_cast",
  56. "static_assert", "static_cast", "template", "thread_local", "typename", "unsigned", "volatile",
  57. "@class", "@dynamic", "@end", "@implementation", "@interface", "@public", "@private",
  58. "@protected", "@property", "@synthesize", "__fastcall", "__stdcall", nullptr };
  59. const char* const* k;
  60. switch (tokenLength)
  61. {
  62. case 2: k = keywords2Char; break;
  63. case 3: k = keywords3Char; break;
  64. case 4: k = keywords4Char; break;
  65. case 5: k = keywords5Char; break;
  66. case 6: k = keywords6Char; break;
  67. case 7: k = keywords7Char; break;
  68. default:
  69. if (tokenLength < 2 || tokenLength > 16)
  70. return false;
  71. k = keywordsOther;
  72. break;
  73. }
  74. for (int i = 0; k[i] != 0; ++i)
  75. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  76. return true;
  77. return false;
  78. }
  79. template <typename Iterator>
  80. static int parseIdentifier (Iterator& source) noexcept
  81. {
  82. int tokenLength = 0;
  83. String::CharPointerType::CharType possibleIdentifier [100];
  84. String::CharPointerType possible (possibleIdentifier);
  85. while (isIdentifierBody (source.peekNextChar()))
  86. {
  87. const juce_wchar c = source.nextChar();
  88. if (tokenLength < 20)
  89. possible.write (c);
  90. ++tokenLength;
  91. }
  92. if (tokenLength > 1 && tokenLength <= 16)
  93. {
  94. possible.writeNull();
  95. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  96. return CPlusPlusCodeTokeniser::tokenType_keyword;
  97. }
  98. return CPlusPlusCodeTokeniser::tokenType_identifier;
  99. }
  100. template <typename Iterator>
  101. static bool skipNumberSuffix (Iterator& source)
  102. {
  103. const juce_wchar c = source.peekNextChar();
  104. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  105. source.skip();
  106. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  107. return false;
  108. return true;
  109. }
  110. static bool isHexDigit (const juce_wchar c) noexcept
  111. {
  112. return (c >= '0' && c <= '9')
  113. || (c >= 'a' && c <= 'f')
  114. || (c >= 'A' && c <= 'F');
  115. }
  116. template <typename Iterator>
  117. static bool parseHexLiteral (Iterator& source) noexcept
  118. {
  119. if (source.peekNextChar() == '-')
  120. source.skip();
  121. if (source.nextChar() != '0')
  122. return false;
  123. juce_wchar c = source.nextChar();
  124. if (c != 'x' && c != 'X')
  125. return false;
  126. int numDigits = 0;
  127. while (isHexDigit (source.peekNextChar()))
  128. {
  129. ++numDigits;
  130. source.skip();
  131. }
  132. if (numDigits == 0)
  133. return false;
  134. return skipNumberSuffix (source);
  135. }
  136. static bool isOctalDigit (const juce_wchar c) noexcept
  137. {
  138. return c >= '0' && c <= '7';
  139. }
  /** Tries to consume an octal integer literal: an optional '-', a leading '0', at
      least one further octal digit, and an optional integer suffix.

      @returns true if a complete literal was consumed. On failure the iterator may
               have been advanced part-way, so the caller must restore it.
  */
  template <typename Iterator>
  static bool parseOctalLiteral (Iterator& source) noexcept
  {
      skipIfNextCharMatches (source, '-');

      // The second character is only read when the first one was '0'.
      if (source.nextChar() != '0' || ! isOctalDigit (source.nextChar()))
          return false;

      for (; isOctalDigit (source.peekNextChar());)
          source.skip();

      return skipNumberSuffix (source);
  }
  153. static bool isDecimalDigit (const juce_wchar c) noexcept
  154. {
  155. return c >= '0' && c <= '9';
  156. }
  /** Tries to consume a decimal integer literal: an optional '-', one or more
      decimal digits, and an optional integer suffix.

      @returns true if a complete literal was consumed. On failure the iterator may
               have been advanced part-way, so the caller must restore it.
  */
  template <typename Iterator>
  static bool parseDecimalLiteral (Iterator& source) noexcept
  {
      skipIfNextCharMatches (source, '-');

      bool foundDigit = false;

      for (; isDecimalDigit (source.peekNextChar()); source.skip())
          foundDigit = true;

      return foundDigit && skipNumberSuffix (source);
  }
  172. template <typename Iterator>
  173. static bool parseFloatLiteral (Iterator& source) noexcept
  174. {
  175. if (source.peekNextChar() == '-')
  176. source.skip();
  177. int numDigits = 0;
  178. while (isDecimalDigit (source.peekNextChar()))
  179. {
  180. source.skip();
  181. ++numDigits;
  182. }
  183. const bool hasPoint = (source.peekNextChar() == '.');
  184. if (hasPoint)
  185. {
  186. source.skip();
  187. while (isDecimalDigit (source.peekNextChar()))
  188. {
  189. source.skip();
  190. ++numDigits;
  191. }
  192. }
  193. if (numDigits == 0)
  194. return false;
  195. juce_wchar c = source.peekNextChar();
  196. const bool hasExponent = (c == 'e' || c == 'E');
  197. if (hasExponent)
  198. {
  199. source.skip();
  200. c = source.peekNextChar();
  201. if (c == '+' || c == '-')
  202. source.skip();
  203. int numExpDigits = 0;
  204. while (isDecimalDigit (source.peekNextChar()))
  205. {
  206. source.skip();
  207. ++numExpDigits;
  208. }
  209. if (numExpDigits == 0)
  210. return false;
  211. }
  212. c = source.peekNextChar();
  213. if (c == 'f' || c == 'F')
  214. source.skip();
  215. else if (! (hasExponent || hasPoint))
  216. return false;
  217. return true;
  218. }
  219. template <typename Iterator>
  220. static int parseNumber (Iterator& source)
  221. {
  222. const Iterator original (source);
  223. if (parseFloatLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_float;
  224. source = original;
  225. if (parseHexLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  226. source = original;
  227. if (parseOctalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  228. source = original;
  229. if (parseDecimalLiteral (source)) return CPlusPlusCodeTokeniser::tokenType_integer;
  230. source = original;
  231. return CPlusPlusCodeTokeniser::tokenType_error;
  232. }
  233. template <typename Iterator>
  234. static void skipQuotedString (Iterator& source) noexcept
  235. {
  236. const juce_wchar quote = source.nextChar();
  237. for (;;)
  238. {
  239. const juce_wchar c = source.nextChar();
  240. if (c == quote || c == 0)
  241. break;
  242. if (c == '\\')
  243. source.skip();
  244. }
  245. }
  246. template <typename Iterator>
  247. static void skipComment (Iterator& source) noexcept
  248. {
  249. bool lastWasStar = false;
  250. for (;;)
  251. {
  252. const juce_wchar c = source.nextChar();
  253. if (c == 0 || (c == '/' && lastWasStar))
  254. break;
  255. lastWasStar = (c == '*');
  256. }
  257. }
  258. template <typename Iterator>
  259. static void skipPreprocessorLine (Iterator& source) noexcept
  260. {
  261. bool lastWasBackslash = false;
  262. for (;;)
  263. {
  264. const juce_wchar c = source.peekNextChar();
  265. if (c == '"')
  266. {
  267. skipQuotedString (source);
  268. continue;
  269. }
  270. if (c == '/')
  271. {
  272. Iterator next (source);
  273. next.skip();
  274. const juce_wchar c2 = next.peekNextChar();
  275. if (c2 == '/' || c2 == '*')
  276. return;
  277. }
  278. if (c == 0)
  279. break;
  280. if (c == '\n' || c == '\r')
  281. {
  282. source.skipToEndOfLine();
  283. if (lastWasBackslash)
  284. skipPreprocessorLine (source);
  285. break;
  286. }
  287. lastWasBackslash = (c == '\\');
  288. source.skip();
  289. }
  290. }
  291. template <typename Iterator>
  292. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  293. {
  294. if (source.peekNextChar() == c)
  295. source.skip();
  296. }
  297. template <typename Iterator>
  298. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  299. {
  300. const juce_wchar c = source.peekNextChar();
  301. if (c == c1 || c == c2)
  302. source.skip();
  303. }
  304. template <typename Iterator>
  305. static int readNextToken (Iterator& source)
  306. {
  307. source.skipWhitespace();
  308. const juce_wchar firstChar = source.peekNextChar();
  309. switch (firstChar)
  310. {
  311. case 0:
  312. break;
  313. case '0': case '1': case '2': case '3': case '4':
  314. case '5': case '6': case '7': case '8': case '9':
  315. case '.':
  316. {
  317. int result = parseNumber (source);
  318. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  319. {
  320. source.skip();
  321. if (firstChar == '.')
  322. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  323. }
  324. return result;
  325. }
  326. case ',':
  327. case ';':
  328. case ':':
  329. source.skip();
  330. return CPlusPlusCodeTokeniser::tokenType_punctuation;
  331. case '(': case ')':
  332. case '{': case '}':
  333. case '[': case ']':
  334. source.skip();
  335. return CPlusPlusCodeTokeniser::tokenType_bracket;
  336. case '"':
  337. case '\'':
  338. skipQuotedString (source);
  339. return CPlusPlusCodeTokeniser::tokenType_string;
  340. case '+':
  341. source.skip();
  342. skipIfNextCharMatches (source, '+', '=');
  343. return CPlusPlusCodeTokeniser::tokenType_operator;
  344. case '-':
  345. {
  346. source.skip();
  347. int result = parseNumber (source);
  348. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  349. {
  350. skipIfNextCharMatches (source, '-', '=');
  351. return CPlusPlusCodeTokeniser::tokenType_operator;
  352. }
  353. return result;
  354. }
  355. case '*': case '%':
  356. case '=': case '!':
  357. source.skip();
  358. skipIfNextCharMatches (source, '=');
  359. return CPlusPlusCodeTokeniser::tokenType_operator;
  360. case '/':
  361. {
  362. source.skip();
  363. juce_wchar nextChar = source.peekNextChar();
  364. if (nextChar == '/')
  365. {
  366. source.skipToEndOfLine();
  367. return CPlusPlusCodeTokeniser::tokenType_comment;
  368. }
  369. if (nextChar == '*')
  370. {
  371. source.skip();
  372. skipComment (source);
  373. return CPlusPlusCodeTokeniser::tokenType_comment;
  374. }
  375. if (nextChar == '=')
  376. source.skip();
  377. return CPlusPlusCodeTokeniser::tokenType_operator;
  378. }
  379. case '?':
  380. case '~':
  381. source.skip();
  382. return CPlusPlusCodeTokeniser::tokenType_operator;
  383. case '<': case '>':
  384. case '|': case '&': case '^':
  385. source.skip();
  386. skipIfNextCharMatches (source, firstChar);
  387. skipIfNextCharMatches (source, '=');
  388. return CPlusPlusCodeTokeniser::tokenType_operator;
  389. case '#':
  390. skipPreprocessorLine (source);
  391. return CPlusPlusCodeTokeniser::tokenType_preprocessor;
  392. default:
  393. if (isIdentifierStart (firstChar))
  394. return parseIdentifier (source);
  395. source.skip();
  396. break;
  397. }
  398. return CPlusPlusCodeTokeniser::tokenType_error;
  399. }
  400. /** A class that can be passed to the CppTokeniserFunctions functions in order to
  401. parse a String.
  402. */
  403. struct StringIterator
  404. {
  405. StringIterator (const String& s) noexcept : t (s.getCharPointer()), numChars (0) {}
  406. StringIterator (String::CharPointerType s) noexcept : t (s), numChars (0) {}
  407. juce_wchar nextChar() noexcept { if (isEOF()) return 0; ++numChars; return t.getAndAdvance(); }
  408. juce_wchar peekNextChar()noexcept { return *t; }
  409. void skip() noexcept { if (! isEOF()) { ++t; ++numChars; } }
  410. void skipWhitespace() noexcept { while (t.isWhitespace()) skip(); }
  411. void skipToEndOfLine() noexcept { while (*t != '\r' && *t != '\n' && *t != 0) skip(); }
  412. bool isEOF() const noexcept { return t.isEmpty(); }
  413. String::CharPointerType t;
  414. int numChars;
  415. };
  416. //==============================================================================
  417. /** Takes a UTF8 string and writes it to a stream using standard C++ escape sequences for any
  418. non-ascii bytes.
  419. Although not strictly a tokenising function, this is still a function that often comes in
  420. handy when working with C++ code!
  421. Note that addEscapeChars() is easier to use than this function if you're working with Strings.
  422. @see addEscapeChars
  423. */
  424. static void writeEscapeChars (OutputStream& out, const char* utf8, const int numBytesToRead,
  425. const int maxCharsOnLine, const bool breakAtNewLines,
  426. const bool replaceSingleQuotes, const bool allowStringBreaks)
  427. {
  428. int charsOnLine = 0;
  429. bool lastWasHexEscapeCode = false;
  430. for (int i = 0; i < numBytesToRead || numBytesToRead < 0; ++i)
  431. {
  432. const unsigned char c = (unsigned char) utf8[i];
  433. bool startNewLine = false;
  434. switch (c)
  435. {
  436. case '\t': out << "\\t"; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  437. case '\r': out << "\\r"; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  438. case '\n': out << "\\n"; lastWasHexEscapeCode = false; charsOnLine += 2; startNewLine = breakAtNewLines; break;
  439. case '\\': out << "\\\\"; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  440. case '\"': out << "\\\""; lastWasHexEscapeCode = false; charsOnLine += 2; break;
  441. case 0:
  442. if (numBytesToRead < 0)
  443. return;
  444. out << "\\0";
  445. lastWasHexEscapeCode = true;
  446. charsOnLine += 2;
  447. break;
  448. case '\'':
  449. if (replaceSingleQuotes)
  450. {
  451. out << "\\\'";
  452. lastWasHexEscapeCode = false;
  453. charsOnLine += 2;
  454. break;
  455. }
  456. // deliberate fall-through...
  457. default:
  458. if (c >= 32 && c < 127 && ! (lastWasHexEscapeCode // (have to avoid following a hex escape sequence with a valid hex digit)
  459. && CharacterFunctions::getHexDigitValue (c) >= 0))
  460. {
  461. out << (char) c;
  462. lastWasHexEscapeCode = false;
  463. ++charsOnLine;
  464. }
  465. else if (allowStringBreaks && lastWasHexEscapeCode && c >= 32 && c < 127)
  466. {
  467. out << "\"\"" << (char) c;
  468. lastWasHexEscapeCode = false;
  469. charsOnLine += 3;
  470. }
  471. else
  472. {
  473. out << (c < 16 ? "\\x0" : "\\x") << String::toHexString ((int) c);
  474. lastWasHexEscapeCode = true;
  475. charsOnLine += 4;
  476. }
  477. break;
  478. }
  479. if ((startNewLine || (maxCharsOnLine > 0 && charsOnLine >= maxCharsOnLine))
  480. && (numBytesToRead < 0 || i < numBytesToRead - 1))
  481. {
  482. charsOnLine = 0;
  483. out << "\"" << newLine << "\"";
  484. lastWasHexEscapeCode = false;
  485. }
  486. }
  487. }
  488. /** Takes a string and returns a version of it where standard C++ escape sequences have been
  489. used to replace any non-ascii bytes.
  490. Although not strictly a tokenising function, this is still a function that often comes in
  491. handy when working with C++ code!
  492. @see writeEscapeChars
  493. */
  494. static String addEscapeChars (const String& s)
  495. {
  496. MemoryOutputStream mo;
  497. writeEscapeChars (mo, s.toRawUTF8(), -1, -1, false, true, true);
  498. return mo.toString();
  499. }
  500. };
  501. #endif // JUCE_CPLUSPLUSCODETOKENISERFUNCTIONS_H_INCLUDED