The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

609 lines
16KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser() {}
  19. CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser() {}
  20. //==============================================================================
  21. namespace CppTokeniser
  22. {
  23. static bool isIdentifierStart (const juce_wchar c) noexcept
  24. {
  25. return CharacterFunctions::isLetter (c)
  26. || c == '_' || c == '@';
  27. }
  28. static bool isIdentifierBody (const juce_wchar c) noexcept
  29. {
  30. return CharacterFunctions::isLetterOrDigit (c)
  31. || c == '_' || c == '@';
  32. }
  33. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  34. {
  35. static const char* const keywords2Char[] =
  36. { "if", "do", "or", "id", 0 };
  37. static const char* const keywords3Char[] =
  38. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  39. static const char* const keywords4Char[] =
  40. { "bool", "void", "this", "true", "long", "else", "char",
  41. "enum", "case", "goto", "auto", 0 };
  42. static const char* const keywords5Char[] =
  43. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  44. "float", "short", "throw", "union", "using", "or_eq", 0 };
  45. static const char* const keywords6Char[] =
  46. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  47. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  48. "switch", "typeid", "wchar_t", "xor_eq", 0};
  49. static const char* const keywords7Char[] =
  50. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  51. static const char* const keywordsOther[] =
  52. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  53. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  54. "template", "typename", "unsigned", "volatile", "constexpr",
  55. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  56. "@private", "@property", "@protected", "@class", 0 };
  57. const char* const* k;
  58. switch (tokenLength)
  59. {
  60. case 2: k = keywords2Char; break;
  61. case 3: k = keywords3Char; break;
  62. case 4: k = keywords4Char; break;
  63. case 5: k = keywords5Char; break;
  64. case 6: k = keywords6Char; break;
  65. case 7: k = keywords7Char; break;
  66. default:
  67. if (tokenLength < 2 || tokenLength > 16)
  68. return false;
  69. k = keywordsOther;
  70. break;
  71. }
  72. int i = 0;
  73. while (k[i] != 0)
  74. {
  75. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  76. return true;
  77. ++i;
  78. }
  79. return false;
  80. }
  81. static int parseIdentifier (CodeDocument::Iterator& source) noexcept
  82. {
  83. int tokenLength = 0;
  84. String::CharPointerType::CharType possibleIdentifier [100];
  85. String::CharPointerType possible (possibleIdentifier);
  86. while (isIdentifierBody (source.peekNextChar()))
  87. {
  88. const juce_wchar c = source.nextChar();
  89. if (tokenLength < 20)
  90. possible.write (c);
  91. ++tokenLength;
  92. }
  93. if (tokenLength > 1 && tokenLength <= 16)
  94. {
  95. possible.writeNull();
  96. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  97. return CPlusPlusCodeTokeniser::tokenType_builtInKeyword;
  98. }
  99. return CPlusPlusCodeTokeniser::tokenType_identifier;
  100. }
  101. static bool skipNumberSuffix (CodeDocument::Iterator& source)
  102. {
  103. const juce_wchar c = source.peekNextChar();
  104. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  105. source.skip();
  106. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  107. return false;
  108. return true;
  109. }
  110. static bool isHexDigit (const juce_wchar c) noexcept
  111. {
  112. return (c >= '0' && c <= '9')
  113. || (c >= 'a' && c <= 'f')
  114. || (c >= 'A' && c <= 'F');
  115. }
  116. static bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
  117. {
  118. if (source.nextChar() != '0')
  119. return false;
  120. juce_wchar c = source.nextChar();
  121. if (c != 'x' && c != 'X')
  122. return false;
  123. int numDigits = 0;
  124. while (isHexDigit (source.peekNextChar()))
  125. {
  126. ++numDigits;
  127. source.skip();
  128. }
  129. if (numDigits == 0)
  130. return false;
  131. return skipNumberSuffix (source);
  132. }
  133. static bool isOctalDigit (const juce_wchar c) noexcept
  134. {
  135. return c >= '0' && c <= '7';
  136. }
  137. static bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
  138. {
  139. if (source.nextChar() != '0')
  140. return false;
  141. if (! isOctalDigit (source.nextChar()))
  142. return false;
  143. while (isOctalDigit (source.peekNextChar()))
  144. source.skip();
  145. return skipNumberSuffix (source);
  146. }
  147. static bool isDecimalDigit (const juce_wchar c) noexcept
  148. {
  149. return c >= '0' && c <= '9';
  150. }
  151. static bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
  152. {
  153. int numChars = 0;
  154. while (isDecimalDigit (source.peekNextChar()))
  155. {
  156. ++numChars;
  157. source.skip();
  158. }
  159. if (numChars == 0)
  160. return false;
  161. return skipNumberSuffix (source);
  162. }
  163. static bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
  164. {
  165. int numDigits = 0;
  166. while (isDecimalDigit (source.peekNextChar()))
  167. {
  168. source.skip();
  169. ++numDigits;
  170. }
  171. const bool hasPoint = (source.peekNextChar() == '.');
  172. if (hasPoint)
  173. {
  174. source.skip();
  175. while (isDecimalDigit (source.peekNextChar()))
  176. {
  177. source.skip();
  178. ++numDigits;
  179. }
  180. }
  181. if (numDigits == 0)
  182. return false;
  183. juce_wchar c = source.peekNextChar();
  184. const bool hasExponent = (c == 'e' || c == 'E');
  185. if (hasExponent)
  186. {
  187. source.skip();
  188. c = source.peekNextChar();
  189. if (c == '+' || c == '-')
  190. source.skip();
  191. int numExpDigits = 0;
  192. while (isDecimalDigit (source.peekNextChar()))
  193. {
  194. source.skip();
  195. ++numExpDigits;
  196. }
  197. if (numExpDigits == 0)
  198. return false;
  199. }
  200. c = source.peekNextChar();
  201. if (c == 'f' || c == 'F')
  202. source.skip();
  203. else if (! (hasExponent || hasPoint))
  204. return false;
  205. return true;
  206. }
  207. static int parseNumber (CodeDocument::Iterator& source)
  208. {
  209. const CodeDocument::Iterator original (source);
  210. if (parseFloatLiteral (source))
  211. return CPlusPlusCodeTokeniser::tokenType_floatLiteral;
  212. source = original;
  213. if (parseHexLiteral (source))
  214. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  215. source = original;
  216. if (parseOctalLiteral (source))
  217. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  218. source = original;
  219. if (parseDecimalLiteral (source))
  220. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  221. source = original;
  222. source.skip();
  223. return CPlusPlusCodeTokeniser::tokenType_error;
  224. }
  225. static void skipQuotedString (CodeDocument::Iterator& source) noexcept
  226. {
  227. const juce_wchar quote = source.nextChar();
  228. for (;;)
  229. {
  230. const juce_wchar c = source.nextChar();
  231. if (c == quote || c == 0)
  232. break;
  233. if (c == '\\')
  234. source.skip();
  235. }
  236. }
  237. static void skipComment (CodeDocument::Iterator& source) noexcept
  238. {
  239. bool lastWasStar = false;
  240. for (;;)
  241. {
  242. const juce_wchar c = source.nextChar();
  243. if (c == 0 || (c == '/' && lastWasStar))
  244. break;
  245. lastWasStar = (c == '*');
  246. }
  247. }
  248. }
  249. //==============================================================================
  250. int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
  251. {
  252. int result = tokenType_error;
  253. source.skipWhitespace();
  254. juce_wchar firstChar = source.peekNextChar();
  255. switch (firstChar)
  256. {
  257. case 0:
  258. source.skip();
  259. break;
  260. case '0':
  261. case '1':
  262. case '2':
  263. case '3':
  264. case '4':
  265. case '5':
  266. case '6':
  267. case '7':
  268. case '8':
  269. case '9':
  270. result = CppTokeniser::parseNumber (source);
  271. break;
  272. case '.':
  273. result = CppTokeniser::parseNumber (source);
  274. if (result == tokenType_error)
  275. result = tokenType_punctuation;
  276. break;
  277. case ',':
  278. case ';':
  279. case ':':
  280. source.skip();
  281. result = tokenType_punctuation;
  282. break;
  283. case '(':
  284. case ')':
  285. case '{':
  286. case '}':
  287. case '[':
  288. case ']':
  289. source.skip();
  290. result = tokenType_bracket;
  291. break;
  292. case '"':
  293. case '\'':
  294. CppTokeniser::skipQuotedString (source);
  295. result = tokenType_stringLiteral;
  296. break;
  297. case '+':
  298. result = tokenType_operator;
  299. source.skip();
  300. if (source.peekNextChar() == '+')
  301. source.skip();
  302. else if (source.peekNextChar() == '=')
  303. source.skip();
  304. break;
  305. case '-':
  306. source.skip();
  307. result = CppTokeniser::parseNumber (source);
  308. if (result == tokenType_error)
  309. {
  310. result = tokenType_operator;
  311. if (source.peekNextChar() == '-')
  312. source.skip();
  313. else if (source.peekNextChar() == '=')
  314. source.skip();
  315. }
  316. break;
  317. case '*':
  318. case '%':
  319. case '=':
  320. case '!':
  321. result = tokenType_operator;
  322. source.skip();
  323. if (source.peekNextChar() == '=')
  324. source.skip();
  325. break;
  326. case '/':
  327. result = tokenType_operator;
  328. source.skip();
  329. if (source.peekNextChar() == '=')
  330. {
  331. source.skip();
  332. }
  333. else if (source.peekNextChar() == '/')
  334. {
  335. result = tokenType_comment;
  336. source.skipToEndOfLine();
  337. }
  338. else if (source.peekNextChar() == '*')
  339. {
  340. source.skip();
  341. result = tokenType_comment;
  342. CppTokeniser::skipComment (source);
  343. }
  344. break;
  345. case '?':
  346. case '~':
  347. source.skip();
  348. result = tokenType_operator;
  349. break;
  350. case '<':
  351. source.skip();
  352. result = tokenType_operator;
  353. if (source.peekNextChar() == '=')
  354. {
  355. source.skip();
  356. }
  357. else if (source.peekNextChar() == '<')
  358. {
  359. source.skip();
  360. if (source.peekNextChar() == '=')
  361. source.skip();
  362. }
  363. break;
  364. case '>':
  365. source.skip();
  366. result = tokenType_operator;
  367. if (source.peekNextChar() == '=')
  368. {
  369. source.skip();
  370. }
  371. else if (source.peekNextChar() == '<')
  372. {
  373. source.skip();
  374. if (source.peekNextChar() == '=')
  375. source.skip();
  376. }
  377. break;
  378. case '|':
  379. source.skip();
  380. result = tokenType_operator;
  381. if (source.peekNextChar() == '=')
  382. {
  383. source.skip();
  384. }
  385. else if (source.peekNextChar() == '|')
  386. {
  387. source.skip();
  388. if (source.peekNextChar() == '=')
  389. source.skip();
  390. }
  391. break;
  392. case '&':
  393. source.skip();
  394. result = tokenType_operator;
  395. if (source.peekNextChar() == '=')
  396. {
  397. source.skip();
  398. }
  399. else if (source.peekNextChar() == '&')
  400. {
  401. source.skip();
  402. if (source.peekNextChar() == '=')
  403. source.skip();
  404. }
  405. break;
  406. case '^':
  407. source.skip();
  408. result = tokenType_operator;
  409. if (source.peekNextChar() == '=')
  410. {
  411. source.skip();
  412. }
  413. else if (source.peekNextChar() == '^')
  414. {
  415. source.skip();
  416. if (source.peekNextChar() == '=')
  417. source.skip();
  418. }
  419. break;
  420. case '#':
  421. result = tokenType_preprocessor;
  422. source.skipToEndOfLine();
  423. break;
  424. default:
  425. if (CppTokeniser::isIdentifierStart (firstChar))
  426. result = CppTokeniser::parseIdentifier (source);
  427. else
  428. source.skip();
  429. break;
  430. }
  431. return result;
  432. }
  433. StringArray CPlusPlusCodeTokeniser::getTokenTypes()
  434. {
  435. const char* const types[] =
  436. {
  437. "Error",
  438. "Comment",
  439. "C++ keyword",
  440. "Identifier",
  441. "Integer literal",
  442. "Float literal",
  443. "String literal",
  444. "Operator",
  445. "Bracket",
  446. "Punctuation",
  447. "Preprocessor line",
  448. 0
  449. };
  450. return StringArray (types);
  451. }
  452. Colour CPlusPlusCodeTokeniser::getDefaultColour (const int tokenType)
  453. {
  454. const uint32 colours[] =
  455. {
  456. 0xffcc0000, // error
  457. 0xff00aa00, // comment
  458. 0xff0000cc, // keyword
  459. 0xff000000, // identifier
  460. 0xff880000, // int literal
  461. 0xff885500, // float literal
  462. 0xff990099, // string literal
  463. 0xff225500, // operator
  464. 0xff000055, // bracket
  465. 0xff004400, // punctuation
  466. 0xff660000 // preprocessor
  467. };
  468. if (tokenType >= 0 && tokenType < numElementsInArray (colours))
  469. return Colour (colours [tokenType]);
  470. return Colours::black;
  471. }
  472. bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) noexcept
  473. {
  474. return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
  475. }