The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

614 lines
16KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. BEGIN_JUCE_NAMESPACE
  19. //==============================================================================
  20. CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser() {}
  21. CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser() {}
  22. //==============================================================================
  23. namespace CppTokeniser
  24. {
  25. bool isIdentifierStart (const juce_wchar c) noexcept
  26. {
  27. return CharacterFunctions::isLetter (c)
  28. || c == '_' || c == '@';
  29. }
  30. bool isIdentifierBody (const juce_wchar c) noexcept
  31. {
  32. return CharacterFunctions::isLetterOrDigit (c)
  33. || c == '_' || c == '@';
  34. }
  35. bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  36. {
  37. static const char* const keywords2Char[] =
  38. { "if", "do", "or", "id", 0 };
  39. static const char* const keywords3Char[] =
  40. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  41. static const char* const keywords4Char[] =
  42. { "bool", "void", "this", "true", "long", "else", "char",
  43. "enum", "case", "goto", "auto", 0 };
  44. static const char* const keywords5Char[] =
  45. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  46. "float", "short", "throw", "union", "using", "or_eq", 0 };
  47. static const char* const keywords6Char[] =
  48. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  49. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  50. "switch", "typeid", "wchar_t", "xor_eq", 0};
  51. static const char* const keywords7Char[] =
  52. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  53. static const char* const keywordsOther[] =
  54. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  55. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  56. "template", "typename", "unsigned", "volatile", "constexpr",
  57. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  58. "@private", "@property", "@protected", "@class", 0 };
  59. const char* const* k;
  60. switch (tokenLength)
  61. {
  62. case 2: k = keywords2Char; break;
  63. case 3: k = keywords3Char; break;
  64. case 4: k = keywords4Char; break;
  65. case 5: k = keywords5Char; break;
  66. case 6: k = keywords6Char; break;
  67. case 7: k = keywords7Char; break;
  68. default:
  69. if (tokenLength < 2 || tokenLength > 16)
  70. return false;
  71. k = keywordsOther;
  72. break;
  73. }
  74. int i = 0;
  75. while (k[i] != 0)
  76. {
  77. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  78. return true;
  79. ++i;
  80. }
  81. return false;
  82. }
  83. int parseIdentifier (CodeDocument::Iterator& source) noexcept
  84. {
  85. int tokenLength = 0;
  86. String::CharPointerType::CharType possibleIdentifier [100];
  87. String::CharPointerType possible (possibleIdentifier);
  88. while (isIdentifierBody (source.peekNextChar()))
  89. {
  90. const juce_wchar c = source.nextChar();
  91. if (tokenLength < 20)
  92. possible.write (c);
  93. ++tokenLength;
  94. }
  95. if (tokenLength > 1 && tokenLength <= 16)
  96. {
  97. possible.writeNull();
  98. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  99. return CPlusPlusCodeTokeniser::tokenType_builtInKeyword;
  100. }
  101. return CPlusPlusCodeTokeniser::tokenType_identifier;
  102. }
  103. bool skipNumberSuffix (CodeDocument::Iterator& source)
  104. {
  105. const juce_wchar c = source.peekNextChar();
  106. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  107. source.skip();
  108. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  109. return false;
  110. return true;
  111. }
  112. bool isHexDigit (const juce_wchar c) noexcept
  113. {
  114. return (c >= '0' && c <= '9')
  115. || (c >= 'a' && c <= 'f')
  116. || (c >= 'A' && c <= 'F');
  117. }
  118. bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
  119. {
  120. if (source.nextChar() != '0')
  121. return false;
  122. juce_wchar c = source.nextChar();
  123. if (c != 'x' && c != 'X')
  124. return false;
  125. int numDigits = 0;
  126. while (isHexDigit (source.peekNextChar()))
  127. {
  128. ++numDigits;
  129. source.skip();
  130. }
  131. if (numDigits == 0)
  132. return false;
  133. return skipNumberSuffix (source);
  134. }
  135. bool isOctalDigit (const juce_wchar c) noexcept
  136. {
  137. return c >= '0' && c <= '7';
  138. }
  139. bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
  140. {
  141. if (source.nextChar() != '0')
  142. return false;
  143. if (! isOctalDigit (source.nextChar()))
  144. return false;
  145. while (isOctalDigit (source.peekNextChar()))
  146. source.skip();
  147. return skipNumberSuffix (source);
  148. }
  149. bool isDecimalDigit (const juce_wchar c) noexcept
  150. {
  151. return c >= '0' && c <= '9';
  152. }
  153. bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
  154. {
  155. int numChars = 0;
  156. while (isDecimalDigit (source.peekNextChar()))
  157. {
  158. ++numChars;
  159. source.skip();
  160. }
  161. if (numChars == 0)
  162. return false;
  163. return skipNumberSuffix (source);
  164. }
  165. bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
  166. {
  167. int numDigits = 0;
  168. while (isDecimalDigit (source.peekNextChar()))
  169. {
  170. source.skip();
  171. ++numDigits;
  172. }
  173. const bool hasPoint = (source.peekNextChar() == '.');
  174. if (hasPoint)
  175. {
  176. source.skip();
  177. while (isDecimalDigit (source.peekNextChar()))
  178. {
  179. source.skip();
  180. ++numDigits;
  181. }
  182. }
  183. if (numDigits == 0)
  184. return false;
  185. juce_wchar c = source.peekNextChar();
  186. const bool hasExponent = (c == 'e' || c == 'E');
  187. if (hasExponent)
  188. {
  189. source.skip();
  190. c = source.peekNextChar();
  191. if (c == '+' || c == '-')
  192. source.skip();
  193. int numExpDigits = 0;
  194. while (isDecimalDigit (source.peekNextChar()))
  195. {
  196. source.skip();
  197. ++numExpDigits;
  198. }
  199. if (numExpDigits == 0)
  200. return false;
  201. }
  202. c = source.peekNextChar();
  203. if (c == 'f' || c == 'F')
  204. source.skip();
  205. else if (! (hasExponent || hasPoint))
  206. return false;
  207. return true;
  208. }
  209. int parseNumber (CodeDocument::Iterator& source)
  210. {
  211. const CodeDocument::Iterator original (source);
  212. if (parseFloatLiteral (source))
  213. return CPlusPlusCodeTokeniser::tokenType_floatLiteral;
  214. source = original;
  215. if (parseHexLiteral (source))
  216. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  217. source = original;
  218. if (parseOctalLiteral (source))
  219. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  220. source = original;
  221. if (parseDecimalLiteral (source))
  222. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  223. source = original;
  224. source.skip();
  225. return CPlusPlusCodeTokeniser::tokenType_error;
  226. }
  227. void skipQuotedString (CodeDocument::Iterator& source) noexcept
  228. {
  229. const juce_wchar quote = source.nextChar();
  230. for (;;)
  231. {
  232. const juce_wchar c = source.nextChar();
  233. if (c == quote || c == 0)
  234. break;
  235. if (c == '\\')
  236. source.skip();
  237. }
  238. }
  239. void skipComment (CodeDocument::Iterator& source) noexcept
  240. {
  241. bool lastWasStar = false;
  242. for (;;)
  243. {
  244. const juce_wchar c = source.nextChar();
  245. if (c == 0 || (c == '/' && lastWasStar))
  246. break;
  247. lastWasStar = (c == '*');
  248. }
  249. }
  250. }
  251. //==============================================================================
  252. int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
  253. {
  254. int result = tokenType_error;
  255. source.skipWhitespace();
  256. juce_wchar firstChar = source.peekNextChar();
  257. switch (firstChar)
  258. {
  259. case 0:
  260. source.skip();
  261. break;
  262. case '0':
  263. case '1':
  264. case '2':
  265. case '3':
  266. case '4':
  267. case '5':
  268. case '6':
  269. case '7':
  270. case '8':
  271. case '9':
  272. result = CppTokeniser::parseNumber (source);
  273. break;
  274. case '.':
  275. result = CppTokeniser::parseNumber (source);
  276. if (result == tokenType_error)
  277. result = tokenType_punctuation;
  278. break;
  279. case ',':
  280. case ';':
  281. case ':':
  282. source.skip();
  283. result = tokenType_punctuation;
  284. break;
  285. case '(':
  286. case ')':
  287. case '{':
  288. case '}':
  289. case '[':
  290. case ']':
  291. source.skip();
  292. result = tokenType_bracket;
  293. break;
  294. case '"':
  295. case '\'':
  296. CppTokeniser::skipQuotedString (source);
  297. result = tokenType_stringLiteral;
  298. break;
  299. case '+':
  300. result = tokenType_operator;
  301. source.skip();
  302. if (source.peekNextChar() == '+')
  303. source.skip();
  304. else if (source.peekNextChar() == '=')
  305. source.skip();
  306. break;
  307. case '-':
  308. source.skip();
  309. result = CppTokeniser::parseNumber (source);
  310. if (result == tokenType_error)
  311. {
  312. result = tokenType_operator;
  313. if (source.peekNextChar() == '-')
  314. source.skip();
  315. else if (source.peekNextChar() == '=')
  316. source.skip();
  317. }
  318. break;
  319. case '*':
  320. case '%':
  321. case '=':
  322. case '!':
  323. result = tokenType_operator;
  324. source.skip();
  325. if (source.peekNextChar() == '=')
  326. source.skip();
  327. break;
  328. case '/':
  329. result = tokenType_operator;
  330. source.skip();
  331. if (source.peekNextChar() == '=')
  332. {
  333. source.skip();
  334. }
  335. else if (source.peekNextChar() == '/')
  336. {
  337. result = tokenType_comment;
  338. source.skipToEndOfLine();
  339. }
  340. else if (source.peekNextChar() == '*')
  341. {
  342. source.skip();
  343. result = tokenType_comment;
  344. CppTokeniser::skipComment (source);
  345. }
  346. break;
  347. case '?':
  348. case '~':
  349. source.skip();
  350. result = tokenType_operator;
  351. break;
  352. case '<':
  353. source.skip();
  354. result = tokenType_operator;
  355. if (source.peekNextChar() == '=')
  356. {
  357. source.skip();
  358. }
  359. else if (source.peekNextChar() == '<')
  360. {
  361. source.skip();
  362. if (source.peekNextChar() == '=')
  363. source.skip();
  364. }
  365. break;
  366. case '>':
  367. source.skip();
  368. result = tokenType_operator;
  369. if (source.peekNextChar() == '=')
  370. {
  371. source.skip();
  372. }
  373. else if (source.peekNextChar() == '<')
  374. {
  375. source.skip();
  376. if (source.peekNextChar() == '=')
  377. source.skip();
  378. }
  379. break;
  380. case '|':
  381. source.skip();
  382. result = tokenType_operator;
  383. if (source.peekNextChar() == '=')
  384. {
  385. source.skip();
  386. }
  387. else if (source.peekNextChar() == '|')
  388. {
  389. source.skip();
  390. if (source.peekNextChar() == '=')
  391. source.skip();
  392. }
  393. break;
  394. case '&':
  395. source.skip();
  396. result = tokenType_operator;
  397. if (source.peekNextChar() == '=')
  398. {
  399. source.skip();
  400. }
  401. else if (source.peekNextChar() == '&')
  402. {
  403. source.skip();
  404. if (source.peekNextChar() == '=')
  405. source.skip();
  406. }
  407. break;
  408. case '^':
  409. source.skip();
  410. result = tokenType_operator;
  411. if (source.peekNextChar() == '=')
  412. {
  413. source.skip();
  414. }
  415. else if (source.peekNextChar() == '^')
  416. {
  417. source.skip();
  418. if (source.peekNextChar() == '=')
  419. source.skip();
  420. }
  421. break;
  422. case '#':
  423. result = tokenType_preprocessor;
  424. source.skipToEndOfLine();
  425. break;
  426. default:
  427. if (CppTokeniser::isIdentifierStart (firstChar))
  428. result = CppTokeniser::parseIdentifier (source);
  429. else
  430. source.skip();
  431. break;
  432. }
  433. return result;
  434. }
  435. StringArray CPlusPlusCodeTokeniser::getTokenTypes()
  436. {
  437. const char* const types[] =
  438. {
  439. "Error",
  440. "Comment",
  441. "C++ keyword",
  442. "Identifier",
  443. "Integer literal",
  444. "Float literal",
  445. "String literal",
  446. "Operator",
  447. "Bracket",
  448. "Punctuation",
  449. "Preprocessor line",
  450. 0
  451. };
  452. return StringArray (types);
  453. }
  454. Colour CPlusPlusCodeTokeniser::getDefaultColour (const int tokenType)
  455. {
  456. const uint32 colours[] =
  457. {
  458. 0xffcc0000, // error
  459. 0xff00aa00, // comment
  460. 0xff0000cc, // keyword
  461. 0xff000000, // identifier
  462. 0xff880000, // int literal
  463. 0xff885500, // float literal
  464. 0xff990099, // string literal
  465. 0xff225500, // operator
  466. 0xff000055, // bracket
  467. 0xff004400, // punctuation
  468. 0xff660000 // preprocessor
  469. };
  470. if (tokenType >= 0 && tokenType < numElementsInArray (colours))
  471. return Colour (colours [tokenType]);
  472. return Colours::black;
  473. }
  474. bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) noexcept
  475. {
  476. return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
  477. }
  478. END_JUCE_NAMESPACE