The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

572 lines
15KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. namespace CppTokeniser
  19. {
  /** Token categories returned by the tokeniser functions in this file.

      The values are consecutive integers starting at zero, in the same order
      as the colour entries listed in getDefaultColourScheme().
  */
  enum TokenType
  {
      tokenType_error        = 0,
      tokenType_comment      = 1,
      tokenType_keyword      = 2,
      tokenType_operator     = 3,
      tokenType_identifier   = 4,
      tokenType_integer      = 5,
      tokenType_float        = 6,
      tokenType_string       = 7,
      tokenType_bracket      = 8,
      tokenType_punctuation  = 9,
      tokenType_preprocessor = 10
  };
  34. static bool isIdentifierStart (const juce_wchar c) noexcept
  35. {
  36. return CharacterFunctions::isLetter (c)
  37. || c == '_' || c == '@';
  38. }
  39. static bool isIdentifierBody (const juce_wchar c) noexcept
  40. {
  41. return CharacterFunctions::isLetterOrDigit (c)
  42. || c == '_' || c == '@';
  43. }
  44. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  45. {
  46. static const char* const keywords2Char[] =
  47. { "if", "do", "or", "id", 0 };
  48. static const char* const keywords3Char[] =
  49. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  50. static const char* const keywords4Char[] =
  51. { "bool", "void", "this", "true", "long", "else", "char",
  52. "enum", "case", "goto", "auto", 0 };
  53. static const char* const keywords5Char[] =
  54. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  55. "float", "short", "throw", "union", "using", "or_eq", 0 };
  56. static const char* const keywords6Char[] =
  57. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  58. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  59. "switch", "typeid", "wchar_t", "xor_eq", 0};
  60. static const char* const keywords7Char[] =
  61. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  62. static const char* const keywordsOther[] =
  63. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  64. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  65. "template", "typename", "unsigned", "volatile", "constexpr",
  66. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  67. "@private", "@property", "@protected", "@class", 0 };
  68. const char* const* k;
  69. switch (tokenLength)
  70. {
  71. case 2: k = keywords2Char; break;
  72. case 3: k = keywords3Char; break;
  73. case 4: k = keywords4Char; break;
  74. case 5: k = keywords5Char; break;
  75. case 6: k = keywords6Char; break;
  76. case 7: k = keywords7Char; break;
  77. default:
  78. if (tokenLength < 2 || tokenLength > 16)
  79. return false;
  80. k = keywordsOther;
  81. break;
  82. }
  83. for (int i = 0; k[i] != 0; ++i)
  84. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  85. return true;
  86. return false;
  87. }
  88. static int parseIdentifier (CodeDocument::Iterator& source) noexcept
  89. {
  90. int tokenLength = 0;
  91. String::CharPointerType::CharType possibleIdentifier [100];
  92. String::CharPointerType possible (possibleIdentifier);
  93. while (isIdentifierBody (source.peekNextChar()))
  94. {
  95. const juce_wchar c = source.nextChar();
  96. if (tokenLength < 20)
  97. possible.write (c);
  98. ++tokenLength;
  99. }
  100. if (tokenLength > 1 && tokenLength <= 16)
  101. {
  102. possible.writeNull();
  103. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  104. return tokenType_keyword;
  105. }
  106. return tokenType_identifier;
  107. }
  108. static bool skipNumberSuffix (CodeDocument::Iterator& source)
  109. {
  110. const juce_wchar c = source.peekNextChar();
  111. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  112. source.skip();
  113. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  114. return false;
  115. return true;
  116. }
  117. static bool isHexDigit (const juce_wchar c) noexcept
  118. {
  119. return (c >= '0' && c <= '9')
  120. || (c >= 'a' && c <= 'f')
  121. || (c >= 'A' && c <= 'F');
  122. }
  123. static bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
  124. {
  125. if (source.nextChar() != '0')
  126. return false;
  127. juce_wchar c = source.nextChar();
  128. if (c != 'x' && c != 'X')
  129. return false;
  130. int numDigits = 0;
  131. while (isHexDigit (source.peekNextChar()))
  132. {
  133. ++numDigits;
  134. source.skip();
  135. }
  136. if (numDigits == 0)
  137. return false;
  138. return skipNumberSuffix (source);
  139. }
  140. static bool isOctalDigit (const juce_wchar c) noexcept
  141. {
  142. return c >= '0' && c <= '7';
  143. }
  144. static bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
  145. {
  146. if (source.nextChar() != '0')
  147. return false;
  148. if (! isOctalDigit (source.nextChar()))
  149. return false;
  150. while (isOctalDigit (source.peekNextChar()))
  151. source.skip();
  152. return skipNumberSuffix (source);
  153. }
  154. static bool isDecimalDigit (const juce_wchar c) noexcept
  155. {
  156. return c >= '0' && c <= '9';
  157. }
  158. static bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
  159. {
  160. int numChars = 0;
  161. while (isDecimalDigit (source.peekNextChar()))
  162. {
  163. ++numChars;
  164. source.skip();
  165. }
  166. if (numChars == 0)
  167. return false;
  168. return skipNumberSuffix (source);
  169. }
  170. static bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
  171. {
  172. int numDigits = 0;
  173. while (isDecimalDigit (source.peekNextChar()))
  174. {
  175. source.skip();
  176. ++numDigits;
  177. }
  178. const bool hasPoint = (source.peekNextChar() == '.');
  179. if (hasPoint)
  180. {
  181. source.skip();
  182. while (isDecimalDigit (source.peekNextChar()))
  183. {
  184. source.skip();
  185. ++numDigits;
  186. }
  187. }
  188. if (numDigits == 0)
  189. return false;
  190. juce_wchar c = source.peekNextChar();
  191. const bool hasExponent = (c == 'e' || c == 'E');
  192. if (hasExponent)
  193. {
  194. source.skip();
  195. c = source.peekNextChar();
  196. if (c == '+' || c == '-')
  197. source.skip();
  198. int numExpDigits = 0;
  199. while (isDecimalDigit (source.peekNextChar()))
  200. {
  201. source.skip();
  202. ++numExpDigits;
  203. }
  204. if (numExpDigits == 0)
  205. return false;
  206. }
  207. c = source.peekNextChar();
  208. if (c == 'f' || c == 'F')
  209. source.skip();
  210. else if (! (hasExponent || hasPoint))
  211. return false;
  212. return true;
  213. }
  214. static int parseNumber (CodeDocument::Iterator& source)
  215. {
  216. const CodeDocument::Iterator original (source);
  217. if (parseFloatLiteral (source))
  218. return tokenType_float;
  219. source = original;
  220. if (parseHexLiteral (source))
  221. return tokenType_integer;
  222. source = original;
  223. if (parseOctalLiteral (source))
  224. return tokenType_integer;
  225. source = original;
  226. if (parseDecimalLiteral (source))
  227. return tokenType_integer;
  228. source = original;
  229. return tokenType_error;
  230. }
  231. static void skipQuotedString (CodeDocument::Iterator& source) noexcept
  232. {
  233. const juce_wchar quote = source.nextChar();
  234. for (;;)
  235. {
  236. const juce_wchar c = source.nextChar();
  237. if (c == quote || c == 0)
  238. break;
  239. if (c == '\\')
  240. source.skip();
  241. }
  242. }
  243. static void skipComment (CodeDocument::Iterator& source) noexcept
  244. {
  245. bool lastWasStar = false;
  246. for (;;)
  247. {
  248. const juce_wchar c = source.nextChar();
  249. if (c == 0 || (c == '/' && lastWasStar))
  250. break;
  251. lastWasStar = (c == '*');
  252. }
  253. }
  254. static void skipPreprocessorLine (CodeDocument::Iterator& source) noexcept
  255. {
  256. bool lastWasBackslash = false;
  257. for (;;)
  258. {
  259. const juce_wchar c = source.peekNextChar();
  260. if (c == '"')
  261. {
  262. skipQuotedString (source);
  263. continue;
  264. }
  265. if (c == '/')
  266. {
  267. CodeDocument::Iterator next (source);
  268. next.skip();
  269. const juce_wchar c2 = next.peekNextChar();
  270. if (c2 == '/' || c2 == '*')
  271. return;
  272. }
  273. if (c == 0)
  274. break;
  275. if (c == '\n' || c == '\r')
  276. {
  277. source.skipToEndOfLine();
  278. if (lastWasBackslash)
  279. skipPreprocessorLine (source);
  280. break;
  281. }
  282. lastWasBackslash = (c == '\\');
  283. source.skip();
  284. }
  285. }
  286. static void skipIfNextCharMatches (CodeDocument::Iterator& source, const juce_wchar c) noexcept
  287. {
  288. if (source.peekNextChar() == c)
  289. source.skip();
  290. }
  291. static void skipIfNextCharMatches (CodeDocument::Iterator& source,
  292. const juce_wchar c1, const juce_wchar c2) noexcept
  293. {
  294. const juce_wchar c = source.peekNextChar();
  295. if (c == c1 || c == c2)
  296. source.skip();
  297. }
  298. }
  299. //==============================================================================
  300. CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser() {}
  301. CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser() {}
  302. int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
  303. {
  304. using namespace CppTokeniser;
  305. int result = tokenType_error;
  306. source.skipWhitespace();
  307. const juce_wchar firstChar = source.peekNextChar();
  308. switch (firstChar)
  309. {
  310. case 0:
  311. source.skip();
  312. break;
  313. case '0':
  314. case '1':
  315. case '2':
  316. case '3':
  317. case '4':
  318. case '5':
  319. case '6':
  320. case '7':
  321. case '8':
  322. case '9':
  323. case '.':
  324. result = parseNumber (source);
  325. if (result == tokenType_error)
  326. {
  327. source.skip();
  328. if (firstChar == '.')
  329. result = tokenType_punctuation;
  330. }
  331. break;
  332. case ',':
  333. case ';':
  334. case ':':
  335. source.skip();
  336. result = tokenType_punctuation;
  337. break;
  338. case '(':
  339. case ')':
  340. case '{':
  341. case '}':
  342. case '[':
  343. case ']':
  344. source.skip();
  345. result = tokenType_bracket;
  346. break;
  347. case '"':
  348. case '\'':
  349. skipQuotedString (source);
  350. result = tokenType_string;
  351. break;
  352. case '+':
  353. result = tokenType_operator;
  354. source.skip();
  355. skipIfNextCharMatches (source, '+', '=');
  356. break;
  357. case '-':
  358. source.skip();
  359. result = parseNumber (source);
  360. if (result == tokenType_error)
  361. {
  362. result = tokenType_operator;
  363. skipIfNextCharMatches (source, '-', '=');
  364. }
  365. break;
  366. case '*':
  367. case '%':
  368. case '=':
  369. case '!':
  370. result = tokenType_operator;
  371. source.skip();
  372. skipIfNextCharMatches (source, '=');
  373. break;
  374. case '/':
  375. result = tokenType_operator;
  376. source.skip();
  377. if (source.peekNextChar() == '=')
  378. {
  379. source.skip();
  380. }
  381. else if (source.peekNextChar() == '/')
  382. {
  383. result = tokenType_comment;
  384. source.skipToEndOfLine();
  385. }
  386. else if (source.peekNextChar() == '*')
  387. {
  388. source.skip();
  389. result = tokenType_comment;
  390. skipComment (source);
  391. }
  392. break;
  393. case '?':
  394. case '~':
  395. source.skip();
  396. result = tokenType_operator;
  397. break;
  398. case '<':
  399. case '>':
  400. case '|':
  401. case '&':
  402. case '^':
  403. source.skip();
  404. result = tokenType_operator;
  405. skipIfNextCharMatches (source, firstChar);
  406. skipIfNextCharMatches (source, '=');
  407. break;
  408. case '#':
  409. result = tokenType_preprocessor;
  410. skipPreprocessorLine (source);
  411. break;
  412. default:
  413. if (isIdentifierStart (firstChar))
  414. result = parseIdentifier (source);
  415. else
  416. source.skip();
  417. break;
  418. }
  419. return result;
  420. }
  421. CodeEditorComponent::ColourScheme CPlusPlusCodeTokeniser::getDefaultColourScheme()
  422. {
  423. struct Type
  424. {
  425. const char* name;
  426. uint32 colour;
  427. };
  428. const Type types[] =
  429. {
  430. { "Error", 0xffcc0000 },
  431. { "Comment", 0xff00aa00 },
  432. { "Keyword", 0xff0000cc },
  433. { "Operator", 0xff225500 },
  434. { "Identifier", 0xff000000 },
  435. { "Integer", 0xff880000 },
  436. { "Float", 0xff885500 },
  437. { "String", 0xff990099 },
  438. { "Bracket", 0xff000055 },
  439. { "Punctuation", 0xff004400 },
  440. { "Preprocessor Text", 0xff660000 }
  441. };
  442. CodeEditorComponent::ColourScheme cs;
  443. for (int i = 0; i < sizeof (types) / sizeof (types[0]); ++i) // (NB: numElementsInArray doesn't work here in GCC4.2)
  444. cs.set (types[i].name, Colour (types[i].colour));
  445. return cs;
  446. }
  447. bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) noexcept
  448. {
  449. return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
  450. }