The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

571 lines
15KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. namespace CppTokeniser
  19. {
  /** The categories of token that the tokeniser can report.

      The values are consecutive integers starting at zero, and they appear in
      the same order as the colour names listed in getDefaultColourScheme().
  */
  enum TokenType
  {
      tokenType_error         = 0,
      tokenType_comment       = 1,
      tokenType_keyword       = 2,
      tokenType_operator      = 3,
      tokenType_identifier    = 4,
      tokenType_integer       = 5,
      tokenType_float         = 6,
      tokenType_string        = 7,
      tokenType_bracket       = 8,
      tokenType_punctuation   = 9,
      tokenType_preprocessor  = 10
  };
  34. static bool isIdentifierStart (const juce_wchar c) noexcept
  35. {
  36. return CharacterFunctions::isLetter (c)
  37. || c == '_' || c == '@';
  38. }
  39. static bool isIdentifierBody (const juce_wchar c) noexcept
  40. {
  41. return CharacterFunctions::isLetterOrDigit (c)
  42. || c == '_' || c == '@';
  43. }
  44. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  45. {
  46. static const char* const keywords2Char[] =
  47. { "if", "do", "or", "id", 0 };
  48. static const char* const keywords3Char[] =
  49. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  50. static const char* const keywords4Char[] =
  51. { "bool", "void", "this", "true", "long", "else", "char",
  52. "enum", "case", "goto", "auto", 0 };
  53. static const char* const keywords5Char[] =
  54. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  55. "float", "short", "throw", "union", "using", "or_eq", 0 };
  56. static const char* const keywords6Char[] =
  57. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  58. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  59. "switch", "typeid", "wchar_t", "xor_eq", 0};
  60. static const char* const keywords7Char[] =
  61. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  62. static const char* const keywordsOther[] =
  63. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  64. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  65. "template", "typename", "unsigned", "volatile", "constexpr",
  66. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  67. "@private", "@property", "@protected", "@class", 0 };
  68. const char* const* k;
  69. switch (tokenLength)
  70. {
  71. case 2: k = keywords2Char; break;
  72. case 3: k = keywords3Char; break;
  73. case 4: k = keywords4Char; break;
  74. case 5: k = keywords5Char; break;
  75. case 6: k = keywords6Char; break;
  76. case 7: k = keywords7Char; break;
  77. default:
  78. if (tokenLength < 2 || tokenLength > 16)
  79. return false;
  80. k = keywordsOther;
  81. break;
  82. }
  83. for (int i = 0; k[i] != 0; ++i)
  84. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  85. return true;
  86. return false;
  87. }
  88. static int parseIdentifier (CodeDocument::Iterator& source) noexcept
  89. {
  90. int tokenLength = 0;
  91. String::CharPointerType::CharType possibleIdentifier [100];
  92. String::CharPointerType possible (possibleIdentifier);
  93. while (isIdentifierBody (source.peekNextChar()))
  94. {
  95. const juce_wchar c = source.nextChar();
  96. if (tokenLength < 20)
  97. possible.write (c);
  98. ++tokenLength;
  99. }
  100. if (tokenLength > 1 && tokenLength <= 16)
  101. {
  102. possible.writeNull();
  103. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  104. return tokenType_keyword;
  105. }
  106. return tokenType_identifier;
  107. }
  108. static bool skipNumberSuffix (CodeDocument::Iterator& source)
  109. {
  110. const juce_wchar c = source.peekNextChar();
  111. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  112. source.skip();
  113. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  114. return false;
  115. return true;
  116. }
  117. static bool isHexDigit (const juce_wchar c) noexcept
  118. {
  119. return (c >= '0' && c <= '9')
  120. || (c >= 'a' && c <= 'f')
  121. || (c >= 'A' && c <= 'F');
  122. }
  123. static bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
  124. {
  125. if (source.nextChar() != '0')
  126. return false;
  127. juce_wchar c = source.nextChar();
  128. if (c != 'x' && c != 'X')
  129. return false;
  130. int numDigits = 0;
  131. while (isHexDigit (source.peekNextChar()))
  132. {
  133. ++numDigits;
  134. source.skip();
  135. }
  136. if (numDigits == 0)
  137. return false;
  138. return skipNumberSuffix (source);
  139. }
  140. static bool isOctalDigit (const juce_wchar c) noexcept
  141. {
  142. return c >= '0' && c <= '7';
  143. }
  144. static bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
  145. {
  146. if (source.nextChar() != '0')
  147. return false;
  148. if (! isOctalDigit (source.nextChar()))
  149. return false;
  150. while (isOctalDigit (source.peekNextChar()))
  151. source.skip();
  152. return skipNumberSuffix (source);
  153. }
  154. static bool isDecimalDigit (const juce_wchar c) noexcept
  155. {
  156. return c >= '0' && c <= '9';
  157. }
  158. static bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
  159. {
  160. int numChars = 0;
  161. while (isDecimalDigit (source.peekNextChar()))
  162. {
  163. ++numChars;
  164. source.skip();
  165. }
  166. if (numChars == 0)
  167. return false;
  168. return skipNumberSuffix (source);
  169. }
  170. static bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
  171. {
  172. int numDigits = 0;
  173. while (isDecimalDigit (source.peekNextChar()))
  174. {
  175. source.skip();
  176. ++numDigits;
  177. }
  178. const bool hasPoint = (source.peekNextChar() == '.');
  179. if (hasPoint)
  180. {
  181. source.skip();
  182. while (isDecimalDigit (source.peekNextChar()))
  183. {
  184. source.skip();
  185. ++numDigits;
  186. }
  187. }
  188. if (numDigits == 0)
  189. return false;
  190. juce_wchar c = source.peekNextChar();
  191. const bool hasExponent = (c == 'e' || c == 'E');
  192. if (hasExponent)
  193. {
  194. source.skip();
  195. c = source.peekNextChar();
  196. if (c == '+' || c == '-')
  197. source.skip();
  198. int numExpDigits = 0;
  199. while (isDecimalDigit (source.peekNextChar()))
  200. {
  201. source.skip();
  202. ++numExpDigits;
  203. }
  204. if (numExpDigits == 0)
  205. return false;
  206. }
  207. c = source.peekNextChar();
  208. if (c == 'f' || c == 'F')
  209. source.skip();
  210. else if (! (hasExponent || hasPoint))
  211. return false;
  212. return true;
  213. }
  214. static int parseNumber (CodeDocument::Iterator& source)
  215. {
  216. const CodeDocument::Iterator original (source);
  217. if (parseFloatLiteral (source))
  218. return tokenType_float;
  219. source = original;
  220. if (parseHexLiteral (source))
  221. return tokenType_integer;
  222. source = original;
  223. if (parseOctalLiteral (source))
  224. return tokenType_integer;
  225. source = original;
  226. if (parseDecimalLiteral (source))
  227. return tokenType_integer;
  228. source = original;
  229. source.skip();
  230. return tokenType_error;
  231. }
  232. static void skipQuotedString (CodeDocument::Iterator& source) noexcept
  233. {
  234. const juce_wchar quote = source.nextChar();
  235. for (;;)
  236. {
  237. const juce_wchar c = source.nextChar();
  238. if (c == quote || c == 0)
  239. break;
  240. if (c == '\\')
  241. source.skip();
  242. }
  243. }
  244. static void skipComment (CodeDocument::Iterator& source) noexcept
  245. {
  246. bool lastWasStar = false;
  247. for (;;)
  248. {
  249. const juce_wchar c = source.nextChar();
  250. if (c == 0 || (c == '/' && lastWasStar))
  251. break;
  252. lastWasStar = (c == '*');
  253. }
  254. }
  255. static void skipPreprocessorLine (CodeDocument::Iterator& source) noexcept
  256. {
  257. bool lastWasBackslash = false;
  258. for (;;)
  259. {
  260. const juce_wchar c = source.peekNextChar();
  261. if (c == '"')
  262. {
  263. skipQuotedString (source);
  264. continue;
  265. }
  266. if (c == '/')
  267. {
  268. const juce_wchar c2 = source.peekNextChar();
  269. if (c2 == '/' || c2 == '*')
  270. return;
  271. }
  272. if (c == 0)
  273. break;
  274. if (c == '\n' || c == '\r')
  275. {
  276. source.skipToEndOfLine();
  277. if (lastWasBackslash)
  278. skipPreprocessorLine (source);
  279. break;
  280. }
  281. lastWasBackslash = (c == '\\');
  282. source.skip();
  283. }
  284. }
  285. static void skipIfNextCharMatches (CodeDocument::Iterator& source, const juce_wchar c) noexcept
  286. {
  287. if (source.peekNextChar() == c)
  288. source.skip();
  289. }
  290. static void skipIfNextCharMatches (CodeDocument::Iterator& source,
  291. const juce_wchar c1, const juce_wchar c2) noexcept
  292. {
  293. const juce_wchar c = source.peekNextChar();
  294. if (c == c1 || c == c2)
  295. source.skip();
  296. }
  297. }
  298. //==============================================================================
  299. CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser() {}
  300. CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser() {}
  301. int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
  302. {
  303. using namespace CppTokeniser;
  304. int result = tokenType_error;
  305. source.skipWhitespace();
  306. const juce_wchar firstChar = source.peekNextChar();
  307. switch (firstChar)
  308. {
  309. case 0:
  310. source.skip();
  311. break;
  312. case '0':
  313. case '1':
  314. case '2':
  315. case '3':
  316. case '4':
  317. case '5':
  318. case '6':
  319. case '7':
  320. case '8':
  321. case '9':
  322. result = parseNumber (source);
  323. break;
  324. case '.':
  325. result = parseNumber (source);
  326. if (result == tokenType_error)
  327. result = tokenType_punctuation;
  328. break;
  329. case ',':
  330. case ';':
  331. case ':':
  332. source.skip();
  333. result = tokenType_punctuation;
  334. break;
  335. case '(':
  336. case ')':
  337. case '{':
  338. case '}':
  339. case '[':
  340. case ']':
  341. source.skip();
  342. result = tokenType_bracket;
  343. break;
  344. case '"':
  345. case '\'':
  346. skipQuotedString (source);
  347. result = tokenType_string;
  348. break;
  349. case '+':
  350. result = tokenType_operator;
  351. source.skip();
  352. skipIfNextCharMatches (source, '+', '=');
  353. break;
  354. case '-':
  355. source.skip();
  356. result = parseNumber (source);
  357. if (result == tokenType_error)
  358. {
  359. result = tokenType_operator;
  360. skipIfNextCharMatches (source, '-', '=');
  361. }
  362. break;
  363. case '*':
  364. case '%':
  365. case '=':
  366. case '!':
  367. result = tokenType_operator;
  368. source.skip();
  369. skipIfNextCharMatches (source, '=');
  370. break;
  371. case '/':
  372. result = tokenType_operator;
  373. source.skip();
  374. if (source.peekNextChar() == '=')
  375. {
  376. source.skip();
  377. }
  378. else if (source.peekNextChar() == '/')
  379. {
  380. result = tokenType_comment;
  381. source.skipToEndOfLine();
  382. }
  383. else if (source.peekNextChar() == '*')
  384. {
  385. source.skip();
  386. result = tokenType_comment;
  387. skipComment (source);
  388. }
  389. break;
  390. case '?':
  391. case '~':
  392. source.skip();
  393. result = tokenType_operator;
  394. break;
  395. case '<':
  396. case '>':
  397. case '|':
  398. case '&':
  399. case '^':
  400. source.skip();
  401. result = tokenType_operator;
  402. skipIfNextCharMatches (source, firstChar);
  403. skipIfNextCharMatches (source, '=');
  404. break;
  405. case '#':
  406. result = tokenType_preprocessor;
  407. skipPreprocessorLine (source);
  408. break;
  409. default:
  410. if (isIdentifierStart (firstChar))
  411. result = parseIdentifier (source);
  412. else
  413. source.skip();
  414. break;
  415. }
  416. return result;
  417. }
  418. CodeEditorComponent::ColourScheme CPlusPlusCodeTokeniser::getDefaultColourScheme()
  419. {
  420. struct Type
  421. {
  422. const char* name;
  423. uint32 colour;
  424. };
  425. const Type types[] =
  426. {
  427. { "Error", 0xffcc0000 },
  428. { "Comment", 0xff00aa00 },
  429. { "Keyword", 0xff0000cc },
  430. { "Operator", 0xff225500 },
  431. { "Identifier", 0xff000000 },
  432. { "Integer", 0xff880000 },
  433. { "Float", 0xff885500 },
  434. { "String", 0xff990099 },
  435. { "Bracket", 0xff000055 },
  436. { "Punctuation", 0xff004400 },
  437. { "Preprocessor Text", 0xff660000 }
  438. };
  439. CodeEditorComponent::ColourScheme cs;
  440. for (int i = 0; i < sizeof (types) / sizeof (types[0]); ++i) // (NB: numElementsInArray doesn't work here in GCC4.2)
  441. cs.set (types[i].name, Colour (types[i].colour));
  442. return cs;
  443. }
  444. bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) noexcept
  445. {
  446. return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
  447. }