The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

573 lines
15KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. namespace CppTokeniser
  19. {
  /** The categories of token that the tokeniser can report.

      The values are consecutive and start at zero; the colour table built in
      CPlusPlusCodeTokeniser::getDefaultColourScheme() lists its entries in
      this same order.
  */
  enum TokenType
  {
      tokenType_error         = 0,
      tokenType_comment       = 1,
      tokenType_keyword       = 2,
      tokenType_operator      = 3,
      tokenType_identifier    = 4,
      tokenType_integer       = 5,
      tokenType_float         = 6,
      tokenType_string        = 7,
      tokenType_bracket       = 8,
      tokenType_punctuation   = 9,
      tokenType_preprocessor  = 10
  };
  34. static bool isIdentifierStart (const juce_wchar c) noexcept
  35. {
  36. return CharacterFunctions::isLetter (c)
  37. || c == '_' || c == '@';
  38. }
  39. static bool isIdentifierBody (const juce_wchar c) noexcept
  40. {
  41. return CharacterFunctions::isLetterOrDigit (c)
  42. || c == '_' || c == '@';
  43. }
  44. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  45. {
  46. static const char* const keywords2Char[] =
  47. { "if", "do", "or", "id", 0 };
  48. static const char* const keywords3Char[] =
  49. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  50. static const char* const keywords4Char[] =
  51. { "bool", "void", "this", "true", "long", "else", "char",
  52. "enum", "case", "goto", "auto", 0 };
  53. static const char* const keywords5Char[] =
  54. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  55. "float", "short", "throw", "union", "using", "or_eq", 0 };
  56. static const char* const keywords6Char[] =
  57. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  58. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  59. "switch", "typeid", "wchar_t", "xor_eq", 0};
  60. static const char* const keywords7Char[] =
  61. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  62. static const char* const keywordsOther[] =
  63. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  64. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  65. "template", "typename", "unsigned", "volatile", "constexpr",
  66. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  67. "@private", "@property", "@protected", "@class", 0 };
  68. const char* const* k;
  69. switch (tokenLength)
  70. {
  71. case 2: k = keywords2Char; break;
  72. case 3: k = keywords3Char; break;
  73. case 4: k = keywords4Char; break;
  74. case 5: k = keywords5Char; break;
  75. case 6: k = keywords6Char; break;
  76. case 7: k = keywords7Char; break;
  77. default:
  78. if (tokenLength < 2 || tokenLength > 16)
  79. return false;
  80. k = keywordsOther;
  81. break;
  82. }
  83. for (int i = 0; k[i] != 0; ++i)
  84. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  85. return true;
  86. return false;
  87. }
  88. static int parseIdentifier (CodeDocument::Iterator& source) noexcept
  89. {
  90. int tokenLength = 0;
  91. String::CharPointerType::CharType possibleIdentifier [100];
  92. String::CharPointerType possible (possibleIdentifier);
  93. while (isIdentifierBody (source.peekNextChar()))
  94. {
  95. const juce_wchar c = source.nextChar();
  96. if (tokenLength < 20)
  97. possible.write (c);
  98. ++tokenLength;
  99. }
  100. if (tokenLength > 1 && tokenLength <= 16)
  101. {
  102. possible.writeNull();
  103. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  104. return tokenType_keyword;
  105. }
  106. return tokenType_identifier;
  107. }
  108. static bool skipNumberSuffix (CodeDocument::Iterator& source)
  109. {
  110. const juce_wchar c = source.peekNextChar();
  111. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  112. source.skip();
  113. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  114. return false;
  115. return true;
  116. }
  117. static bool isHexDigit (const juce_wchar c) noexcept
  118. {
  119. return (c >= '0' && c <= '9')
  120. || (c >= 'a' && c <= 'f')
  121. || (c >= 'A' && c <= 'F');
  122. }
  123. static bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
  124. {
  125. if (source.nextChar() != '0')
  126. return false;
  127. juce_wchar c = source.nextChar();
  128. if (c != 'x' && c != 'X')
  129. return false;
  130. int numDigits = 0;
  131. while (isHexDigit (source.peekNextChar()))
  132. {
  133. ++numDigits;
  134. source.skip();
  135. }
  136. if (numDigits == 0)
  137. return false;
  138. return skipNumberSuffix (source);
  139. }
  140. static bool isOctalDigit (const juce_wchar c) noexcept
  141. {
  142. return c >= '0' && c <= '7';
  143. }
  144. static bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
  145. {
  146. if (source.nextChar() != '0')
  147. return false;
  148. if (! isOctalDigit (source.nextChar()))
  149. return false;
  150. while (isOctalDigit (source.peekNextChar()))
  151. source.skip();
  152. return skipNumberSuffix (source);
  153. }
  154. static bool isDecimalDigit (const juce_wchar c) noexcept
  155. {
  156. return c >= '0' && c <= '9';
  157. }
  158. static bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
  159. {
  160. int numChars = 0;
  161. while (isDecimalDigit (source.peekNextChar()))
  162. {
  163. ++numChars;
  164. source.skip();
  165. }
  166. if (numChars == 0)
  167. return false;
  168. return skipNumberSuffix (source);
  169. }
  170. static bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
  171. {
  172. int numDigits = 0;
  173. while (isDecimalDigit (source.peekNextChar()))
  174. {
  175. source.skip();
  176. ++numDigits;
  177. }
  178. const bool hasPoint = (source.peekNextChar() == '.');
  179. if (hasPoint)
  180. {
  181. source.skip();
  182. while (isDecimalDigit (source.peekNextChar()))
  183. {
  184. source.skip();
  185. ++numDigits;
  186. }
  187. }
  188. if (numDigits == 0)
  189. return false;
  190. juce_wchar c = source.peekNextChar();
  191. const bool hasExponent = (c == 'e' || c == 'E');
  192. if (hasExponent)
  193. {
  194. source.skip();
  195. c = source.peekNextChar();
  196. if (c == '+' || c == '-')
  197. source.skip();
  198. int numExpDigits = 0;
  199. while (isDecimalDigit (source.peekNextChar()))
  200. {
  201. source.skip();
  202. ++numExpDigits;
  203. }
  204. if (numExpDigits == 0)
  205. return false;
  206. }
  207. c = source.peekNextChar();
  208. if (c == 'f' || c == 'F')
  209. source.skip();
  210. else if (! (hasExponent || hasPoint))
  211. return false;
  212. return true;
  213. }
  214. static int parseNumber (CodeDocument::Iterator& source)
  215. {
  216. const CodeDocument::Iterator original (source);
  217. if (parseFloatLiteral (source))
  218. return tokenType_float;
  219. source = original;
  220. if (parseHexLiteral (source))
  221. return tokenType_integer;
  222. source = original;
  223. if (parseOctalLiteral (source))
  224. return tokenType_integer;
  225. source = original;
  226. if (parseDecimalLiteral (source))
  227. return tokenType_integer;
  228. source = original;
  229. source.skip();
  230. return tokenType_error;
  231. }
  232. static void skipQuotedString (CodeDocument::Iterator& source) noexcept
  233. {
  234. const juce_wchar quote = source.nextChar();
  235. for (;;)
  236. {
  237. const juce_wchar c = source.nextChar();
  238. if (c == quote || c == 0)
  239. break;
  240. if (c == '\\')
  241. source.skip();
  242. }
  243. }
  244. static void skipComment (CodeDocument::Iterator& source) noexcept
  245. {
  246. bool lastWasStar = false;
  247. for (;;)
  248. {
  249. const juce_wchar c = source.nextChar();
  250. if (c == 0 || (c == '/' && lastWasStar))
  251. break;
  252. lastWasStar = (c == '*');
  253. }
  254. }
  255. static void skipPreprocessorLine (CodeDocument::Iterator& source) noexcept
  256. {
  257. bool lastWasBackslash = false;
  258. for (;;)
  259. {
  260. const juce_wchar c = source.peekNextChar();
  261. if (c == '"')
  262. {
  263. skipQuotedString (source);
  264. continue;
  265. }
  266. if (c == '/')
  267. {
  268. CodeDocument::Iterator next (source);
  269. next.skip();
  270. const juce_wchar c2 = next.peekNextChar();
  271. if (c2 == '/' || c2 == '*')
  272. return;
  273. }
  274. if (c == 0)
  275. break;
  276. if (c == '\n' || c == '\r')
  277. {
  278. source.skipToEndOfLine();
  279. if (lastWasBackslash)
  280. skipPreprocessorLine (source);
  281. break;
  282. }
  283. lastWasBackslash = (c == '\\');
  284. source.skip();
  285. }
  286. }
  287. static void skipIfNextCharMatches (CodeDocument::Iterator& source, const juce_wchar c) noexcept
  288. {
  289. if (source.peekNextChar() == c)
  290. source.skip();
  291. }
  292. static void skipIfNextCharMatches (CodeDocument::Iterator& source,
  293. const juce_wchar c1, const juce_wchar c2) noexcept
  294. {
  295. const juce_wchar c = source.peekNextChar();
  296. if (c == c1 || c == c2)
  297. source.skip();
  298. }
  299. }
  300. //==============================================================================
  301. CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser() {}
  302. CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser() {}
  303. int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
  304. {
  305. using namespace CppTokeniser;
  306. int result = tokenType_error;
  307. source.skipWhitespace();
  308. const juce_wchar firstChar = source.peekNextChar();
  309. switch (firstChar)
  310. {
  311. case 0:
  312. source.skip();
  313. break;
  314. case '0':
  315. case '1':
  316. case '2':
  317. case '3':
  318. case '4':
  319. case '5':
  320. case '6':
  321. case '7':
  322. case '8':
  323. case '9':
  324. result = parseNumber (source);
  325. break;
  326. case '.':
  327. result = parseNumber (source);
  328. if (result == tokenType_error)
  329. result = tokenType_punctuation;
  330. break;
  331. case ',':
  332. case ';':
  333. case ':':
  334. source.skip();
  335. result = tokenType_punctuation;
  336. break;
  337. case '(':
  338. case ')':
  339. case '{':
  340. case '}':
  341. case '[':
  342. case ']':
  343. source.skip();
  344. result = tokenType_bracket;
  345. break;
  346. case '"':
  347. case '\'':
  348. skipQuotedString (source);
  349. result = tokenType_string;
  350. break;
  351. case '+':
  352. result = tokenType_operator;
  353. source.skip();
  354. skipIfNextCharMatches (source, '+', '=');
  355. break;
  356. case '-':
  357. source.skip();
  358. result = parseNumber (source);
  359. if (result == tokenType_error)
  360. {
  361. result = tokenType_operator;
  362. skipIfNextCharMatches (source, '-', '=');
  363. }
  364. break;
  365. case '*':
  366. case '%':
  367. case '=':
  368. case '!':
  369. result = tokenType_operator;
  370. source.skip();
  371. skipIfNextCharMatches (source, '=');
  372. break;
  373. case '/':
  374. result = tokenType_operator;
  375. source.skip();
  376. if (source.peekNextChar() == '=')
  377. {
  378. source.skip();
  379. }
  380. else if (source.peekNextChar() == '/')
  381. {
  382. result = tokenType_comment;
  383. source.skipToEndOfLine();
  384. }
  385. else if (source.peekNextChar() == '*')
  386. {
  387. source.skip();
  388. result = tokenType_comment;
  389. skipComment (source);
  390. }
  391. break;
  392. case '?':
  393. case '~':
  394. source.skip();
  395. result = tokenType_operator;
  396. break;
  397. case '<':
  398. case '>':
  399. case '|':
  400. case '&':
  401. case '^':
  402. source.skip();
  403. result = tokenType_operator;
  404. skipIfNextCharMatches (source, firstChar);
  405. skipIfNextCharMatches (source, '=');
  406. break;
  407. case '#':
  408. result = tokenType_preprocessor;
  409. skipPreprocessorLine (source);
  410. break;
  411. default:
  412. if (isIdentifierStart (firstChar))
  413. result = parseIdentifier (source);
  414. else
  415. source.skip();
  416. break;
  417. }
  418. return result;
  419. }
  420. CodeEditorComponent::ColourScheme CPlusPlusCodeTokeniser::getDefaultColourScheme()
  421. {
  422. struct Type
  423. {
  424. const char* name;
  425. uint32 colour;
  426. };
  427. const Type types[] =
  428. {
  429. { "Error", 0xffcc0000 },
  430. { "Comment", 0xff00aa00 },
  431. { "Keyword", 0xff0000cc },
  432. { "Operator", 0xff225500 },
  433. { "Identifier", 0xff000000 },
  434. { "Integer", 0xff880000 },
  435. { "Float", 0xff885500 },
  436. { "String", 0xff990099 },
  437. { "Bracket", 0xff000055 },
  438. { "Punctuation", 0xff004400 },
  439. { "Preprocessor Text", 0xff660000 }
  440. };
  441. CodeEditorComponent::ColourScheme cs;
  442. for (int i = 0; i < sizeof (types) / sizeof (types[0]); ++i) // (NB: numElementsInArray doesn't work here in GCC4.2)
  443. cs.set (types[i].name, Colour (types[i].colour));
  444. return cs;
  445. }
  446. bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) noexcept
  447. {
  448. return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
  449. }