The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

531 lines
14KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. namespace CppTokeniser
  19. {
  /** Token categories returned (as plain ints) by the parsing helpers in this
      namespace and by CPlusPlusCodeTokeniser::readNextToken().

      The numeric values are spelled out explicitly so that they cannot shift
      by accident if an entry is ever inserted in the middle of the list.
  */
  enum TokenType
  {
      tokenType_error         = 0,   // unrecognised or malformed text
      tokenType_comment       = 1,
      tokenType_keyword       = 2,
      tokenType_operator      = 3,
      tokenType_identifier    = 4,
      tokenType_integer       = 5,
      tokenType_float         = 6,
      tokenType_string        = 7,
      tokenType_bracket       = 8,
      tokenType_punctuation   = 9,
      tokenType_preprocessor  = 10
  };
  34. static bool isIdentifierStart (const juce_wchar c) noexcept
  35. {
  36. return CharacterFunctions::isLetter (c)
  37. || c == '_' || c == '@';
  38. }
  39. static bool isIdentifierBody (const juce_wchar c) noexcept
  40. {
  41. return CharacterFunctions::isLetterOrDigit (c)
  42. || c == '_' || c == '@';
  43. }
  44. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  45. {
  46. static const char* const keywords2Char[] =
  47. { "if", "do", "or", "id", 0 };
  48. static const char* const keywords3Char[] =
  49. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  50. static const char* const keywords4Char[] =
  51. { "bool", "void", "this", "true", "long", "else", "char",
  52. "enum", "case", "goto", "auto", 0 };
  53. static const char* const keywords5Char[] =
  54. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  55. "float", "short", "throw", "union", "using", "or_eq", 0 };
  56. static const char* const keywords6Char[] =
  57. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  58. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  59. "switch", "typeid", "wchar_t", "xor_eq", 0};
  60. static const char* const keywords7Char[] =
  61. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  62. static const char* const keywordsOther[] =
  63. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  64. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  65. "template", "typename", "unsigned", "volatile", "constexpr",
  66. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  67. "@private", "@property", "@protected", "@class", 0 };
  68. const char* const* k;
  69. switch (tokenLength)
  70. {
  71. case 2: k = keywords2Char; break;
  72. case 3: k = keywords3Char; break;
  73. case 4: k = keywords4Char; break;
  74. case 5: k = keywords5Char; break;
  75. case 6: k = keywords6Char; break;
  76. case 7: k = keywords7Char; break;
  77. default:
  78. if (tokenLength < 2 || tokenLength > 16)
  79. return false;
  80. k = keywordsOther;
  81. break;
  82. }
  83. for (int i = 0; k[i] != 0; ++i)
  84. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  85. return true;
  86. return false;
  87. }
  88. static int parseIdentifier (CodeDocument::Iterator& source) noexcept
  89. {
  90. int tokenLength = 0;
  91. String::CharPointerType::CharType possibleIdentifier [100];
  92. String::CharPointerType possible (possibleIdentifier);
  93. while (isIdentifierBody (source.peekNextChar()))
  94. {
  95. const juce_wchar c = source.nextChar();
  96. if (tokenLength < 20)
  97. possible.write (c);
  98. ++tokenLength;
  99. }
  100. if (tokenLength > 1 && tokenLength <= 16)
  101. {
  102. possible.writeNull();
  103. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  104. return tokenType_keyword;
  105. }
  106. return tokenType_identifier;
  107. }
  108. static bool skipNumberSuffix (CodeDocument::Iterator& source)
  109. {
  110. const juce_wchar c = source.peekNextChar();
  111. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  112. source.skip();
  113. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  114. return false;
  115. return true;
  116. }
  117. static bool isHexDigit (const juce_wchar c) noexcept
  118. {
  119. return (c >= '0' && c <= '9')
  120. || (c >= 'a' && c <= 'f')
  121. || (c >= 'A' && c <= 'F');
  122. }
  123. static bool parseHexLiteral (CodeDocument::Iterator& source) noexcept
  124. {
  125. if (source.nextChar() != '0')
  126. return false;
  127. juce_wchar c = source.nextChar();
  128. if (c != 'x' && c != 'X')
  129. return false;
  130. int numDigits = 0;
  131. while (isHexDigit (source.peekNextChar()))
  132. {
  133. ++numDigits;
  134. source.skip();
  135. }
  136. if (numDigits == 0)
  137. return false;
  138. return skipNumberSuffix (source);
  139. }
  140. static bool isOctalDigit (const juce_wchar c) noexcept
  141. {
  142. return c >= '0' && c <= '7';
  143. }
  144. static bool parseOctalLiteral (CodeDocument::Iterator& source) noexcept
  145. {
  146. if (source.nextChar() != '0')
  147. return false;
  148. if (! isOctalDigit (source.nextChar()))
  149. return false;
  150. while (isOctalDigit (source.peekNextChar()))
  151. source.skip();
  152. return skipNumberSuffix (source);
  153. }
  154. static bool isDecimalDigit (const juce_wchar c) noexcept
  155. {
  156. return c >= '0' && c <= '9';
  157. }
  158. static bool parseDecimalLiteral (CodeDocument::Iterator& source) noexcept
  159. {
  160. int numChars = 0;
  161. while (isDecimalDigit (source.peekNextChar()))
  162. {
  163. ++numChars;
  164. source.skip();
  165. }
  166. if (numChars == 0)
  167. return false;
  168. return skipNumberSuffix (source);
  169. }
  170. static bool parseFloatLiteral (CodeDocument::Iterator& source) noexcept
  171. {
  172. int numDigits = 0;
  173. while (isDecimalDigit (source.peekNextChar()))
  174. {
  175. source.skip();
  176. ++numDigits;
  177. }
  178. const bool hasPoint = (source.peekNextChar() == '.');
  179. if (hasPoint)
  180. {
  181. source.skip();
  182. while (isDecimalDigit (source.peekNextChar()))
  183. {
  184. source.skip();
  185. ++numDigits;
  186. }
  187. }
  188. if (numDigits == 0)
  189. return false;
  190. juce_wchar c = source.peekNextChar();
  191. const bool hasExponent = (c == 'e' || c == 'E');
  192. if (hasExponent)
  193. {
  194. source.skip();
  195. c = source.peekNextChar();
  196. if (c == '+' || c == '-')
  197. source.skip();
  198. int numExpDigits = 0;
  199. while (isDecimalDigit (source.peekNextChar()))
  200. {
  201. source.skip();
  202. ++numExpDigits;
  203. }
  204. if (numExpDigits == 0)
  205. return false;
  206. }
  207. c = source.peekNextChar();
  208. if (c == 'f' || c == 'F')
  209. source.skip();
  210. else if (! (hasExponent || hasPoint))
  211. return false;
  212. return true;
  213. }
  214. static int parseNumber (CodeDocument::Iterator& source)
  215. {
  216. const CodeDocument::Iterator original (source);
  217. if (parseFloatLiteral (source))
  218. return tokenType_float;
  219. source = original;
  220. if (parseHexLiteral (source))
  221. return tokenType_integer;
  222. source = original;
  223. if (parseOctalLiteral (source))
  224. return tokenType_integer;
  225. source = original;
  226. if (parseDecimalLiteral (source))
  227. return tokenType_integer;
  228. source = original;
  229. source.skip();
  230. return tokenType_error;
  231. }
  232. static void skipQuotedString (CodeDocument::Iterator& source) noexcept
  233. {
  234. const juce_wchar quote = source.nextChar();
  235. for (;;)
  236. {
  237. const juce_wchar c = source.nextChar();
  238. if (c == quote || c == 0)
  239. break;
  240. if (c == '\\')
  241. source.skip();
  242. }
  243. }
  244. static void skipComment (CodeDocument::Iterator& source) noexcept
  245. {
  246. bool lastWasStar = false;
  247. for (;;)
  248. {
  249. const juce_wchar c = source.nextChar();
  250. if (c == 0 || (c == '/' && lastWasStar))
  251. break;
  252. lastWasStar = (c == '*');
  253. }
  254. }
  255. static void skipIfNextCharMatches (CodeDocument::Iterator& source, const juce_wchar c) noexcept
  256. {
  257. if (source.peekNextChar() == c)
  258. source.skip();
  259. }
  260. static void skipIfNextCharMatches (CodeDocument::Iterator& source,
  261. const juce_wchar c1, const juce_wchar c2) noexcept
  262. {
  263. const juce_wchar c = source.peekNextChar();
  264. if (c == c1 || c == c2)
  265. source.skip();
  266. }
  267. }
  268. //==============================================================================
  269. CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser() {}
  270. CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser() {}
  271. int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
  272. {
  273. using namespace CppTokeniser;
  274. int result = tokenType_error;
  275. source.skipWhitespace();
  276. const juce_wchar firstChar = source.peekNextChar();
  277. switch (firstChar)
  278. {
  279. case 0:
  280. source.skip();
  281. break;
  282. case '0':
  283. case '1':
  284. case '2':
  285. case '3':
  286. case '4':
  287. case '5':
  288. case '6':
  289. case '7':
  290. case '8':
  291. case '9':
  292. result = parseNumber (source);
  293. break;
  294. case '.':
  295. result = parseNumber (source);
  296. if (result == tokenType_error)
  297. result = tokenType_punctuation;
  298. break;
  299. case ',':
  300. case ';':
  301. case ':':
  302. source.skip();
  303. result = tokenType_punctuation;
  304. break;
  305. case '(':
  306. case ')':
  307. case '{':
  308. case '}':
  309. case '[':
  310. case ']':
  311. source.skip();
  312. result = tokenType_bracket;
  313. break;
  314. case '"':
  315. case '\'':
  316. skipQuotedString (source);
  317. result = tokenType_string;
  318. break;
  319. case '+':
  320. result = tokenType_operator;
  321. source.skip();
  322. skipIfNextCharMatches (source, '+', '=');
  323. break;
  324. case '-':
  325. source.skip();
  326. result = parseNumber (source);
  327. if (result == tokenType_error)
  328. {
  329. result = tokenType_operator;
  330. skipIfNextCharMatches (source, '-', '=');
  331. }
  332. break;
  333. case '*':
  334. case '%':
  335. case '=':
  336. case '!':
  337. result = tokenType_operator;
  338. source.skip();
  339. skipIfNextCharMatches (source, '=');
  340. break;
  341. case '/':
  342. result = tokenType_operator;
  343. source.skip();
  344. if (source.peekNextChar() == '=')
  345. {
  346. source.skip();
  347. }
  348. else if (source.peekNextChar() == '/')
  349. {
  350. result = tokenType_comment;
  351. source.skipToEndOfLine();
  352. }
  353. else if (source.peekNextChar() == '*')
  354. {
  355. source.skip();
  356. result = tokenType_comment;
  357. skipComment (source);
  358. }
  359. break;
  360. case '?':
  361. case '~':
  362. source.skip();
  363. result = tokenType_operator;
  364. break;
  365. case '<':
  366. case '>':
  367. case '|':
  368. case '&':
  369. case '^':
  370. source.skip();
  371. result = tokenType_operator;
  372. skipIfNextCharMatches (source, firstChar);
  373. skipIfNextCharMatches (source, '=');
  374. break;
  375. case '#':
  376. result = tokenType_preprocessor;
  377. source.skipToEndOfLine();
  378. break;
  379. default:
  380. if (isIdentifierStart (firstChar))
  381. result = parseIdentifier (source);
  382. else
  383. source.skip();
  384. break;
  385. }
  386. return result;
  387. }
  388. CodeEditorComponent::ColourScheme CPlusPlusCodeTokeniser::getDefaultColourScheme()
  389. {
  390. struct Type
  391. {
  392. const char* name;
  393. uint32 colour;
  394. };
  395. const Type types[] =
  396. {
  397. { "Error", 0xffcc0000 },
  398. { "Comment", 0xff00aa00 },
  399. { "Keyword", 0xff0000cc },
  400. { "Operator", 0xff225500 },
  401. { "Identifier", 0xff000000 },
  402. { "Integer", 0xff880000 },
  403. { "Float", 0xff885500 },
  404. { "String", 0xff990099 },
  405. { "Bracket", 0xff000055 },
  406. { "Punctuation", 0xff004400 },
  407. { "Preprocessor Text", 0xff660000 }
  408. };
  409. CodeEditorComponent::ColourScheme cs;
  410. for (int i = 0; i < sizeof (types) / sizeof (types[0]); ++i) // (NB: numElementsInArray doesn't work here in GCC4.2)
  411. cs.set (types[i].name, Colour (types[i].colour));
  412. return cs;
  413. }
  414. bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) noexcept
  415. {
  416. return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
  417. }