The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

619 lines
16KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-10 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. #include "../../../core/juce_StandardHeader.h"
  19. BEGIN_JUCE_NAMESPACE
  20. #include "juce_CPlusPlusCodeTokeniser.h"
  21. #include "../../graphics/colour/juce_Colours.h"
  22. CPlusPlusCodeTokeniser::CPlusPlusCodeTokeniser()
  23. {
  24. }
  25. CPlusPlusCodeTokeniser::~CPlusPlusCodeTokeniser()
  26. {
  27. }
  28. //==============================================================================
  29. namespace CppTokeniser
  30. {
  31. bool isIdentifierStart (const juce_wchar c) throw()
  32. {
  33. return CharacterFunctions::isLetter (c)
  34. || c == '_' || c == '@';
  35. }
  36. bool isIdentifierBody (const juce_wchar c) throw()
  37. {
  38. return CharacterFunctions::isLetterOrDigit (c)
  39. || c == '_' || c == '@';
  40. }
  41. bool isReservedKeyword (String::CharPointerType token, const int tokenLength) throw()
  42. {
  43. static const char* const keywords2Char[] =
  44. { "if", "do", "or", "id", 0 };
  45. static const char* const keywords3Char[] =
  46. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  47. static const char* const keywords4Char[] =
  48. { "bool", "void", "this", "true", "long", "else", "char",
  49. "enum", "case", "goto", "auto", 0 };
  50. static const char* const keywords5Char[] =
  51. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  52. "float", "short", "throw", "union", "using", "or_eq", 0 };
  53. static const char* const keywords6Char[] =
  54. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  55. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  56. "switch", "typeid", "wchar_t", "xor_eq", 0};
  57. static const char* const keywordsOther[] =
  58. { "const_cast", "continue", "default", "explicit", "mutable", "namespace",
  59. "operator", "private", "protected", "register", "reinterpret_cast", "static_cast",
  60. "template", "typedef", "typename", "unsigned", "virtual", "volatile",
  61. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  62. "@private", "@property", "@protected", "@class", 0 };
  63. const char* const* k;
  64. switch (tokenLength)
  65. {
  66. case 2: k = keywords2Char; break;
  67. case 3: k = keywords3Char; break;
  68. case 4: k = keywords4Char; break;
  69. case 5: k = keywords5Char; break;
  70. case 6: k = keywords6Char; break;
  71. default:
  72. if (tokenLength < 2 || tokenLength > 16)
  73. return false;
  74. k = keywordsOther;
  75. break;
  76. }
  77. int i = 0;
  78. while (k[i] != 0)
  79. {
  80. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  81. return true;
  82. ++i;
  83. }
  84. return false;
  85. }
  86. int parseIdentifier (CodeDocument::Iterator& source) throw()
  87. {
  88. int tokenLength = 0;
  89. juce_wchar possibleIdentifier [19];
  90. while (isIdentifierBody (source.peekNextChar()))
  91. {
  92. const juce_wchar c = source.nextChar();
  93. if (tokenLength < numElementsInArray (possibleIdentifier) - 1)
  94. possibleIdentifier [tokenLength] = c;
  95. ++tokenLength;
  96. }
  97. if (tokenLength > 1 && tokenLength <= 16)
  98. {
  99. possibleIdentifier [tokenLength] = 0;
  100. if (isReservedKeyword (CharPointer_UTF32 (possibleIdentifier), tokenLength))
  101. return CPlusPlusCodeTokeniser::tokenType_builtInKeyword;
  102. }
  103. return CPlusPlusCodeTokeniser::tokenType_identifier;
  104. }
  105. bool skipNumberSuffix (CodeDocument::Iterator& source)
  106. {
  107. const juce_wchar c = source.peekNextChar();
  108. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  109. source.skip();
  110. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  111. return false;
  112. return true;
  113. }
  114. bool isHexDigit (const juce_wchar c) throw()
  115. {
  116. return (c >= '0' && c <= '9')
  117. || (c >= 'a' && c <= 'f')
  118. || (c >= 'A' && c <= 'F');
  119. }
  120. bool parseHexLiteral (CodeDocument::Iterator& source) throw()
  121. {
  122. if (source.nextChar() != '0')
  123. return false;
  124. juce_wchar c = source.nextChar();
  125. if (c != 'x' && c != 'X')
  126. return false;
  127. int numDigits = 0;
  128. while (isHexDigit (source.peekNextChar()))
  129. {
  130. ++numDigits;
  131. source.skip();
  132. }
  133. if (numDigits == 0)
  134. return false;
  135. return skipNumberSuffix (source);
  136. }
  137. bool isOctalDigit (const juce_wchar c) throw()
  138. {
  139. return c >= '0' && c <= '7';
  140. }
  141. bool parseOctalLiteral (CodeDocument::Iterator& source) throw()
  142. {
  143. if (source.nextChar() != '0')
  144. return false;
  145. if (! isOctalDigit (source.nextChar()))
  146. return false;
  147. while (isOctalDigit (source.peekNextChar()))
  148. source.skip();
  149. return skipNumberSuffix (source);
  150. }
  151. bool isDecimalDigit (const juce_wchar c) throw()
  152. {
  153. return c >= '0' && c <= '9';
  154. }
  155. bool parseDecimalLiteral (CodeDocument::Iterator& source) throw()
  156. {
  157. int numChars = 0;
  158. while (isDecimalDigit (source.peekNextChar()))
  159. {
  160. ++numChars;
  161. source.skip();
  162. }
  163. if (numChars == 0)
  164. return false;
  165. return skipNumberSuffix (source);
  166. }
  167. bool parseFloatLiteral (CodeDocument::Iterator& source) throw()
  168. {
  169. int numDigits = 0;
  170. while (isDecimalDigit (source.peekNextChar()))
  171. {
  172. source.skip();
  173. ++numDigits;
  174. }
  175. const bool hasPoint = (source.peekNextChar() == '.');
  176. if (hasPoint)
  177. {
  178. source.skip();
  179. while (isDecimalDigit (source.peekNextChar()))
  180. {
  181. source.skip();
  182. ++numDigits;
  183. }
  184. }
  185. if (numDigits == 0)
  186. return false;
  187. juce_wchar c = source.peekNextChar();
  188. const bool hasExponent = (c == 'e' || c == 'E');
  189. if (hasExponent)
  190. {
  191. source.skip();
  192. c = source.peekNextChar();
  193. if (c == '+' || c == '-')
  194. source.skip();
  195. int numExpDigits = 0;
  196. while (isDecimalDigit (source.peekNextChar()))
  197. {
  198. source.skip();
  199. ++numExpDigits;
  200. }
  201. if (numExpDigits == 0)
  202. return false;
  203. }
  204. c = source.peekNextChar();
  205. if (c == 'f' || c == 'F')
  206. source.skip();
  207. else if (! (hasExponent || hasPoint))
  208. return false;
  209. return true;
  210. }
  211. int parseNumber (CodeDocument::Iterator& source)
  212. {
  213. const CodeDocument::Iterator original (source);
  214. if (parseFloatLiteral (source))
  215. return CPlusPlusCodeTokeniser::tokenType_floatLiteral;
  216. source = original;
  217. if (parseHexLiteral (source))
  218. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  219. source = original;
  220. if (parseOctalLiteral (source))
  221. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  222. source = original;
  223. if (parseDecimalLiteral (source))
  224. return CPlusPlusCodeTokeniser::tokenType_integerLiteral;
  225. source = original;
  226. source.skip();
  227. return CPlusPlusCodeTokeniser::tokenType_error;
  228. }
  229. void skipQuotedString (CodeDocument::Iterator& source) throw()
  230. {
  231. const juce_wchar quote = source.nextChar();
  232. for (;;)
  233. {
  234. const juce_wchar c = source.nextChar();
  235. if (c == quote || c == 0)
  236. break;
  237. if (c == '\\')
  238. source.skip();
  239. }
  240. }
  241. void skipComment (CodeDocument::Iterator& source) throw()
  242. {
  243. bool lastWasStar = false;
  244. for (;;)
  245. {
  246. const juce_wchar c = source.nextChar();
  247. if (c == 0 || (c == '/' && lastWasStar))
  248. break;
  249. lastWasStar = (c == '*');
  250. }
  251. }
  252. }
  253. //==============================================================================
  254. int CPlusPlusCodeTokeniser::readNextToken (CodeDocument::Iterator& source)
  255. {
  256. int result = tokenType_error;
  257. source.skipWhitespace();
  258. juce_wchar firstChar = source.peekNextChar();
  259. switch (firstChar)
  260. {
  261. case 0:
  262. source.skip();
  263. break;
  264. case '0':
  265. case '1':
  266. case '2':
  267. case '3':
  268. case '4':
  269. case '5':
  270. case '6':
  271. case '7':
  272. case '8':
  273. case '9':
  274. result = CppTokeniser::parseNumber (source);
  275. break;
  276. case '.':
  277. result = CppTokeniser::parseNumber (source);
  278. if (result == tokenType_error)
  279. result = tokenType_punctuation;
  280. break;
  281. case ',':
  282. case ';':
  283. case ':':
  284. source.skip();
  285. result = tokenType_punctuation;
  286. break;
  287. case '(':
  288. case ')':
  289. case '{':
  290. case '}':
  291. case '[':
  292. case ']':
  293. source.skip();
  294. result = tokenType_bracket;
  295. break;
  296. case '"':
  297. case '\'':
  298. CppTokeniser::skipQuotedString (source);
  299. result = tokenType_stringLiteral;
  300. break;
  301. case '+':
  302. result = tokenType_operator;
  303. source.skip();
  304. if (source.peekNextChar() == '+')
  305. source.skip();
  306. else if (source.peekNextChar() == '=')
  307. source.skip();
  308. break;
  309. case '-':
  310. source.skip();
  311. result = CppTokeniser::parseNumber (source);
  312. if (result == tokenType_error)
  313. {
  314. result = tokenType_operator;
  315. if (source.peekNextChar() == '-')
  316. source.skip();
  317. else if (source.peekNextChar() == '=')
  318. source.skip();
  319. }
  320. break;
  321. case '*':
  322. case '%':
  323. case '=':
  324. case '!':
  325. result = tokenType_operator;
  326. source.skip();
  327. if (source.peekNextChar() == '=')
  328. source.skip();
  329. break;
  330. case '/':
  331. result = tokenType_operator;
  332. source.skip();
  333. if (source.peekNextChar() == '=')
  334. {
  335. source.skip();
  336. }
  337. else if (source.peekNextChar() == '/')
  338. {
  339. result = tokenType_comment;
  340. source.skipToEndOfLine();
  341. }
  342. else if (source.peekNextChar() == '*')
  343. {
  344. source.skip();
  345. result = tokenType_comment;
  346. CppTokeniser::skipComment (source);
  347. }
  348. break;
  349. case '?':
  350. case '~':
  351. source.skip();
  352. result = tokenType_operator;
  353. break;
  354. case '<':
  355. source.skip();
  356. result = tokenType_operator;
  357. if (source.peekNextChar() == '=')
  358. {
  359. source.skip();
  360. }
  361. else if (source.peekNextChar() == '<')
  362. {
  363. source.skip();
  364. if (source.peekNextChar() == '=')
  365. source.skip();
  366. }
  367. break;
  368. case '>':
  369. source.skip();
  370. result = tokenType_operator;
  371. if (source.peekNextChar() == '=')
  372. {
  373. source.skip();
  374. }
  375. else if (source.peekNextChar() == '<')
  376. {
  377. source.skip();
  378. if (source.peekNextChar() == '=')
  379. source.skip();
  380. }
  381. break;
  382. case '|':
  383. source.skip();
  384. result = tokenType_operator;
  385. if (source.peekNextChar() == '=')
  386. {
  387. source.skip();
  388. }
  389. else if (source.peekNextChar() == '|')
  390. {
  391. source.skip();
  392. if (source.peekNextChar() == '=')
  393. source.skip();
  394. }
  395. break;
  396. case '&':
  397. source.skip();
  398. result = tokenType_operator;
  399. if (source.peekNextChar() == '=')
  400. {
  401. source.skip();
  402. }
  403. else if (source.peekNextChar() == '&')
  404. {
  405. source.skip();
  406. if (source.peekNextChar() == '=')
  407. source.skip();
  408. }
  409. break;
  410. case '^':
  411. source.skip();
  412. result = tokenType_operator;
  413. if (source.peekNextChar() == '=')
  414. {
  415. source.skip();
  416. }
  417. else if (source.peekNextChar() == '^')
  418. {
  419. source.skip();
  420. if (source.peekNextChar() == '=')
  421. source.skip();
  422. }
  423. break;
  424. case '#':
  425. result = tokenType_preprocessor;
  426. source.skipToEndOfLine();
  427. break;
  428. default:
  429. if (CppTokeniser::isIdentifierStart (firstChar))
  430. result = CppTokeniser::parseIdentifier (source);
  431. else
  432. source.skip();
  433. break;
  434. }
  435. return result;
  436. }
  437. const StringArray CPlusPlusCodeTokeniser::getTokenTypes()
  438. {
  439. const char* const types[] =
  440. {
  441. "Error",
  442. "Comment",
  443. "C++ keyword",
  444. "Identifier",
  445. "Integer literal",
  446. "Float literal",
  447. "String literal",
  448. "Operator",
  449. "Bracket",
  450. "Punctuation",
  451. "Preprocessor line",
  452. 0
  453. };
  454. return StringArray (types);
  455. }
  456. const Colour CPlusPlusCodeTokeniser::getDefaultColour (const int tokenType)
  457. {
  458. const uint32 colours[] =
  459. {
  460. 0xffcc0000, // error
  461. 0xff00aa00, // comment
  462. 0xff0000cc, // keyword
  463. 0xff000000, // identifier
  464. 0xff880000, // int literal
  465. 0xff885500, // float literal
  466. 0xff990099, // string literal
  467. 0xff225500, // operator
  468. 0xff000055, // bracket
  469. 0xff004400, // punctuation
  470. 0xff660000 // preprocessor
  471. };
  472. if (tokenType >= 0 && tokenType < numElementsInArray (colours))
  473. return Colour (colours [tokenType]);
  474. return Colours::black;
  475. }
  476. bool CPlusPlusCodeTokeniser::isReservedKeyword (const String& token) throw()
  477. {
  478. return CppTokeniser::isReservedKeyword (token.getCharPointer(), token.length());
  479. }
  480. END_JUCE_NAMESPACE