The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

541 lines
15KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. /** Some basic functions for simple tokenising of C++ code. */
  19. struct CppTokeniserFunctions
  20. {
  21. static bool isIdentifierStart (const juce_wchar c) noexcept
  22. {
  23. return CharacterFunctions::isLetter (c)
  24. || c == '_' || c == '@';
  25. }
  26. static bool isIdentifierBody (const juce_wchar c) noexcept
  27. {
  28. return CharacterFunctions::isLetterOrDigit (c)
  29. || c == '_' || c == '@';
  30. }
  31. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  32. {
  33. static const char* const keywords2Char[] =
  34. { "if", "do", "or", "id", 0 };
  35. static const char* const keywords3Char[] =
  36. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  37. static const char* const keywords4Char[] =
  38. { "bool", "void", "this", "true", "long", "else", "char",
  39. "enum", "case", "goto", "auto", 0 };
  40. static const char* const keywords5Char[] =
  41. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  42. "float", "short", "throw", "union", "using", "or_eq", 0 };
  43. static const char* const keywords6Char[] =
  44. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  45. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  46. "switch", "typeid", "wchar_t", "xor_eq", 0};
  47. static const char* const keywords7Char[] =
  48. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  49. static const char* const keywordsOther[] =
  50. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  51. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  52. "template", "typename", "unsigned", "volatile", "constexpr",
  53. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  54. "@private", "@property", "@protected", "@class", 0 };
  55. const char* const* k;
  56. switch (tokenLength)
  57. {
  58. case 2: k = keywords2Char; break;
  59. case 3: k = keywords3Char; break;
  60. case 4: k = keywords4Char; break;
  61. case 5: k = keywords5Char; break;
  62. case 6: k = keywords6Char; break;
  63. case 7: k = keywords7Char; break;
  64. default:
  65. if (tokenLength < 2 || tokenLength > 16)
  66. return false;
  67. k = keywordsOther;
  68. break;
  69. }
  70. for (int i = 0; k[i] != 0; ++i)
  71. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  72. return true;
  73. return false;
  74. }
  75. template<class Iterator>
  76. static int parseIdentifier (Iterator& source) noexcept
  77. {
  78. int tokenLength = 0;
  79. String::CharPointerType::CharType possibleIdentifier [100];
  80. String::CharPointerType possible (possibleIdentifier);
  81. while (isIdentifierBody (source.peekNextChar()))
  82. {
  83. const juce_wchar c = source.nextChar();
  84. if (tokenLength < 20)
  85. possible.write (c);
  86. ++tokenLength;
  87. }
  88. if (tokenLength > 1 && tokenLength <= 16)
  89. {
  90. possible.writeNull();
  91. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  92. return CPlusPlusCodeTokeniser::tokenType_keyword;
  93. }
  94. return CPlusPlusCodeTokeniser::tokenType_identifier;
  95. }
  96. template<class Iterator>
  97. static bool skipNumberSuffix (Iterator& source)
  98. {
  99. const juce_wchar c = source.peekNextChar();
  100. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  101. source.skip();
  102. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  103. return false;
  104. return true;
  105. }
  106. static bool isHexDigit (const juce_wchar c) noexcept
  107. {
  108. return (c >= '0' && c <= '9')
  109. || (c >= 'a' && c <= 'f')
  110. || (c >= 'A' && c <= 'F');
  111. }
  112. template<class Iterator>
  113. static bool parseHexLiteral (Iterator& source) noexcept
  114. {
  115. if (source.peekNextChar() == '-')
  116. source.skip();
  117. if (source.nextChar() != '0')
  118. return false;
  119. juce_wchar c = source.nextChar();
  120. if (c != 'x' && c != 'X')
  121. return false;
  122. int numDigits = 0;
  123. while (isHexDigit (source.peekNextChar()))
  124. {
  125. ++numDigits;
  126. source.skip();
  127. }
  128. if (numDigits == 0)
  129. return false;
  130. return skipNumberSuffix (source);
  131. }
  132. static bool isOctalDigit (const juce_wchar c) noexcept
  133. {
  134. return c >= '0' && c <= '7';
  135. }
  136. template<class Iterator>
  137. static bool parseOctalLiteral (Iterator& source) noexcept
  138. {
  139. if (source.peekNextChar() == '-')
  140. source.skip();
  141. if (source.nextChar() != '0')
  142. return false;
  143. if (! isOctalDigit (source.nextChar()))
  144. return false;
  145. while (isOctalDigit (source.peekNextChar()))
  146. source.skip();
  147. return skipNumberSuffix (source);
  148. }
  149. static bool isDecimalDigit (const juce_wchar c) noexcept
  150. {
  151. return c >= '0' && c <= '9';
  152. }
  153. template<class Iterator>
  154. static bool parseDecimalLiteral (Iterator& source) noexcept
  155. {
  156. if (source.peekNextChar() == '-')
  157. source.skip();
  158. int numChars = 0;
  159. while (isDecimalDigit (source.peekNextChar()))
  160. {
  161. ++numChars;
  162. source.skip();
  163. }
  164. if (numChars == 0)
  165. return false;
  166. return skipNumberSuffix (source);
  167. }
  168. template<class Iterator>
  169. static bool parseFloatLiteral (Iterator& source) noexcept
  170. {
  171. if (source.peekNextChar() == '-')
  172. source.skip();
  173. int numDigits = 0;
  174. while (isDecimalDigit (source.peekNextChar()))
  175. {
  176. source.skip();
  177. ++numDigits;
  178. }
  179. const bool hasPoint = (source.peekNextChar() == '.');
  180. if (hasPoint)
  181. {
  182. source.skip();
  183. while (isDecimalDigit (source.peekNextChar()))
  184. {
  185. source.skip();
  186. ++numDigits;
  187. }
  188. }
  189. if (numDigits == 0)
  190. return false;
  191. juce_wchar c = source.peekNextChar();
  192. const bool hasExponent = (c == 'e' || c == 'E');
  193. if (hasExponent)
  194. {
  195. source.skip();
  196. c = source.peekNextChar();
  197. if (c == '+' || c == '-')
  198. source.skip();
  199. int numExpDigits = 0;
  200. while (isDecimalDigit (source.peekNextChar()))
  201. {
  202. source.skip();
  203. ++numExpDigits;
  204. }
  205. if (numExpDigits == 0)
  206. return false;
  207. }
  208. c = source.peekNextChar();
  209. if (c == 'f' || c == 'F')
  210. source.skip();
  211. else if (! (hasExponent || hasPoint))
  212. return false;
  213. return true;
  214. }
  215. template<class Iterator>
  216. static int parseNumber (Iterator& source)
  217. {
  218. const Iterator original (source);
  219. if (parseFloatLiteral (source))
  220. return CPlusPlusCodeTokeniser::tokenType_float;
  221. source = original;
  222. if (parseHexLiteral (source))
  223. return CPlusPlusCodeTokeniser::tokenType_integer;
  224. source = original;
  225. if (parseOctalLiteral (source))
  226. return CPlusPlusCodeTokeniser::tokenType_integer;
  227. source = original;
  228. if (parseDecimalLiteral (source))
  229. return CPlusPlusCodeTokeniser::tokenType_integer;
  230. source = original;
  231. return CPlusPlusCodeTokeniser::tokenType_error;
  232. }
  233. template<class Iterator>
  234. static void skipQuotedString (Iterator& source) noexcept
  235. {
  236. const juce_wchar quote = source.nextChar();
  237. for (;;)
  238. {
  239. const juce_wchar c = source.nextChar();
  240. if (c == quote || c == 0)
  241. break;
  242. if (c == '\\')
  243. source.skip();
  244. }
  245. }
  246. template<class Iterator>
  247. static void skipComment (Iterator& source) noexcept
  248. {
  249. bool lastWasStar = false;
  250. for (;;)
  251. {
  252. const juce_wchar c = source.nextChar();
  253. if (c == 0 || (c == '/' && lastWasStar))
  254. break;
  255. lastWasStar = (c == '*');
  256. }
  257. }
  258. template<class Iterator>
  259. static void skipPreprocessorLine (Iterator& source) noexcept
  260. {
  261. bool lastWasBackslash = false;
  262. for (;;)
  263. {
  264. const juce_wchar c = source.peekNextChar();
  265. if (c == '"')
  266. {
  267. skipQuotedString (source);
  268. continue;
  269. }
  270. if (c == '/')
  271. {
  272. Iterator next (source);
  273. next.skip();
  274. const juce_wchar c2 = next.peekNextChar();
  275. if (c2 == '/' || c2 == '*')
  276. return;
  277. }
  278. if (c == 0)
  279. break;
  280. if (c == '\n' || c == '\r')
  281. {
  282. source.skipToEndOfLine();
  283. if (lastWasBackslash)
  284. skipPreprocessorLine (source);
  285. break;
  286. }
  287. lastWasBackslash = (c == '\\');
  288. source.skip();
  289. }
  290. }
  291. template<class Iterator>
  292. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  293. {
  294. if (source.peekNextChar() == c)
  295. source.skip();
  296. }
  297. template<class Iterator>
  298. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  299. {
  300. const juce_wchar c = source.peekNextChar();
  301. if (c == c1 || c == c2)
  302. source.skip();
  303. }
  304. template<class Iterator>
  305. static int readNextToken (Iterator& source)
  306. {
  307. int result = CPlusPlusCodeTokeniser::tokenType_error;
  308. source.skipWhitespace();
  309. const juce_wchar firstChar = source.peekNextChar();
  310. switch (firstChar)
  311. {
  312. case 0:
  313. source.skip();
  314. break;
  315. case '0':
  316. case '1':
  317. case '2':
  318. case '3':
  319. case '4':
  320. case '5':
  321. case '6':
  322. case '7':
  323. case '8':
  324. case '9':
  325. case '.':
  326. result = parseNumber (source);
  327. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  328. {
  329. source.skip();
  330. if (firstChar == '.')
  331. result = CPlusPlusCodeTokeniser::tokenType_punctuation;
  332. }
  333. break;
  334. case ',':
  335. case ';':
  336. case ':':
  337. source.skip();
  338. result = CPlusPlusCodeTokeniser::tokenType_punctuation;
  339. break;
  340. case '(':
  341. case ')':
  342. case '{':
  343. case '}':
  344. case '[':
  345. case ']':
  346. source.skip();
  347. result = CPlusPlusCodeTokeniser::tokenType_bracket;
  348. break;
  349. case '"':
  350. case '\'':
  351. skipQuotedString (source);
  352. result = CPlusPlusCodeTokeniser::tokenType_string;
  353. break;
  354. case '+':
  355. result = CPlusPlusCodeTokeniser::tokenType_operator;
  356. source.skip();
  357. skipIfNextCharMatches (source, '+', '=');
  358. break;
  359. case '-':
  360. source.skip();
  361. result = parseNumber (source);
  362. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  363. {
  364. result = CPlusPlusCodeTokeniser::tokenType_operator;
  365. skipIfNextCharMatches (source, '-', '=');
  366. }
  367. break;
  368. case '*':
  369. case '%':
  370. case '=':
  371. case '!':
  372. result = CPlusPlusCodeTokeniser::tokenType_operator;
  373. source.skip();
  374. skipIfNextCharMatches (source, '=');
  375. break;
  376. case '/':
  377. result = CPlusPlusCodeTokeniser::tokenType_operator;
  378. source.skip();
  379. if (source.peekNextChar() == '=')
  380. {
  381. source.skip();
  382. }
  383. else if (source.peekNextChar() == '/')
  384. {
  385. result = CPlusPlusCodeTokeniser::tokenType_comment;
  386. source.skipToEndOfLine();
  387. }
  388. else if (source.peekNextChar() == '*')
  389. {
  390. source.skip();
  391. result = CPlusPlusCodeTokeniser::tokenType_comment;
  392. skipComment (source);
  393. }
  394. break;
  395. case '?':
  396. case '~':
  397. source.skip();
  398. result = CPlusPlusCodeTokeniser::tokenType_operator;
  399. break;
  400. case '<':
  401. case '>':
  402. case '|':
  403. case '&':
  404. case '^':
  405. source.skip();
  406. result = CPlusPlusCodeTokeniser::tokenType_operator;
  407. skipIfNextCharMatches (source, firstChar);
  408. skipIfNextCharMatches (source, '=');
  409. break;
  410. case '#':
  411. result = CPlusPlusCodeTokeniser::tokenType_preprocessor;
  412. skipPreprocessorLine (source);
  413. break;
  414. default:
  415. if (isIdentifierStart (firstChar))
  416. result = parseIdentifier (source);
  417. else
  418. source.skip();
  419. break;
  420. }
  421. return result;
  422. }
  423. };