The JUCE cross-platform C++ framework, with DISTRHO/KXStudio specific changes
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

529 lines
15KB

  1. /*
  2. ==============================================================================
  3. This file is part of the JUCE library - "Jules' Utility Class Extensions"
  4. Copyright 2004-11 by Raw Material Software Ltd.
  5. ------------------------------------------------------------------------------
  6. JUCE can be redistributed and/or modified under the terms of the GNU General
  7. Public License (Version 2), as published by the Free Software Foundation.
  8. A copy of the license is included in the JUCE distribution, or can be found
  9. online at www.gnu.org/licenses.
  10. JUCE is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12. A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  13. ------------------------------------------------------------------------------
  14. To release a closed-source product which uses JUCE, commercial licenses are
  15. available: visit www.rawmaterialsoftware.com/juce for more information.
  16. ==============================================================================
  17. */
  18. /** Some basic functions for simple tokenising of C++ code. */
  19. struct CppTokeniserFunctions
  20. {
  21. static bool isIdentifierStart (const juce_wchar c) noexcept
  22. {
  23. return CharacterFunctions::isLetter (c)
  24. || c == '_' || c == '@';
  25. }
  26. static bool isIdentifierBody (const juce_wchar c) noexcept
  27. {
  28. return CharacterFunctions::isLetterOrDigit (c)
  29. || c == '_' || c == '@';
  30. }
  31. static bool isReservedKeyword (String::CharPointerType token, const int tokenLength) noexcept
  32. {
  33. static const char* const keywords2Char[] =
  34. { "if", "do", "or", "id", 0 };
  35. static const char* const keywords3Char[] =
  36. { "for", "int", "new", "try", "xor", "and", "asm", "not", 0 };
  37. static const char* const keywords4Char[] =
  38. { "bool", "void", "this", "true", "long", "else", "char",
  39. "enum", "case", "goto", "auto", 0 };
  40. static const char* const keywords5Char[] =
  41. { "while", "bitor", "break", "catch", "class", "compl", "const", "false",
  42. "float", "short", "throw", "union", "using", "or_eq", 0 };
  43. static const char* const keywords6Char[] =
  44. { "return", "struct", "and_eq", "bitand", "delete", "double", "extern",
  45. "friend", "inline", "not_eq", "public", "sizeof", "static", "signed",
  46. "switch", "typeid", "wchar_t", "xor_eq", 0};
  47. static const char* const keywords7Char[] =
  48. { "default", "mutable", "private", "typedef", "nullptr", "virtual", 0 };
  49. static const char* const keywordsOther[] =
  50. { "noexcept", "const_cast", "continue", "explicit", "namespace",
  51. "operator", "protected", "register", "reinterpret_cast", "static_cast",
  52. "template", "typename", "unsigned", "volatile", "constexpr",
  53. "@implementation", "@interface", "@end", "@synthesize", "@dynamic", "@public",
  54. "@private", "@property", "@protected", "@class", 0 };
  55. const char* const* k;
  56. switch (tokenLength)
  57. {
  58. case 2: k = keywords2Char; break;
  59. case 3: k = keywords3Char; break;
  60. case 4: k = keywords4Char; break;
  61. case 5: k = keywords5Char; break;
  62. case 6: k = keywords6Char; break;
  63. case 7: k = keywords7Char; break;
  64. default:
  65. if (tokenLength < 2 || tokenLength > 16)
  66. return false;
  67. k = keywordsOther;
  68. break;
  69. }
  70. for (int i = 0; k[i] != 0; ++i)
  71. if (token.compare (CharPointer_ASCII (k[i])) == 0)
  72. return true;
  73. return false;
  74. }
  75. template<class Iterator>
  76. static int parseIdentifier (Iterator& source) noexcept
  77. {
  78. int tokenLength = 0;
  79. String::CharPointerType::CharType possibleIdentifier [100];
  80. String::CharPointerType possible (possibleIdentifier);
  81. while (isIdentifierBody (source.peekNextChar()))
  82. {
  83. const juce_wchar c = source.nextChar();
  84. if (tokenLength < 20)
  85. possible.write (c);
  86. ++tokenLength;
  87. }
  88. if (tokenLength > 1 && tokenLength <= 16)
  89. {
  90. possible.writeNull();
  91. if (isReservedKeyword (String::CharPointerType (possibleIdentifier), tokenLength))
  92. return CPlusPlusCodeTokeniser::tokenType_keyword;
  93. }
  94. return CPlusPlusCodeTokeniser::tokenType_identifier;
  95. }
  96. template<class Iterator>
  97. static bool skipNumberSuffix (Iterator& source)
  98. {
  99. const juce_wchar c = source.peekNextChar();
  100. if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
  101. source.skip();
  102. if (CharacterFunctions::isLetterOrDigit (source.peekNextChar()))
  103. return false;
  104. return true;
  105. }
  106. static bool isHexDigit (const juce_wchar c) noexcept
  107. {
  108. return (c >= '0' && c <= '9')
  109. || (c >= 'a' && c <= 'f')
  110. || (c >= 'A' && c <= 'F');
  111. }
  112. template<class Iterator>
  113. static bool parseHexLiteral (Iterator& source) noexcept
  114. {
  115. if (source.nextChar() != '0')
  116. return false;
  117. juce_wchar c = source.nextChar();
  118. if (c != 'x' && c != 'X')
  119. return false;
  120. int numDigits = 0;
  121. while (isHexDigit (source.peekNextChar()))
  122. {
  123. ++numDigits;
  124. source.skip();
  125. }
  126. if (numDigits == 0)
  127. return false;
  128. return skipNumberSuffix (source);
  129. }
  130. static bool isOctalDigit (const juce_wchar c) noexcept
  131. {
  132. return c >= '0' && c <= '7';
  133. }
  /** Tries to consume an octal literal: a '0', at least one more octal digit,
      then whatever skipNumberSuffix() accepts.

      The iterator may be left part-way through the text when this returns false.

      @returns true if a complete octal literal was consumed
  */
  template<class Iterator>
  static bool parseOctalLiteral (Iterator& source) noexcept
  {
      // The second character is only read if the first one was a '0'.
      if (source.nextChar() != '0' || ! isOctalDigit (source.nextChar()))
          return false;

      for (; isOctalDigit (source.peekNextChar());)
          source.skip();

      return skipNumberSuffix (source);
  }
  145. static bool isDecimalDigit (const juce_wchar c) noexcept
  146. {
  147. return c >= '0' && c <= '9';
  148. }
  /** Tries to consume a decimal integer literal: one or more decimal digits,
      then whatever skipNumberSuffix() accepts.

      @returns true if a complete decimal literal was consumed
  */
  template<class Iterator>
  static bool parseDecimalLiteral (Iterator& source) noexcept
  {
      // Nothing is consumed if the text doesn't start with a digit.
      if (! isDecimalDigit (source.peekNextChar()))
          return false;

      do
      {
          source.skip();
      }
      while (isDecimalDigit (source.peekNextChar()));

      return skipNumberSuffix (source);
  }
  162. template<class Iterator>
  163. static bool parseFloatLiteral (Iterator& source) noexcept
  164. {
  165. int numDigits = 0;
  166. while (isDecimalDigit (source.peekNextChar()))
  167. {
  168. source.skip();
  169. ++numDigits;
  170. }
  171. const bool hasPoint = (source.peekNextChar() == '.');
  172. if (hasPoint)
  173. {
  174. source.skip();
  175. while (isDecimalDigit (source.peekNextChar()))
  176. {
  177. source.skip();
  178. ++numDigits;
  179. }
  180. }
  181. if (numDigits == 0)
  182. return false;
  183. juce_wchar c = source.peekNextChar();
  184. const bool hasExponent = (c == 'e' || c == 'E');
  185. if (hasExponent)
  186. {
  187. source.skip();
  188. c = source.peekNextChar();
  189. if (c == '+' || c == '-')
  190. source.skip();
  191. int numExpDigits = 0;
  192. while (isDecimalDigit (source.peekNextChar()))
  193. {
  194. source.skip();
  195. ++numExpDigits;
  196. }
  197. if (numExpDigits == 0)
  198. return false;
  199. }
  200. c = source.peekNextChar();
  201. if (c == 'f' || c == 'F')
  202. source.skip();
  203. else if (! (hasExponent || hasPoint))
  204. return false;
  205. return true;
  206. }
  207. template<class Iterator>
  208. static int parseNumber (Iterator& source)
  209. {
  210. const Iterator original (source);
  211. if (parseFloatLiteral (source))
  212. return CPlusPlusCodeTokeniser::tokenType_float;
  213. source = original;
  214. if (parseHexLiteral (source))
  215. return CPlusPlusCodeTokeniser::tokenType_integer;
  216. source = original;
  217. if (parseOctalLiteral (source))
  218. return CPlusPlusCodeTokeniser::tokenType_integer;
  219. source = original;
  220. if (parseDecimalLiteral (source))
  221. return CPlusPlusCodeTokeniser::tokenType_integer;
  222. source = original;
  223. return CPlusPlusCodeTokeniser::tokenType_error;
  224. }
  225. template<class Iterator>
  226. static void skipQuotedString (Iterator& source) noexcept
  227. {
  228. const juce_wchar quote = source.nextChar();
  229. for (;;)
  230. {
  231. const juce_wchar c = source.nextChar();
  232. if (c == quote || c == 0)
  233. break;
  234. if (c == '\\')
  235. source.skip();
  236. }
  237. }
  238. template<class Iterator>
  239. static void skipComment (Iterator& source) noexcept
  240. {
  241. bool lastWasStar = false;
  242. for (;;)
  243. {
  244. const juce_wchar c = source.nextChar();
  245. if (c == 0 || (c == '/' && lastWasStar))
  246. break;
  247. lastWasStar = (c == '*');
  248. }
  249. }
  250. template<class Iterator>
  251. static void skipPreprocessorLine (Iterator& source) noexcept
  252. {
  253. bool lastWasBackslash = false;
  254. for (;;)
  255. {
  256. const juce_wchar c = source.peekNextChar();
  257. if (c == '"')
  258. {
  259. skipQuotedString (source);
  260. continue;
  261. }
  262. if (c == '/')
  263. {
  264. Iterator next (source);
  265. next.skip();
  266. const juce_wchar c2 = next.peekNextChar();
  267. if (c2 == '/' || c2 == '*')
  268. return;
  269. }
  270. if (c == 0)
  271. break;
  272. if (c == '\n' || c == '\r')
  273. {
  274. source.skipToEndOfLine();
  275. if (lastWasBackslash)
  276. skipPreprocessorLine (source);
  277. break;
  278. }
  279. lastWasBackslash = (c == '\\');
  280. source.skip();
  281. }
  282. }
  283. template<class Iterator>
  284. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c) noexcept
  285. {
  286. if (source.peekNextChar() == c)
  287. source.skip();
  288. }
  289. template<class Iterator>
  290. static void skipIfNextCharMatches (Iterator& source, const juce_wchar c1, const juce_wchar c2) noexcept
  291. {
  292. const juce_wchar c = source.peekNextChar();
  293. if (c == c1 || c == c2)
  294. source.skip();
  295. }
  296. template<class Iterator>
  297. static int readNextToken (Iterator& source)
  298. {
  299. int result = CPlusPlusCodeTokeniser::tokenType_error;
  300. source.skipWhitespace();
  301. const juce_wchar firstChar = source.peekNextChar();
  302. switch (firstChar)
  303. {
  304. case 0:
  305. source.skip();
  306. break;
  307. case '0':
  308. case '1':
  309. case '2':
  310. case '3':
  311. case '4':
  312. case '5':
  313. case '6':
  314. case '7':
  315. case '8':
  316. case '9':
  317. case '.':
  318. result = parseNumber (source);
  319. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  320. {
  321. source.skip();
  322. if (firstChar == '.')
  323. result = CPlusPlusCodeTokeniser::tokenType_punctuation;
  324. }
  325. break;
  326. case ',':
  327. case ';':
  328. case ':':
  329. source.skip();
  330. result = CPlusPlusCodeTokeniser::tokenType_punctuation;
  331. break;
  332. case '(':
  333. case ')':
  334. case '{':
  335. case '}':
  336. case '[':
  337. case ']':
  338. source.skip();
  339. result = CPlusPlusCodeTokeniser::tokenType_bracket;
  340. break;
  341. case '"':
  342. case '\'':
  343. skipQuotedString (source);
  344. result = CPlusPlusCodeTokeniser::tokenType_string;
  345. break;
  346. case '+':
  347. result = CPlusPlusCodeTokeniser::tokenType_operator;
  348. source.skip();
  349. skipIfNextCharMatches (source, '+', '=');
  350. break;
  351. case '-':
  352. source.skip();
  353. result = parseNumber (source);
  354. if (result == CPlusPlusCodeTokeniser::tokenType_error)
  355. {
  356. result = CPlusPlusCodeTokeniser::tokenType_operator;
  357. skipIfNextCharMatches (source, '-', '=');
  358. }
  359. break;
  360. case '*':
  361. case '%':
  362. case '=':
  363. case '!':
  364. result = CPlusPlusCodeTokeniser::tokenType_operator;
  365. source.skip();
  366. skipIfNextCharMatches (source, '=');
  367. break;
  368. case '/':
  369. result = CPlusPlusCodeTokeniser::tokenType_operator;
  370. source.skip();
  371. if (source.peekNextChar() == '=')
  372. {
  373. source.skip();
  374. }
  375. else if (source.peekNextChar() == '/')
  376. {
  377. result = CPlusPlusCodeTokeniser::tokenType_comment;
  378. source.skipToEndOfLine();
  379. }
  380. else if (source.peekNextChar() == '*')
  381. {
  382. source.skip();
  383. result = CPlusPlusCodeTokeniser::tokenType_comment;
  384. skipComment (source);
  385. }
  386. break;
  387. case '?':
  388. case '~':
  389. source.skip();
  390. result = CPlusPlusCodeTokeniser::tokenType_operator;
  391. break;
  392. case '<':
  393. case '>':
  394. case '|':
  395. case '&':
  396. case '^':
  397. source.skip();
  398. result = CPlusPlusCodeTokeniser::tokenType_operator;
  399. skipIfNextCharMatches (source, firstChar);
  400. skipIfNextCharMatches (source, '=');
  401. break;
  402. case '#':
  403. result = CPlusPlusCodeTokeniser::tokenType_preprocessor;
  404. skipPreprocessorLine (source);
  405. break;
  406. default:
  407. if (isIdentifierStart (firstChar))
  408. result = parseIdentifier (source);
  409. else
  410. source.skip();
  411. break;
  412. }
  413. return result;
  414. }
  415. };