KXStudio Website https://kx.studio/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

lexer.php 13KB

9 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. <?php
  /**
   * Twig::Lexer
   * ~~~~~~~~~~~
   *
   * This module implements the Twig lexer.
   *
   * :copyright: 2008 by Armin Ronacher.
   * :license: BSD.
   */
  /**
   * Build a token stream for a template.
   *
   * A fresh Twig_Lexer is created for the given source and wrapped in a
   * Twig_TokenStream; the same filename is handed to both objects.
   *
   * @param string      $source   the template source to tokenize
   * @param string|NULL $filename optional name of the template
   * @return Twig_TokenStream
   */
  function twig_tokenize($source, $filename=NULL)
  {
      return new Twig_TokenStream(
          new Twig_Lexer($source, $filename),
          $filename
      );
  }
  /**
   * A simple lexer for twig templates.
   *
   * The lexer walks over the template source and hands out one Twig_Token
   * per call to nextToken().  It is always in one of three positions:
   * plain template data, inside a `{% ... %}` block, or inside a `${ ... }`
   * variable expression.  Line endings are normalised to "\n" so that line
   * numbers can be tracked by counting newlines.
   */
  class Twig_Lexer
  {
      private $cursor;
      private $position;
      private $end;
      private $pushedBack;

      public $code;
      public $lineno;
      public $filename;

      const POSITION_DATA = 0;
      const POSITION_BLOCK = 1;
      const POSITION_VAR = 2;

      const REGEX_NAME = '/[A-Za-z_][A-Za-z0-9_]*/A';
      // the fractional part may have any number of digits (1.25, 3.14159)
      const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
      const REGEX_STRING = '/(?:"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\')/Asm';
      const REGEX_OPERATOR = '/<=?|>=?|[!=]=|[(){}.,%*\/+~|-]|\[|\]/A';

      /**
       * @param string      $code     the template source
       * @param string|NULL $filename name used in syntax error reports
       */
      public function __construct($code, $filename=NULL)
      {
          // normalise every line ending to a real newline character.  A
          // single quoted '\n' would insert a backslash followed by "n".
          $this->code = str_replace(array("\r\n", "\r"), "\n", $code);
          $this->filename = $filename;
          $this->cursor = 0;
          $this->lineno = 1;
          $this->pushedBack = array();
          $this->end = strlen($this->code);
          $this->position = self::POSITION_DATA;
      }

      /**
       * parse the next token and return it.
       *
       * Tokens that were buffered by an earlier call are returned first.
       * Once the cursor reaches the end of the code an EOF token is
       * returned on every call.
       *
       * @return Twig_Token
       * @throws Twig_SyntaxError if the source cannot be tokenized
       */
      public function nextToken()
      {
          // do we have tokens pushed back? get one
          if (!empty($this->pushedBack))
              return array_shift($this->pushedBack);

          // have we reached the end of the code?
          if ($this->cursor >= $this->end)
              return Twig_Token::EOF($this->lineno);

          // otherwise dispatch to the lexing functions depending
          // on our current position in the code.
          switch ($this->position) {
          case self::POSITION_DATA:
              $tokens = $this->lexData(); break;
          case self::POSITION_BLOCK:
              $tokens = $this->lexBlock(); break;
          case self::POSITION_VAR:
              $tokens = $this->lexVar(); break;
          }

          // if the return value is not an array it's a token
          if (!is_array($tokens))
              return $tokens;

          // empty array (e.g. a comment without leading text), call again
          else if (empty($tokens))
              return $this->nextToken();

          // if we have multiple items we push them to the buffer
          else if (count($tokens) > 1) {
              $first = array_shift($tokens);
              $this->pushedBack = $tokens;
              return $first;
          }

          // otherwise return the first item of the array.
          return $tokens[0];
      }

      /**
       * Lex plain template data up to the next `{%`, `{#` or `$`.
       *
       * @return Twig_Token|array a single text token when no further
       *                          delimiter exists, otherwise a (possibly
       *                          empty) list of tokens
       * @throws Twig_SyntaxError for a comment that is never closed
       */
      private function lexData()
      {
          $match = NULL;

          // if no matches are left we return the rest of the template
          // as simple text token.  The "s" modifier lets the dot span
          // newlines so that text may cover several lines.
          if (!preg_match('/(.*?)(\{[%#]|\$(?!\$))/As', $this->code, $match,
                          0, $this->cursor)) {
              $rv = Twig_Token::Text(substr($this->code, $this->cursor),
                                     $this->lineno);
              $this->cursor = $this->end;
              return $rv;
          }
          $this->cursor += strlen($match[0]);

          // update the lineno on the instance
          $lineno = $this->lineno;
          $this->lineno += substr_count($match[0], "\n");

          // push the template text first.  Compare against the empty
          // string: empty() would also swallow a text of "0".
          $text = $match[1];
          if ($text !== '') {
              $result = array(Twig_Token::Text($text, $lineno));
              $lineno += substr_count($text, "\n");
          }
          else
              $result = array();

          // block start token, let's return a token for that.
          if (($token = $match[2]) !== '$') {
              // if our section is a comment, just return the text
              if ($token[1] == '#') {
                  if (!preg_match('/.*?#\}/As', $this->code, $match,
                                  0, $this->cursor))
                      throw new Twig_SyntaxError('unclosed comment',
                                                 $this->lineno,
                                                 $this->filename);
                  $this->cursor += strlen($match[0]);
                  $this->lineno += substr_count($match[0], "\n");
                  return $result;
              }
              $result[] = new Twig_Token(Twig_Token::BLOCK_START_TYPE,
                                         '', $lineno);
              $this->position = self::POSITION_BLOCK;
          }

          // quoted block
          else if (isset($this->code[$this->cursor]) &&
                   $this->code[$this->cursor] == '{') {
              $this->cursor++;
              $result[] = new Twig_Token(Twig_Token::VAR_START_TYPE,
                                         '', $lineno);
              $this->position = self::POSITION_VAR;
          }

          // inline variable expressions.  If there is no name next we
          // fail silently: the "$" has been consumed and no token is
          // produced for it.
          // NOTE(review): this also turns "$$" into a single "$"; that
          // looks like the intended escape -- confirm before changing.
          else if (preg_match(self::REGEX_NAME, $this->code, $match,
                              0, $this->cursor)) {
              $result[] = new Twig_Token(Twig_Token::VAR_START_TYPE,
                                         '', $lineno);
              $result[] = Twig_Token::Name($match[0], $lineno);
              $this->cursor += strlen($match[0]);

              // allow attribute lookup.  The dot is only consumed when a
              // name or number follows it, so a sentence such as
              // "Hello $name." keeps its full stop as template text and
              // no dangling operator ends up in the expression.
              while (isset($this->code[$this->cursor]) &&
                     $this->code[$this->cursor] === '.') {
                  $after = $this->cursor + 1;
                  if (preg_match(self::REGEX_NAME, $this->code,
                                 $match, 0, $after))
                      $member = Twig_Token::Name($match[0], $lineno);
                  else if (preg_match(self::REGEX_NUMBER, $this->code,
                                      $match, 0, $after))
                      $member = Twig_Token::Number(
                          self::numberValue($match[0]), $lineno);
                  else
                      break;
                  $result[] = Twig_Token::Operator('.', $lineno);
                  $result[] = $member;
                  $this->cursor = $after + strlen($match[0]);
              }
              $result[] = new Twig_Token(Twig_Token::VAR_END_TYPE,
                                         '', $lineno);
          }

          return $result;
      }

      /**
       * Lex inside a `{% ... %}` block: either the closing `%}` or the
       * next expression token.
       *
       * @return Twig_Token
       */
      private function lexBlock()
      {
          $match = NULL;
          if (preg_match('/\s*%\}/A', $this->code, $match, 0, $this->cursor)) {
              $this->cursor += strlen($match[0]);
              $lineno = $this->lineno;
              $this->lineno += substr_count($match[0], "\n");
              $this->position = self::POSITION_DATA;
              return new Twig_Token(Twig_Token::BLOCK_END_TYPE, '', $lineno);
          }
          return $this->lexExpression();
      }

      /**
       * Lex inside a `${ ... }` expression: either the closing `}` or the
       * next expression token.
       *
       * @return Twig_Token
       */
      private function lexVar()
      {
          $match = NULL;
          if (preg_match('/\s*\}/A', $this->code, $match, 0, $this->cursor)) {
              $this->cursor += strlen($match[0]);
              $lineno = $this->lineno;
              $this->lineno += substr_count($match[0], "\n");
              $this->position = self::POSITION_DATA;
              return new Twig_Token(Twig_Token::VAR_END_TYPE, '', $lineno);
          }
          return $this->lexExpression();
      }

      /**
       * Lex one operator, name, number or string token.
       *
       * @return Twig_Token
       * @throws Twig_SyntaxError at the end of the source or on a
       *                          character that starts no known token
       */
      private function lexExpression()
      {
          $match = NULL;

          // skip whitespace
          while (preg_match('/\s+/A', $this->code, $match, 0,
                            $this->cursor)) {
              $this->cursor += strlen($match[0]);
              $this->lineno += substr_count($match[0], "\n");
          }

          // sanity check
          if ($this->cursor >= $this->end)
              throw new Twig_SyntaxError('unexpected end of stream',
                                         $this->lineno, $this->filename);

          // first parse operators
          if (preg_match(self::REGEX_OPERATOR, $this->code, $match, 0,
                         $this->cursor)) {
              $this->cursor += strlen($match[0]);
              return Twig_Token::Operator($match[0], $this->lineno);
          }

          // now names
          if (preg_match(self::REGEX_NAME, $this->code, $match, 0,
                         $this->cursor)) {
              $this->cursor += strlen($match[0]);
              return Twig_Token::Name($match[0], $this->lineno);
          }

          // then numbers
          else if (preg_match(self::REGEX_NUMBER, $this->code, $match,
                              0, $this->cursor)) {
              $this->cursor += strlen($match[0]);
              return Twig_Token::Number(self::numberValue($match[0]),
                                        $this->lineno);
          }

          // and finally strings
          else if (preg_match(self::REGEX_STRING, $this->code, $match,
                              0, $this->cursor)) {
              $this->cursor += strlen($match[0]);
              $this->lineno += substr_count($match[0], "\n");
              // drop the surrounding quotes and resolve escapes
              $value = stripcslashes(substr($match[0], 1, strlen($match[0]) - 2));
              return Twig_Token::String($value, $this->lineno);
          }

          // unlexable
          throw new Twig_SyntaxError("Unexpected character '" .
                                     $this->code[$this->cursor] . "'.",
                                     $this->lineno, $this->filename);
      }

      /**
       * Convert a matched number literal into an int when it has no
       * fractional part and fits into an int, into a float otherwise.
       */
      private static function numberValue($literal)
      {
          $value = (float)$literal;
          // comparing (int)$value with the float by identity can never
          // succeed, so cast back to float before comparing.
          if ($value < PHP_INT_MAX && (float)(int)$value === $value)
              $value = (int)$value;
          return $value;
      }
  }
  /**
   * Wrapper around a lexer for simplified token access.
   *
   * The stream always exposes the token the parser is looking at as
   * `$current` and sets `$eof` once that token is the EOF token.
   */
  class Twig_TokenStream
  {
      private $pushed;
      private $lexer;

      public $filename;
      public $current;
      public $eof;

      /**
       * @param object      $lexer    anything with a nextToken() method
       *                              that returns tokens
       * @param string|NULL $filename name used in syntax error reports
       */
      public function __construct($lexer, $filename)
      {
          $this->pushed = array();
          $this->lexer = $lexer;
          $this->filename = $filename;
          // load the first token so that $current is usable right away
          $this->next();
      }

      /**
       * Queue a token; queued tokens are handed out by next() in the
       * order they were pushed, before the lexer is asked again.
       */
      public function push($token)
      {
          $this->pushed[] = $token;
      }

      /**
       * set the pointer to the next token and return the old one.
       */
      public function next()
      {
          if (!empty($this->pushed))
              $token = array_shift($this->pushed);
          else
              $token = $this->lexer->nextToken();
          $old = $this->current;
          $this->current = $token;
          $this->eof = $token->type === Twig_Token::EOF_TYPE;
          return $old;
      }

      /**
       * Look at the next token.
       *
       * The stream advances by one token and then queues both the former
       * current token and the new one.  Note that `$current` is the
       * looked-at token when this method returns; the former current
       * token comes back with the following call to next().
       */
      public function look()
      {
          $old = $this->next();
          $new = $this->current;
          $this->push($old);
          $this->push($new);
          return $new;
      }

      /**
       * Skip some tokens.
       */
      public function skip($times=1)
      {
          for ($i = 0; $i < $times; ++$i)
              $this->next();
      }

      /**
       * expect a token (like $token->test()) and return it or raise
       * a syntax error.
       *
       * @throws Twig_SyntaxError if the current token does not match
       */
      public function expect($primary, $secondary=NULL)
      {
          $token = $this->current;
          if (!$token->test($primary, $secondary))
              // report the template name as well, like the lexer does
              throw new Twig_SyntaxError('unexpected token',
                                         $token->lineno,
                                         $this->filename);
          $this->next();
          return $token;
      }

      /**
       * Forward that call to the current token.
       */
      public function test($primary, $secondary=NULL)
      {
          return $this->current->test($primary, $secondary);
      }
  }
  /**
   * Plain value holder for a single token: its type, its value and the
   * line number it was created with.
   */
  class Twig_Token
  {
      public $type;
      public $value;
      public $lineno;

      const TEXT_TYPE = 0;
      const EOF_TYPE = -1;
      const BLOCK_START_TYPE = 1;
      const VAR_START_TYPE = 2;
      const BLOCK_END_TYPE = 3;
      const VAR_END_TYPE = 4;
      const NAME_TYPE = 5;
      const NUMBER_TYPE = 6;
      const STRING_TYPE = 7;
      const OPERATOR_TYPE = 8;

      public function __construct($type, $value, $lineno)
      {
          $this->lineno = $lineno;
          $this->value = $value;
          $this->type = $type;
      }

      /**
       * Check this token against a type and, optionally, a value.
       *
       * When only one argument is given and it is not an integer, it is
       * taken as the value and the type defaults to
       * Twig_Token::NAME_TYPE.  Without a value only the type is
       * checked.  The value may be an array when the token should match
       * any one of several values.  Values are compared loosely.
       *
       * @return bool
       */
      public function test($type, $values=NULL)
      {
          // single non-integer argument: it is really the value
          if ($values === NULL && !is_int($type)) {
              $values = $type;
              $type = self::NAME_TYPE;
          }

          if ($this->type !== $type)
              return false;
          if ($values === NULL)
              return true;
          if (is_array($values) && in_array($this->value, $values))
              return true;
          return $this->value == $values;
      }

      /** Create a template text token. */
      public static function Text($value, $lineno)
      {
          return new self(self::TEXT_TYPE, $value, $lineno);
      }

      /** Create the end-of-stream token; its value is the empty string. */
      public static function EOF($lineno)
      {
          return new self(self::EOF_TYPE, '', $lineno);
      }

      /** Create a name token. */
      public static function Name($value, $lineno)
      {
          return new self(self::NAME_TYPE, $value, $lineno);
      }

      /** Create a number token. */
      public static function Number($value, $lineno)
      {
          return new self(self::NUMBER_TYPE, $value, $lineno);
      }

      /** Create a string token. */
      public static function String($value, $lineno)
      {
          return new self(self::STRING_TYPE, $value, $lineno);
      }

      /** Create an operator token. */
      public static function Operator($value, $lineno)
      {
          return new self(self::OPERATOR_TYPE, $value, $lineno);
      }
  }