1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.peaseplate.internal.lang;
21
22 import org.peaseplate.locator.TemplateLocator;
23
24 public class Tokenizer {
25
26 private static final String KEYWORD_TRUE = "true";
27 private static final String KEYWORD_FALSE = "false";
28 private static final String KEYWORD_THIS = "$this";
29 private static final String KEYWORD_POP = "$pop";
30 private static final String KEYWORD_NULL = "null";
31
32 public enum Type {
33 NONE,
34 IDENTIFIER,
35 VARIABLE,
36 NUMERIC,
37 STRING,
38 KEY,
39 KEYWORD_TRUE,
40 KEYWORD_FALSE,
41 KEYWORD_NULL,
42 KEYWORD_THIS,
43 KEYWORD_POP,
44 DELEGATOR,
45 SEPARATOR,
46 UNARY_OPERATOR,
47 MULTIPLICATIVE_OPERATOR,
48 ADDITIVE_OPERATOR,
49 SHIFT_OPERATOR,
50 RELATIONAL_OPERATOR,
51 EQUALITY_OPERATOR,
52 BITWISE_OPERATOR,
53 CONDITIONAL_AND_OPERATOR,
54 CONDITIONAL_OR_OPERATOR,
55 INLINE_CONDIDION,
56 INLINE_SEPARATOR,
57 ASSIGNMENT,
58 PARENTHESIS_OPEN,
59 PARENTHESIS_CLOSE,
60 QUERY_OPEN,
61 QUERY_CLOSE,
62 TRANSFORMER_CALL,
63 ERROR
64 }
65
66 private static final char EOC = 0;
67
68 private final char[] code;
69 private final int endOffset;
70
71 private int offset = 0;
72 private int line = 1;
73 private int column = 1;
74 private int escapeCount = 0;
75
76 private char lookAhead = EOC;
77 private char c = EOC;
78
79 private Type tokenType = Type.NONE;
80 private Object tokenValue = null;
81 private int tokenLine = 0;
82 private int tokenColumn = 0;
83
84 public Tokenizer(TemplateLocator locator, int line, int column, char[] code, int offset, int length) {
85 super();
86
87 this.line = line;
88 this.column = column;
89
90 this.code = code;
91 this.offset = offset;
92 this.endOffset = offset + length;
93 escapeCount = 0;
94
95 lookAhead = (offset+1 < endOffset) ? code[offset+1] : EOC;
96 c = (offset < endOffset) ? code[offset] : EOC;
97 }
98
99 public Type getTokenType() {
100 return tokenType;
101 }
102
103 public Object getTokenValue() {
104 return tokenValue;
105 }
106
107 public int getTokenLine() {
108 return tokenLine;
109 }
110
111 public int getTokenColumn() {
112 return tokenColumn;
113 }
114
115 public Type readToken() throws TokenizerException {
116 nextWhileWhitespace();
117
118 tokenLine = line;
119 tokenColumn = column;
120
121 if (c == EOC) {
122 tokenType = Type.NONE;
123 tokenValue = "end of code";
124
125 return tokenType;
126 }
127 else if (c == '$') {
128
129 int start = offset;
130
131 while (true) {
132 next();
133
134 if ((!isLetter(c)) && (!isDigit(c)) && (c != '_')) {
135 tokenValue = new String(code, start, offset-start);
136
137 if (KEYWORD_THIS.equals(tokenValue))
138 tokenType = Type.KEYWORD_THIS;
139 else if (KEYWORD_POP.equals(tokenValue))
140 tokenType = Type.KEYWORD_POP;
141 else
142 tokenType = Type.VARIABLE;
143
144 return tokenType;
145 }
146 }
147 }
148 else if ((isLetter(c)) || (c == '_')) {
149
150 int start = offset;
151
152 while (true) {
153 next();
154
155 if ((!isLetter(c)) && (!isDigit(c)) && (c != '_')) {
156 tokenValue = new String(code, start, offset-start);
157
158 if (KEYWORD_TRUE.equals(tokenValue))
159 tokenType = Type.KEYWORD_TRUE;
160 else if (KEYWORD_FALSE.equals(tokenValue))
161 tokenType = Type.KEYWORD_FALSE;
162 else if (KEYWORD_NULL.equals(tokenValue))
163 tokenType = Type.KEYWORD_NULL;
164 else
165 tokenType = Type.IDENTIFIER;
166
167 return tokenType;
168 }
169 }
170 }
171
172 else if ((isDigit(c)) || (c == '#')) {
173
174 int start = offset;
175 int startLine = line;
176 int startColumn = column;
177 boolean floatingpoint = false;
178
179 while (true) {
180 next();
181
182 if ((c == '.') || (c == 'e') || (c == 'E'))
183 floatingpoint = true;
184
185 if ((!isLetter(c)) && (!isDigit(c)) && (c != '.')) {
186 String s = new String(code, start, offset-start);
187
188 if ((s.endsWith("d")) || (s.endsWith("D"))) {
189 try {
190 tokenValue = Double.valueOf(s.substring(0, s.length()-1));
191 }
192 catch (NumberFormatException e) {
193 throw new TokenizerException(startLine, startColumn, "Could not parse double tokenValue \"" + s + "\"", e);
194 }
195 }
196 else if ((s.endsWith("f")) || (s.endsWith("F"))) {
197 try {
198 tokenValue = Float.valueOf(s.substring(0, s.length()-1));
199 }
200 catch (NumberFormatException e) {
201 throw new TokenizerException(startLine, startColumn, "Could not parse float tokenValue \"" + s + "\"", e);
202 }
203 }
204 else if (floatingpoint) {
205 try {
206 tokenValue = Double.valueOf(s);
207 }
208 catch (NumberFormatException e) {
209 throw new TokenizerException(startLine, startColumn, "Could not parse double tokenValue \"" + s + "\"", e);
210 }
211 }
212 else if ((s.endsWith("l")) || (s.endsWith("L"))) {
213 try {
214 tokenValue = Long.decode(s.substring(0, s.length()-1));
215 }
216 catch (NumberFormatException e) {
217 throw new TokenizerException(startLine, startColumn, "Could not parse long tokenValue \"" + s + "\"", e);
218 }
219 }
220 else {
221 try {
222 tokenValue = Integer.decode(s);
223 }
224 catch (NumberFormatException e) {
225 throw new TokenizerException(startLine, startColumn, "Could not parse integer tokenValue \"" + s + "\"", e);
226 }
227 }
228
229 tokenType = Type.NUMERIC;
230
231 return tokenType;
232 }
233 }
234 }
235 else if (c == '\"') {
236 int startLine = line;
237 int startColumn = column;
238 StringBuilder builder = new StringBuilder();
239
240 while (true) {
241 next();
242
243 if (c == EOC)
244 throw new TokenizerException(startLine, startColumn, "Unclosed string");
245 else if (c == '\"') {
246 next();
247
248 tokenType = Type.STRING;
249 tokenValue = builder.toString();
250
251 return tokenType;
252 }
253 else if (c == '\\') {
254 next();
255
256 if (c == EOC)
257 throw new TokenizerException(startLine, startColumn, "Unclosed string");
258
259 switch (c) {
260 case '\\':
261 builder.append("\\");
262 break;
263
264 case '\"':
265 builder.append("\"");
266 break;
267
268 case '\'':
269 builder.append("\'");
270 break;
271
272 case '\r':
273 builder.append("\r");
274 break;
275
276 case '\t':
277 builder.append("\t");
278 break;
279
280 case '\n':
281 builder.append("\n");
282 break;
283
284 default:
285 throw new TokenizerException(line, column, "Invalid escape sequence \"\\" + c + "\"");
286 }
287 }
288 else
289 builder.append(c);
290 }
291 }
292 else if (c == '.') {
293 tokenType = Type.DELEGATOR;
294 tokenValue = String.valueOf(c);
295
296 next();
297
298 return tokenType;
299 }
300 else if (c == ',') {
301 tokenType = Type.SEPARATOR;
302 tokenValue = String.valueOf(c);
303
304 next();
305
306 return tokenType;
307 }
308 else if (isUnaryOperator()) {
309 tokenType = Type.UNARY_OPERATOR;
310 tokenValue = String.valueOf(c);
311
312 next();
313
314 return tokenType;
315 }
316 else if (isMultiplicativeOperator()) {
317 tokenType = Type.MULTIPLICATIVE_OPERATOR;
318 tokenValue = String.valueOf(c);
319
320 next();
321
322 return tokenType;
323 }
324 else if (isAdditiveOperator()) {
325 tokenType = Type.ADDITIVE_OPERATOR;
326 tokenValue = String.valueOf(c);
327
328 next();
329
330 return tokenType;
331 }
332 else if (isShiftOperator()) {
333 tokenType = Type.SHIFT_OPERATOR;
334
335 if (c == '>') {
336 next();
337
338 if (lookAhead == '>') {
339 next();
340 tokenValue = ">>>";
341 }
342 else {
343 tokenValue = ">>";
344 }
345 }
346 else {
347 next();
348
349 tokenValue = "<<";
350 }
351
352 next();
353
354 return tokenType;
355 }
356 else if (isRelationalOperator()) {
357 tokenType = Type.RELATIONAL_OPERATOR;
358
359 if (lookAhead == '=') {
360 tokenValue = String.valueOf(c) + String.valueOf(lookAhead);
361
362 next();
363 }
364 else {
365 tokenValue = String.valueOf(c);
366 }
367
368 next();
369
370 return tokenType;
371 }
372 else if (isEqualityOperator()) {
373 tokenType = Type.EQUALITY_OPERATOR;
374 tokenValue = String.valueOf(c) + String.valueOf(lookAhead);
375
376 next();
377 next();
378
379 return tokenType;
380 }
381 else if (isBitwiseOperator()) {
382 tokenType = Type.BITWISE_OPERATOR;
383 tokenValue = String.valueOf(c);
384
385 next();
386
387 return tokenType;
388 }
389 else if (isConditionalAndOperator()) {
390 tokenType = Type.CONDITIONAL_AND_OPERATOR;
391 tokenValue = "&&";
392
393 next();
394 next();
395
396 return tokenType;
397 }
398 else if (isConditionalOrOperator()) {
399 tokenType = Type.CONDITIONAL_OR_OPERATOR;
400 tokenValue = "||";
401
402 next();
403 next();
404
405 return tokenType;
406 }
407 else if (c == '?') {
408 tokenType = Type.INLINE_CONDIDION;
409 tokenValue = "?";
410
411 next();
412
413 return tokenType;
414 }
415 else if (c == ':') {
416 tokenType = Type.INLINE_SEPARATOR;
417 tokenValue = ":";
418
419 next();
420
421 return tokenType;
422 }
423 else if (isAssignment()) {
424 tokenType = Type.ASSIGNMENT;
425 tokenValue = "=";
426
427 next();
428
429 return tokenType;
430 }
431 else if (c == '(') {
432 tokenType = Type.PARENTHESIS_OPEN;
433 tokenValue = String.valueOf(c);
434
435 next();
436
437 return tokenType;
438 }
439 else if (c == ')') {
440 tokenType = Type.PARENTHESIS_CLOSE;
441 tokenValue = String.valueOf(c);
442
443 next();
444
445 return tokenType;
446 }
447 else if (c == '[') {
448 tokenType = Type.QUERY_OPEN;
449 tokenValue = String.valueOf(c);
450
451 next();
452
453 return tokenType;
454 }
455 else if (c == ']') {
456 tokenType = Type.QUERY_CLOSE;
457 tokenValue = String.valueOf(c);
458
459 next();
460
461 return tokenType;
462 }
463 else if (isTransformerCall()) {
464 tokenType = Type.TRANSFORMER_CALL;
465 tokenValue = "->";
466
467 next();
468 next();
469
470 return tokenType;
471 }
472 else {
473 tokenType = Type.ERROR;
474 tokenValue = String.valueOf(c);
475
476 return tokenType;
477 }
478 }
479
480
481
482
483
484
485
486
487
488 public Type readKeyToken() throws TokenizerException {
489 nextWhileWhitespace();
490
491 if (c == EOC) {
492 tokenType = Type.NONE;
493 tokenValue = "end of code";
494
495 return tokenType;
496 }
497 else if (isKeyIdentifier()) {
498
499 int start = offset;
500
501 while (true) {
502 next();
503
504 if (!isKeyIdentifier()) {
505 tokenValue = new String(code, start, offset-start);
506 tokenType = Type.KEY;
507
508 return tokenType;
509 }
510 }
511 }
512 else
513 return readToken();
514 }
515
516
517
518
519
520
521
522
523 private boolean next() {
524 offset += 1;
525 column += 1;
526
527 c = lookAhead;
528
529 lookAhead = (offset+1 < endOffset) ? code[offset+1] : EOC;
530
531 if (c == '\\')
532 escapeCount += 1;
533 else if (escapeCount > 0)
534 escapeCount = 0;
535
536 if (c == EOC)
537 return false;
538 else if ((c == '\n') || ((c == '\r') && (lookAhead != '\n'))) {
539 line += 1;
540 column = 1;
541 }
542
543 return true;
544 }
545
546 private boolean nextWhileWhitespace() {
547 while (isWhitespace())
548 if (!next())
549 return false;
550
551 return true;
552 }
553
554 private boolean isWhitespace() {
555 return (c == ' ') || (c == '\t') || (c == '\r') || (c == '\n');
556 }
557
558 private boolean isLetter(char c) {
559 return
560 ((c >= 'a') && (c <= 'z')) ||
561 ((c >= 'A') && (c <= 'Z'))
562 ;
563 }
564
565 private boolean isDigit(char c) {
566 return (c >= '0') && (c <= '9');
567 }
568
569 private boolean isUnaryOperator() {
570 return (
571 (c == '+') || ((c == '-') && (lookAhead != '>')) ||
572 (c == '~') || ((c == '!') && (lookAhead != '='))
573 ) && (
574 (tokenType == Type.NONE) || (tokenType == Type.UNARY_OPERATOR) ||
575 (tokenType == Type.MULTIPLICATIVE_OPERATOR) || (tokenType == Type.ADDITIVE_OPERATOR) ||
576 (tokenType == Type.SHIFT_OPERATOR) || (tokenType == Type.RELATIONAL_OPERATOR) ||
577 (tokenType == Type.EQUALITY_OPERATOR) || (tokenType == Type.BITWISE_OPERATOR) ||
578 (tokenType == Type.CONDITIONAL_AND_OPERATOR) || (tokenType == Type.CONDITIONAL_OR_OPERATOR) ||
579 (tokenType == Type.PARENTHESIS_OPEN) ||
580 (tokenType == Type.TRANSFORMER_CALL) ||
581 (tokenType == Type.QUERY_OPEN)
582 );
583 }
584
585 private boolean isMultiplicativeOperator() {
586 return (c == '*') || (c == '/') || (c == '%');
587 }
588
589 private boolean isAdditiveOperator() {
590 return (c == '+') || ((c == '-') && (lookAhead != '>'));
591 }
592
593 private boolean isShiftOperator() {
594 return
595 ((c == '<') && (lookAhead == '<')) ||
596 ((c == '>') && (lookAhead == '>'))
597 ;
598 }
599
600 private boolean isRelationalOperator() {
601 return (c == '<') || (c == '>');
602 }
603
604 private boolean isEqualityOperator() {
605 return ((c == '=') || (c == '!')) && (lookAhead == '=');
606 }
607
608 private boolean isBitwiseOperator() {
609 return
610 ((c == '&') && (lookAhead != '&')) ||
611 (c == '^') ||
612 ((c == '|') && (lookAhead != '|'))
613 ;
614 }
615
616 private boolean isConditionalAndOperator() {
617 return ((c == '&') && (lookAhead == '&'));
618 }
619
620 private boolean isConditionalOrOperator() {
621 return ((c == '|') && (lookAhead == '|'));
622 }
623
624 private boolean isAssignment() {
625 return (c == '=') && (lookAhead != '=');
626 }
627
628 private boolean isTransformerCall() {
629 return ((c == '-') && (lookAhead == '>'));
630 }
631
632 private boolean isKeyIdentifier() {
633 return ((isLetter(c)) || (isDigit(c)) || (c == '_') || (c == '-') || (c == '.') || (c == '/'));
634 }
635
636 @Override
637 public String toString() {
638 return String.format(
639 "%-24s [%-24s]", getTokenType(), getTokenValue()
640 );
641 }
642 }