001 /*
002 * Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved.
003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004 *
005 * This code is free software; you can redistribute it and/or modify it
006 * under the terms of the GNU General Public License version 2 only, as
007 * published by the Free Software Foundation. Sun designates this
008 * particular file as subject to the "Classpath" exception as provided
009 * by Sun in the LICENSE file that accompanied this code.
010 *
011 * This code is distributed in the hope that it will be useful, but WITHOUT
012 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014 * version 2 for more details (a copy is included in the LICENSE file that
015 * accompanied this code).
016 *
017 * You should have received a copy of the GNU General Public License version
018 * 2 along with this work; if not, write to the Free Software Foundation,
019 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020 *
021 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022 * CA 95054 USA or visit www.sun.com if you need additional information or
023 * have any questions.
024 */
025
026 package com.sun.tools.javac.parser;
027
028 import java.nio.*;
029
030 import com.sun.tools.javac.code.Source;
031 import com.sun.tools.javac.file.JavacFileManager;
032 import com.sun.tools.javac.util.*;
033
034
035 import static com.sun.tools.javac.parser.Token.*;
036 import static com.sun.tools.javac.util.LayoutCharacters.*;
037
038 /** The lexical analyzer maps an input stream consisting of
039 * ASCII characters and Unicode escapes into a token sequence.
040 *
041 * <p><b>This is NOT part of any API supported by Sun Microsystems. If
042 * you write code that depends on this, you do so at your own risk.
043 * This code and its internal interfaces are subject to change or
044 * deletion without notice.</b>
045 */
046 public class Scanner implements Lexer {
047
/** Debug switch: when true, nextToken() and the process* hooks
 *  print the scanned positions and raw characters to System.out.
 */
private static boolean scannerDebug = false;
049
050 /** A factory for creating scanners. */
051 public static class Factory {
052 /** The context key for the scanner factory. */
053 public static final Context.Key<Scanner.Factory> scannerFactoryKey =
054 new Context.Key<Scanner.Factory>();
055
056 /** Get the Factory instance for this context. */
057 public static Factory instance(Context context) {
058 Factory instance = context.get(scannerFactoryKey);
059 if (instance == null)
060 instance = new Factory(context);
061 return instance;
062 }
063
064 final Log log;
065 final Names names;
066 final Source source;
067 final Keywords keywords;
068
069 /** Create a new scanner factory. */
070 protected Factory(Context context) {
071 context.put(scannerFactoryKey, this);
072 this.log = Log.instance(context);
073 this.names = Names.instance(context);
074 this.source = Source.instance(context);
075 this.keywords = Keywords.instance(context);
076 }
077
078 public Scanner newScanner(CharSequence input) {
079 if (input instanceof CharBuffer) {
080 return new Scanner(this, (CharBuffer)input);
081 } else {
082 char[] array = input.toString().toCharArray();
083 return newScanner(array, array.length);
084 }
085 }
086
087 public Scanner newScanner(char[] input, int inputLength) {
088 return new Scanner(this, input, inputLength);
089 }
090 }
091
/* Output variables; set by nextToken():
 */

/** The token, set by nextToken().
 */
private Token token;

/** Allow hex floating-point literals.
 *  Initialized from the source level in the constructor; switched on
 *  after the first "unsupported.fp.lit" report so that the error is
 *  issued only once per scanner.
 */
private boolean allowHexFloats;

/** The token's position, 0-based offset from beginning of text.
 */
private int pos;

/** Character position just after the last character of the token.
 */
private int endPos;

/** The last character position of the previous token.
 */
private int prevEndPos;

/** The position where a lexical error occurred;
 *  Position.NOPOS until the first error is reported.
 */
private int errPos = Position.NOPOS;

/** The name of an identifier or token:
 */
private Name name;

/** The radix of a numeric literal token.
 */
private int radix;

/** Has a @deprecated been encountered in last doc comment?
 *  This needs to be reset by the client.
 */
protected boolean deprecatedFlag = false;

/** A character buffer for literals, and the number of characters
 *  currently stored in it. The buffer is grown by putChar().
 */
private char[] sbuf = new char[128];
private int sp;

/** The input buffer, index of next character to be read,
 *  index of one past last character in buffer, and the input
 *  length that was passed to the constructor (used as the EOF position).
 */
private char[] buf;
private int bp;
private int buflen;
private int eofPos;

/** The current character.
 */
private char ch;

/** The buffer index of the last converted unicode character
 */
private int unicodeConversionBp = -1;

/** The log to be used for error reporting.
 */
private final Log log;

/** The name table. */
private final Names names;

/** The keyword table. */
private final Keywords keywords;
162
163 /** Common code for constructors. */
164 private Scanner(Factory fac) {
165 this.log = fac.log;
166 this.names = fac.names;
167 this.keywords = fac.keywords;
168 this.allowHexFloats = fac.source.allowHexFloats();
169 }
170
171 private static final boolean hexFloatsWork = hexFloatsWork();
172 private static boolean hexFloatsWork() {
173 try {
174 Float.valueOf("0x1.0p1");
175 return true;
176 } catch (NumberFormatException ex) {
177 return false;
178 }
179 }
180
/** Create a scanner from the input buffer. buffer must implement
 *  array() and compact(), and remaining() must be less than limit().
 *
 *  The buffer is converted to a char array by JavacFileManager.toArray
 *  and its limit() is used as the input length; scanning then proceeds
 *  as in the array-based constructor.
 */
protected Scanner(Factory fac, CharBuffer buffer) {
    this(fac, JavacFileManager.toArray(buffer), buffer.limit());
}
187
/**
 * Create a scanner from the input array. The array may be modified:
 * an end-of-input sentinel is stored just past the scanned characters.
 * If the array is completely filled, the sentinel either overwrites a
 * trailing white space character or, failing that, the input is copied
 * into an array that is one element longer. To avoid the copy, ensure
 * that {@code inputLength < input.length} or that
 * {@code input[input.length -1]} is a white space character.
 *
 * @param fac the factory which created this Scanner
 * @param input the input, might be modified
 * @param inputLength the size of the input.
 * Must be positive and less than or equal to input.length.
 */
protected Scanner(Factory fac, char[] input, int inputLength) {
    this(fac);
    eofPos = inputLength;
    if (inputLength == input.length) {
        boolean endsWithWhitespace =
            input.length > 0 && Character.isWhitespace(input[input.length - 1]);
        if (endsWithWhitespace) {
            // reuse the slot of the trailing white space for the sentinel
            inputLength--;
        } else {
            // no room for the sentinel: grow by one element
            char[] grown = new char[inputLength + 1];
            System.arraycopy(input, 0, grown, 0, input.length);
            input = grown;
        }
    }
    buf = input;
    buflen = inputLength;
    buf[buflen] = EOI;
    // start before the first character and read it
    bp = -1;
    scanChar();
}
217
218 /** Report an error at the given position using the provided arguments.
219 */
220 private void lexError(int pos, String key, Object... args) {
221 log.error(pos, key, args);
222 token = ERROR;
223 errPos = pos;
224 }
225
226 /** Report an error at the current token position using the provided
227 * arguments.
228 */
229 private void lexError(String key, Object... args) {
230 lexError(pos, key, args);
231 }
232
233 /** Convert an ASCII digit from its base (8, 10, or 16)
234 * to its value.
235 */
236 private int digit(int base) {
237 char c = ch;
238 int result = Character.digit(c, base);
239 if (result >= 0 && c > 0x7f) {
240 lexError(pos+1, "illegal.nonascii.digit");
241 ch = "0123456789abcdef".charAt(result);
242 }
243 return result;
244 }
245
/** Convert unicode escape; bp points to initial '\' character
 *  (Spec 3.3).
 *
 *  On success ch holds the decoded character, bp points to the last
 *  hex digit of the escape and unicodeConversionBp records that index.
 *  If the backslash is not followed by 'u', bp and ch are left
 *  unchanged. A malformed escape is reported as "illegal.unicode.esc".
 */
private void convertUnicode() {
    // A backslash at the index recorded in unicodeConversionBp was
    // itself produced by a conversion and must not be converted again.
    if (ch == '\\' && unicodeConversionBp != bp) {
        bp++; ch = buf[bp];
        if (ch == 'u') {
            // skip any further 'u' characters after the first one
            do {
                bp++; ch = buf[bp];
            } while (ch == 'u');
            // bp is now at the first hex digit; limit is the fourth one
            int limit = bp + 3;
            if (limit < buflen) {
                int d = digit(16);
                int code = d;
                // accumulate the remaining digits; stop early on a non-digit
                while (bp < limit && d >= 0) {
                    bp++; ch = buf[bp];
                    d = digit(16);
                    code = (code << 4) + d;
                }
                // d >= 0 here means all four characters were hex digits
                if (d >= 0) {
                    ch = (char)code;
                    unicodeConversionBp = bp;
                    return;
                }
            }
            lexError(bp, "illegal.unicode.esc");
        } else {
            // not an escape: step back onto the backslash
            bp--;
            ch = '\\';
        }
    }
}
278
279 /** Read next character.
280 */
281 private void scanChar() {
282 ch = buf[++bp];
283 if (ch == '\\') {
284 convertUnicode();
285 }
286 }
287
288 /** Read next character in comment, skipping over double '\' characters.
289 */
290 private void scanCommentChar() {
291 scanChar();
292 if (ch == '\\') {
293 if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
294 bp++;
295 } else {
296 convertUnicode();
297 }
298 }
299 }
300
301 /** Append a character to sbuf.
302 */
303 private void putChar(char ch) {
304 if (sp == sbuf.length) {
305 char[] newsbuf = new char[sbuf.length * 2];
306 System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
307 sbuf = newsbuf;
308 }
309 sbuf[sp++] = ch;
310 }
311
312 /** For debugging purposes: print character.
313 */
314 private void dch() {
315 System.err.print(ch); System.out.flush();
316 }
317
/** Read next character in character or string literal and copy into sbuf.
 *  Escape sequences are translated to the character they denote; an
 *  unknown escape is reported as "illegal.esc.char". Nothing is read
 *  once bp has reached the end of the input.
 */
private void scanLitChar() {
    if (ch == '\\') {
        // Two backslashes in a row (the first one not produced by a
        // unicode conversion) denote a single backslash character.
        if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
            bp++;
            putChar('\\');
            scanChar();
        } else {
            scanChar();
            switch (ch) {
            case '0': case '1': case '2': case '3':
            case '4': case '5': case '6': case '7':
                // Octal escape: up to three digits, the third one only
                // if the first digit is in the range 0..3.
                char leadch = ch;
                int oct = digit(8);
                scanChar();
                if ('0' <= ch && ch <= '7') {
                    oct = oct * 8 + digit(8);
                    scanChar();
                    if (leadch <= '3' && '0' <= ch && ch <= '7') {
                        oct = oct * 8 + digit(8);
                        scanChar();
                    }
                }
                putChar((char)oct);
                break;
            case 'b':
                putChar('\b'); scanChar(); break;
            case 't':
                putChar('\t'); scanChar(); break;
            case 'n':
                putChar('\n'); scanChar(); break;
            case 'f':
                putChar('\f'); scanChar(); break;
            case 'r':
                putChar('\r'); scanChar(); break;
            case '\'':
                putChar('\''); scanChar(); break;
            case '\"':
                putChar('\"'); scanChar(); break;
            case '\\':
                putChar('\\'); scanChar(); break;
            default:
                // the offending character is left in ch, not consumed
                lexError(bp, "illegal.esc.char");
            }
        }
    } else if (bp != buflen) {
        // ordinary character: copy it verbatim
        putChar(ch); scanChar();
    }
}
368
/** Read the binary exponent ('p' or 'P', optional sign, decimal digits)
 *  and the optional 'f' or 'd' suffix of a hexadecimal floating point
 *  number, and set the token to FLOATLITERAL or DOUBLELITERAL.
 *  A missing exponent or missing exponent digits are reported as
 *  "malformed.fp.lit".
 */
private void scanHexExponentAndSuffix() {
    if (ch == 'p' || ch == 'P') {
        putChar(ch);
        scanChar();
        if (ch == '+' || ch == '-') {
            putChar(ch);
            scanChar();
        }
        if ('0' <= ch && ch <= '9') {
            do {
                putChar(ch);
                scanChar();
            } while ('0' <= ch && ch <= '9');
            if (!allowHexFloats) {
                lexError("unsupported.fp.lit");
                // report the unsupported feature only once
                allowHexFloats = true;
            }
            else if (!hexFloatsWork)
                lexError("unsupported.cross.fp.lit");
        } else
            lexError("malformed.fp.lit");
    } else {
        lexError("malformed.fp.lit");
    }
    // The token assignments below run even after an error was reported
    // above, replacing the ERROR token set by lexError.
    if (ch == 'f' || ch == 'F') {
        putChar(ch);
        scanChar();
        token = FLOATLITERAL;
    } else {
        if (ch == 'd' || ch == 'D') {
            putChar(ch);
            scanChar();
        }
        token = DOUBLELITERAL;
    }
}
407
408 /** Read fractional part of floating point number.
409 */
410 private void scanFraction() {
411 while (digit(10) >= 0) {
412 putChar(ch);
413 scanChar();
414 }
415 int sp1 = sp;
416 if (ch == 'e' || ch == 'E') {
417 putChar(ch);
418 scanChar();
419 if (ch == '+' || ch == '-') {
420 putChar(ch);
421 scanChar();
422 }
423 if ('0' <= ch && ch <= '9') {
424 do {
425 putChar(ch);
426 scanChar();
427 } while ('0' <= ch && ch <= '9');
428 return;
429 }
430 lexError("malformed.fp.lit");
431 sp = sp1;
432 }
433 }
434
435 /** Read fractional part and 'd' or 'f' suffix of floating point number.
436 */
437 private void scanFractionAndSuffix() {
438 this.radix = 10;
439 scanFraction();
440 if (ch == 'f' || ch == 'F') {
441 putChar(ch);
442 scanChar();
443 token = FLOATLITERAL;
444 } else {
445 if (ch == 'd' || ch == 'D') {
446 putChar(ch);
447 scanChar();
448 }
449 token = DOUBLELITERAL;
450 }
451 }
452
453 /** Read fractional part and 'd' or 'f' suffix of floating point number.
454 */
455 private void scanHexFractionAndSuffix(boolean seendigit) {
456 this.radix = 16;
457 assert ch == '.';
458 putChar(ch);
459 scanChar();
460 while (digit(16) >= 0) {
461 seendigit = true;
462 putChar(ch);
463 scanChar();
464 }
465 if (!seendigit)
466 lexError("invalid.hex.number");
467 else
468 scanHexExponentAndSuffix();
469 }
470
471 /** Read a number.
472 * @param radix The radix of the number; one of 8, 10, 16.
473 */
474 private void scanNumber(int radix) {
475 this.radix = radix;
476 // for octal, allow base-10 digit in case it's a float literal
477 int digitRadix = (radix <= 10) ? 10 : 16;
478 boolean seendigit = false;
479 while (digit(digitRadix) >= 0) {
480 seendigit = true;
481 putChar(ch);
482 scanChar();
483 }
484 if (radix == 16 && ch == '.') {
485 scanHexFractionAndSuffix(seendigit);
486 } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
487 scanHexExponentAndSuffix();
488 } else if (radix <= 10 && ch == '.') {
489 putChar(ch);
490 scanChar();
491 scanFractionAndSuffix();
492 } else if (radix <= 10 &&
493 (ch == 'e' || ch == 'E' ||
494 ch == 'f' || ch == 'F' ||
495 ch == 'd' || ch == 'D')) {
496 scanFractionAndSuffix();
497 } else {
498 if (ch == 'l' || ch == 'L') {
499 scanChar();
500 token = LONGLITERAL;
501 } else {
502 token = INTLITERAL;
503 }
504 }
505 }
506
/** Read an identifier.
 *  The current character is taken as the first character of the
 *  identifier. Characters are collected in sbuf until one is found that
 *  cannot be part of an identifier; then name is set from sbuf and
 *  token to whatever the keyword table returns for that name.
 */
private void scanIdent() {
    boolean isJavaIdentifierPart;
    char high;
    do {
        if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
        // optimization, was: putChar(ch);

        scanChar();
        switch (ch) {
        // ASCII characters that may continue an identifier
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z':
        case '$': case '_':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case '\u0000': case '\u0001': case '\u0002': case '\u0003':
        case '\u0004': case '\u0005': case '\u0006': case '\u0007':
        case '\u0008': case '\u000E': case '\u000F': case '\u0010':
        case '\u0011': case '\u0012': case '\u0013': case '\u0014':
        case '\u0015': case '\u0016': case '\u0017':
        case '\u0018': case '\u0019': case '\u001B':
        case '\u007F':
            break;
        case '\u001A': // EOI is also a legal identifier part
            // at or past the end of the buffer this is the sentinel,
            // so the identifier ends here
            if (bp >= buflen) {
                name = names.fromChars(sbuf, 0, sp);
                token = keywords.key(name);
                return;
            }
            break;
        default:
            if (ch < '\u0080') {
                // all ASCII range chars already handled, above
                isJavaIdentifierPart = false;
            } else {
                high = scanSurrogates();
                if (high != 0) {
                    // surrogate pair: store the high half now; the low
                    // half (in ch) is stored at the top of the loop
                    if (sp == sbuf.length) {
                        putChar(high);
                    } else {
                        sbuf[sp++] = high;
                    }
                    isJavaIdentifierPart = Character.isJavaIdentifierPart(
                        Character.toCodePoint(high, ch));
                } else {
                    isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
                }
            }
            if (!isJavaIdentifierPart) {
                name = names.fromChars(sbuf, 0, sp);
                token = keywords.key(name);
                return;
            }
        }
    } while (true);
}
574
575 /** Are surrogates supported?
576 */
577 final static boolean surrogatesSupported = surrogatesSupported();
578 private static boolean surrogatesSupported() {
579 try {
580 Character.isHighSurrogate('a');
581 return true;
582 } catch (NoSuchMethodError ex) {
583 return false;
584 }
585 }
586
587 /** Scan surrogate pairs. If 'ch' is a high surrogate and
588 * the next character is a low surrogate, then put the low
589 * surrogate in 'ch', and return the high surrogate.
590 * otherwise, just return 0.
591 */
592 private char scanSurrogates() {
593 if (surrogatesSupported && Character.isHighSurrogate(ch)) {
594 char high = ch;
595
596 scanChar();
597
598 if (Character.isLowSurrogate(ch)) {
599 return high;
600 }
601
602 ch = high;
603 }
604
605 return 0;
606 }
607
608 /** Return true if ch can be part of an operator.
609 */
610 private boolean isSpecial(char ch) {
611 switch (ch) {
612 case '!': case '%': case '&': case '*': case '?':
613 case '+': case '-': case ':': case '<': case '=':
614 case '>': case '^': case '|': case '~':
615 case '@':
616 case '`': /* emw4 */
617 return true;
618 default:
619 return false;
620 }
621 }
622
623 /** Read longest possible sequence of special characters and convert
624 * to token.
625 */
626 private void scanOperator() {
627 while (true) {
628 putChar(ch);
629 Name newname = names.fromChars(sbuf, 0, sp);
630 if (keywords.key(newname) == IDENTIFIER) {
631 sp--;
632 break;
633 }
634 name = newname;
635 token = keywords.key(newname);
636 scanChar();
637 if (!isSpecial(ch)) break;
638 }
639 }
640
/**
 * Scan a documentation comment; determine if a deprecated tag is present.
 * Called once the initial /, * have been skipped, positioned at the second *
 * (which is treated as the beginning of the first line).
 * Stops positioned at the closing '/', or at the end of the input if the
 * comment is not closed.
 *
 * deprecatedFlag is set when a line of the comment starts (after optional
 * white space and stars) with "@deprecated" followed by white space or by
 * the end of the comment. Once the flag is set, later lines are no longer
 * examined for the tag.
 */
@SuppressWarnings("fallthrough")
private void scanDocComment() {
    boolean deprecatedPrefix = false;

    forEachLine:
    while (bp < buflen) {

        // Skip optional WhiteSpace at beginning of line
        while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
            scanCommentChar();
        }

        // Skip optional consecutive Stars
        while (bp < buflen && ch == '*') {
            scanCommentChar();
            if (ch == '/') {
                return;
            }
        }

        // Skip optional WhiteSpace after Stars
        while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
            scanCommentChar();
        }

        deprecatedPrefix = false;
        // At beginning of line in the JavaDoc sense.
        // Match "@deprecated" one character at a time; deprecatedPrefix
        // becomes true only if every character matched.
        if (bp < buflen && ch == '@' && !deprecatedFlag) {
            scanCommentChar();
            if (bp < buflen && ch == 'd') {
                scanCommentChar();
                if (bp < buflen && ch == 'e') {
                    scanCommentChar();
                    if (bp < buflen && ch == 'p') {
                        scanCommentChar();
                        if (bp < buflen && ch == 'r') {
                            scanCommentChar();
                            if (bp < buflen && ch == 'e') {
                                scanCommentChar();
                                if (bp < buflen && ch == 'c') {
                                    scanCommentChar();
                                    if (bp < buflen && ch == 'a') {
                                        scanCommentChar();
                                        if (bp < buflen && ch == 't') {
                                            scanCommentChar();
                                            if (bp < buflen && ch == 'e') {
                                                scanCommentChar();
                                                if (bp < buflen && ch == 'd') {
                                                    deprecatedPrefix = true;
                                                    scanCommentChar();
                                                }}}}}}}}}}}
        // The tag only counts if it is followed by white space or
        // directly by the end of the comment.
        if (deprecatedPrefix && bp < buflen) {
            if (Character.isWhitespace(ch)) {
                deprecatedFlag = true;
            } else if (ch == '*') {
                scanCommentChar();
                if (ch == '/') {
                    deprecatedFlag = true;
                    return;
                }
            }
        }

        // Skip rest of line
        while (bp < buflen) {
            switch (ch) {
            case '*':
                scanCommentChar();
                if (ch == '/') {
                    return;
                }
                break;
            case CR: // (Spec 3.4)
                scanCommentChar();
                if (ch != LF) {
                    continue forEachLine;
                }
                /* fall through to LF case */
            case LF: // (Spec 3.4)
                scanCommentChar();
                continue forEachLine;
            default:
                scanCommentChar();
            }
        } // rest of line
    } // forEachLine
    return;
}
735
736 /** The value of a literal token, recorded as a string.
737 * For integers, leading 0x and 'l' suffixes are suppressed.
738 */
739 public String stringVal() {
740 return new String(sbuf, 0, sp);
741 }
742
/** Read token.
 *  Skips white space, line terminators and comments (reporting each to
 *  the matching process* hook) and then scans one token. On return,
 *  token, pos and endPos describe that token; name, radix and the
 *  literal buffer are set where the token kind calls for them.
 *  prevEndPos holds the end position of the token read before.
 */
public void nextToken() {

    try {
        prevEndPos = endPos;
        sp = 0;

        // Each iteration handles one lexical element. White space, line
        // terminators and comments "break" to loop again; real tokens
        // "return".
        while (true) {
            pos = bp;
            switch (ch) {
            case ' ': // (Spec 3.6)
            case '\t': // (Spec 3.6)
            case FF: // (Spec 3.6)
                do {
                    scanChar();
                } while (ch == ' ' || ch == '\t' || ch == FF);
                endPos = bp;
                processWhiteSpace();
                break;
            case LF: // (Spec 3.4)
                scanChar();
                endPos = bp;
                processLineTerminator();
                break;
            case CR: // (Spec 3.4)
                scanChar();
                // CR LF counts as a single line terminator
                if (ch == LF) {
                    scanChar();
                }
                endPos = bp;
                processLineTerminator();
                break;
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '$': case '_':
                scanIdent();
                return;
            case '0':
                scanChar();
                if (ch == 'x' || ch == 'X') {
                    // hexadecimal literal; the "0x" prefix is not stored
                    scanChar();
                    if (ch == '.') {
                        scanHexFractionAndSuffix(false);
                    } else if (digit(16) < 0) {
                        lexError("invalid.hex.number");
                    } else {
                        scanNumber(16);
                    }
                } else {
                    // octal literal, or a decimal float starting with 0
                    putChar('0');
                    scanNumber(8);
                }
                return;
            case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                scanNumber(10);
                return;
            case '.':
                scanChar();
                if ('0' <= ch && ch <= '9') {
                    // floating point literal without integer part
                    putChar('.');
                    scanFractionAndSuffix();
                } else if (ch == '.') {
                    // two dots must be followed by a third one
                    putChar('.'); putChar('.');
                    scanChar();
                    if (ch == '.') {
                        scanChar();
                        putChar('.');
                        token = ELLIPSIS;
                    } else {
                        lexError("malformed.fp.lit");
                    }
                } else {
                    token = DOT;
                }
                return;
            case ',':
                scanChar(); token = COMMA; return;
            case ';':
                scanChar(); token = SEMI; return;
            case '(':
                scanChar(); token = LPAREN; return;
            case ')':
                scanChar(); token = RPAREN; return;
            case '[':
                scanChar(); token = LBRACKET; return;
            case ']':
                scanChar(); token = RBRACKET; return;
            case '{':
                scanChar(); token = LBRACE; return;
            case '}':
                scanChar(); token = RBRACE; return;
            case '/':
                scanChar();
                if (ch == '/') {
                    // line comment: runs up to the line terminator
                    do {
                        scanCommentChar();
                    } while (ch != CR && ch != LF && bp < buflen);
                    // a line comment that runs into the end of the input
                    // is not passed to processComment
                    if (bp < buflen) {
                        endPos = bp;
                        processComment(CommentStyle.LINE);
                    }
                    break;
                } else if (ch == '*') {
                    scanChar();
                    CommentStyle style;
                    if (ch == '*') {
                        style = CommentStyle.JAVADOC;
                        scanDocComment();
                    } else {
                        style = CommentStyle.BLOCK;
                        while (bp < buflen) {
                            if (ch == '*') {
                                scanChar();
                                if (ch == '/') break;
                            } else {
                                scanCommentChar();
                            }
                        }
                    }
                    // both branches above stop on the closing '/' when
                    // the comment is terminated
                    if (ch == '/') {
                        scanChar();
                        endPos = bp;
                        processComment(style);
                        break;
                    } else {
                        lexError("unclosed.comment");
                        return;
                    }
                } else if (ch == '=') {
                    name = names.slashequals;
                    token = SLASHEQ;
                    scanChar();
                } else {
                    name = names.slash;
                    token = SLASH;
                }
                return;
            case '\'':
                scanChar();
                if (ch == '\'') {
                    lexError("empty.char.lit");
                } else {
                    if (ch == CR || ch == LF)
                        lexError(pos, "illegal.line.end.in.char.lit");
                    scanLitChar();
                    if (ch == '\'') {
                        scanChar();
                        token = CHARLITERAL;
                    } else {
                        lexError(pos, "unclosed.char.lit");
                    }
                }
                return;
            case '\"':
                scanChar();
                // a string literal may not span a line terminator
                while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
                    scanLitChar();
                if (ch == '\"') {
                    token = STRINGLITERAL;
                    scanChar();
                } else {
                    lexError(pos, "unclosed.str.lit");
                }
                return;
            default:
                if (isSpecial(ch)) {
                    scanOperator();
                } else {
                    boolean isJavaIdentifierStart;
                    if (ch < '\u0080') {
                        // all ASCII range chars already handled, above
                        isJavaIdentifierStart = false;
                    } else {
                        char high = scanSurrogates();
                        if (high != 0) {
                            // surrogate pair: store the high half here;
                            // scanIdent() stores the low half held in ch
                            if (sp == sbuf.length) {
                                putChar(high);
                            } else {
                                sbuf[sp++] = high;
                            }

                            isJavaIdentifierStart = Character.isJavaIdentifierStart(
                                Character.toCodePoint(high, ch));
                        } else {
                            isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
                        }
                    }
                    if (isJavaIdentifierStart) {
                        scanIdent();
                    } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
                        // end of input: report EOF at the original input length
                        token = EOF;
                        pos = bp = eofPos;
                    } else {
                        lexError("illegal.char", String.valueOf((int)ch));
                        scanChar();
                    }
                }
                return;
            }
        }
    } finally {
        // runs on every exit path, so endPos always reflects the
        // position reached by the scanner
        endPos = bp;
        if (scannerDebug)
            System.out.println("nextToken(" + pos
                               + "," + endPos + ")=|" +
                               new String(getRawCharacters(pos, endPos))
                               + "|");
    }
}
964
965 /** Return the current token, set by nextToken().
966 */
967 public Token token() {
968 return token;
969 }
970
971 /** Sets the current token.
972 */
973 public void token(Token token) {
974 this.token = token;
975 }
976
977 /** Return the current token's position: a 0-based
978 * offset from beginning of the raw input stream
979 * (before unicode translation)
980 */
981 public int pos() {
982 return pos;
983 }
984
985 /** Return the last character position of the current token.
986 */
987 public int endPos() {
988 return endPos;
989 }
990
991 /** Return the last character position of the previous token.
992 */
993 public int prevEndPos() {
994 return prevEndPos;
995 }
996
997 /** Return the position where a lexical error occurred;
998 */
999 public int errPos() {
1000 return errPos;
1001 }
1002
1003 /** Set the position where a lexical error occurred;
1004 */
1005 public void errPos(int pos) {
1006 errPos = pos;
1007 }
1008
1009 /** Return the name of an identifier or token for the current token.
1010 */
1011 public Name name() {
1012 return name;
1013 }
1014
1015 /** Return the radix of a numeric literal token.
1016 */
1017 public int radix() {
1018 return radix;
1019 }
1020
1021 /** Has a @deprecated been encountered in last doc comment?
1022 * This needs to be reset by client with resetDeprecatedFlag.
1023 */
1024 public boolean deprecatedFlag() {
1025 return deprecatedFlag;
1026 }
1027
1028 public void resetDeprecatedFlag() {
1029 deprecatedFlag = false;
1030 }
1031
1032 /**
1033 * Returns the documentation string of the current token.
1034 */
1035 public String docComment() {
1036 return null;
1037 }
1038
1039 /**
1040 * Returns a copy of the input buffer, up to its inputLength.
1041 * Unicode escape sequences are not translated.
1042 */
1043 public char[] getRawCharacters() {
1044 char[] chars = new char[buflen];
1045 System.arraycopy(buf, 0, chars, 0, buflen);
1046 return chars;
1047 }
1048
1049 /**
1050 * Returns a copy of a character array subset of the input buffer.
1051 * The returned array begins at the <code>beginIndex</code> and
1052 * extends to the character at index <code>endIndex - 1</code>.
1053 * Thus the length of the substring is <code>endIndex-beginIndex</code>.
1054 * This behavior is like
1055 * <code>String.substring(beginIndex, endIndex)</code>.
1056 * Unicode escape sequences are not translated.
1057 *
1058 * @param beginIndex the beginning index, inclusive.
1059 * @param endIndex the ending index, exclusive.
1060 * @throws IndexOutOfBounds if either offset is outside of the
1061 * array bounds
1062 */
1063 public char[] getRawCharacters(int beginIndex, int endIndex) {
1064 int length = endIndex - beginIndex;
1065 char[] chars = new char[length];
1066 System.arraycopy(buf, beginIndex, chars, 0, length);
1067 return chars;
1068 }
1069
/** The kinds of comment that nextToken() reports to processComment. */
public enum CommentStyle {
    /** A comment introduced by two slashes, running to the end of the line. */
    LINE,
    /** A comment opened by slash-star whose next character is not a star. */
    BLOCK,
    /** A comment opened by slash-star-star. */
    JAVADOC,
}
1075
1076 /**
1077 * Called when a complete comment has been scanned. pos and endPos
1078 * will mark the comment boundary.
1079 */
1080 protected void processComment(CommentStyle style) {
1081 if (scannerDebug)
1082 System.out.println("processComment(" + pos
1083 + "," + endPos + "," + style + ")=|"
1084 + new String(getRawCharacters(pos, endPos))
1085 + "|");
1086 }
1087
1088 /**
1089 * Called when a complete whitespace run has been scanned. pos and endPos
1090 * will mark the whitespace boundary.
1091 */
1092 protected void processWhiteSpace() {
1093 if (scannerDebug)
1094 System.out.println("processWhitespace(" + pos
1095 + "," + endPos + ")=|" +
1096 new String(getRawCharacters(pos, endPos))
1097 + "|");
1098 }
1099
1100 /**
1101 * Called when a line terminator has been processed.
1102 */
1103 protected void processLineTerminator() {
1104 if (scannerDebug)
1105 System.out.println("processTerminator(" + pos
1106 + "," + endPos + ")=|" +
1107 new String(getRawCharacters(pos, endPos))
1108 + "|");
1109 }
1110
1111 /** Build a map for translating between line numbers and
1112 * positions in the input.
1113 *
1114 * @return a LineMap */
1115 public Position.LineMap getLineMap() {
1116 return Position.makeLineMap(buf, buflen, false);
1117 }
1118
1119 }