001    /*
002     * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
003     * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004     *
005     * This code is free software; you can redistribute it and/or modify it
006     * under the terms of the GNU General Public License version 2 only, as
007     * published by the Free Software Foundation.  Sun designates this
008     * particular file as subject to the "Classpath" exception as provided
009     * by Sun in the LICENSE file that accompanied this code.
010     *
011     * This code is distributed in the hope that it will be useful, but WITHOUT
012     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013     * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
014     * version 2 for more details (a copy is included in the LICENSE file that
015     * accompanied this code).
016     *
017     * You should have received a copy of the GNU General Public License version
018     * 2 along with this work; if not, write to the Free Software Foundation,
019     * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020     *
021     * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022     * CA 95054 USA or visit www.sun.com if you need additional information or
023     * have any questions.
024     */
025    
026    package com.sun.tools.javac.parser;
027    
028    import java.nio.*;
029    
030    import com.sun.tools.javac.code.Source;
031    import com.sun.tools.javac.file.JavacFileManager;
032    import com.sun.tools.javac.util.*;
033    
034    
035    import static com.sun.tools.javac.parser.Token.*;
036    import static com.sun.tools.javac.util.LayoutCharacters.*;
037    
038    /** The lexical analyzer maps an input stream consisting of
039     *  ASCII characters and Unicode escapes into a token sequence.
040     *
041     *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
042     *  you write code that depends on this, you do so at your own risk.
043     *  This code and its internal interfaces are subject to change or
044     *  deletion without notice.</b>
045     */
046    public class Scanner implements Lexer {
047    
048        private static boolean scannerDebug = false;
049    
050        /** A factory for creating scanners. */
051        public static class Factory {
052            /** The context key for the scanner factory. */
053            public static final Context.Key<Scanner.Factory> scannerFactoryKey =
054                new Context.Key<Scanner.Factory>();
055    
056            /** Get the Factory instance for this context. */
057            public static Factory instance(Context context) {
058                Factory instance = context.get(scannerFactoryKey);
059                if (instance == null)
060                    instance = new Factory(context);
061                return instance;
062            }
063    
064            final Log log;
065            final Names names;
066            final Source source;
067            final Keywords keywords;
068    
069            /** Create a new scanner factory. */
070            protected Factory(Context context) {
071                context.put(scannerFactoryKey, this);
072                this.log = Log.instance(context);
073                this.names = Names.instance(context);
074                this.source = Source.instance(context);
075                this.keywords = Keywords.instance(context);
076            }
077    
078            public Scanner newScanner(CharSequence input) {
079                if (input instanceof CharBuffer) {
080                    return new Scanner(this, (CharBuffer)input);
081                } else {
082                    char[] array = input.toString().toCharArray();
083                    return newScanner(array, array.length);
084                }
085            }
086    
087            public Scanner newScanner(char[] input, int inputLength) {
088                return new Scanner(this, input, inputLength);
089            }
090        }
091    
    /* Output variables; set by nextToken():
     */

    /** The token, set by nextToken().
     */
    private Token token;

    /** Allow hex floating-point literals.  Initialized from the factory's
     *  Source; also switched on once an "unsupported.fp.lit" error has
     *  been reported.
     */
    private boolean allowHexFloats;

    /** The token's position, 0-based offset from beginning of text.
     */
    private int pos;

    /** Character position just after the last character of the token.
     */
    private int endPos;

    /** The last character position of the previous token.
     */
    private int prevEndPos;

    /** The position where a lexical error occurred.
     */
    private int errPos = Position.NOPOS;

    /** The name of an identifier or token.
     */
    private Name name;

    /** The radix of a numeric literal token.
     */
    private int radix;

    /** Has a @deprecated been encountered in last doc comment?
     *  This needs to be reset by the client.
     */
    protected boolean deprecatedFlag = false;

    /** A character buffer for literals, and the index of its next free
     *  slot (i.e. the number of characters currently stored).
     */
    private char[] sbuf = new char[128];
    private int sp;

    /** The input buffer (buf), the index of the current character in it
     *  (bp), the index of the slot holding the EOI sentinel that ends the
     *  input (buflen), and the input length as it was passed to the
     *  constructor (eofPos).
     */
    private char[] buf;
    private int bp;
    private int buflen;
    private int eofPos;

    /** The current character.
     */
    private char ch;

    /** The buffer index of the last converted unicode character; -1 as
     *  long as no escape has been converted.  It is compared against bp so
     *  that a backslash produced by an escape is not converted again.
     */
    private int unicodeConversionBp = -1;

    /** The log to be used for error reporting.
     */
    private final Log log;

    /** The name table. */
    private final Names names;

    /** The keyword table. */
    private final Keywords keywords;
162    
163        /** Common code for constructors. */
164        private Scanner(Factory fac) {
165            this.log = fac.log;
166            this.names = fac.names;
167            this.keywords = fac.keywords;
168            this.allowHexFloats = fac.source.allowHexFloats();
169        }
170    
171        private static final boolean hexFloatsWork = hexFloatsWork();
172        private static boolean hexFloatsWork() {
173            try {
174                Float.valueOf("0x1.0p1");
175                return true;
176            } catch (NumberFormatException ex) {
177                return false;
178            }
179        }
180    
    /** Create a scanner from the input buffer.  buffer must implement
     *  array() and compact(), and remaining() must be less than limit().
     *  The buffer is converted to a char array through
     *  JavacFileManager.toArray, and buffer.limit() is used as the input
     *  length.
     *
     *  @param fac the factory which created this Scanner
     *  @param buffer the input buffer
     */
    protected Scanner(Factory fac, CharBuffer buffer) {
        this(fac, JavacFileManager.toArray(buffer), buffer.limit());
    }
187    
188        /**
189         * Create a scanner from the input array.  This method might
190         * modify the array.  To avoid copying the input array, ensure
191         * that {@code inputLength < input.length} or
192         * {@code input[input.length -1]} is a white space character.
193         *
194         * @param fac the factory which created this Scanner
195         * @param input the input, might be modified
196         * @param inputLength the size of the input.
197         * Must be positive and less than or equal to input.length.
198         */
199        protected Scanner(Factory fac, char[] input, int inputLength) {
200            this(fac);
201            eofPos = inputLength;
202            if (inputLength == input.length) {
203                if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
204                    inputLength--;
205                } else {
206                    char[] newInput = new char[inputLength + 1];
207                    System.arraycopy(input, 0, newInput, 0, input.length);
208                    input = newInput;
209                }
210            }
211            buf = input;
212            buflen = inputLength;
213            buf[buflen] = EOI;
214            bp = -1;
215            scanChar();
216        }
217    
218        /** Report an error at the given position using the provided arguments.
219         */
220        private void lexError(int pos, String key, Object... args) {
221            log.error(pos, key, args);
222            token = ERROR;
223            errPos = pos;
224        }
225    
226        /** Report an error at the current token position using the provided
227         *  arguments.
228         */
229        private void lexError(String key, Object... args) {
230            lexError(pos, key, args);
231        }
232    
233        /** Convert an ASCII digit from its base (8, 10, or 16)
234         *  to its value.
235         */
236        private int digit(int base) {
237            char c = ch;
238            int result = Character.digit(c, base);
239            if (result >= 0 && c > 0x7f) {
240                lexError(pos+1, "illegal.nonascii.digit");
241                ch = "0123456789abcdef".charAt(result);
242            }
243            return result;
244        }
245    
246        /** Convert unicode escape; bp points to initial '\' character
247         *  (Spec 3.3).
248         */
249        private void convertUnicode() {
250            if (ch == '\\' && unicodeConversionBp != bp) {
251                bp++; ch = buf[bp];
252                if (ch == 'u') {
253                    do {
254                        bp++; ch = buf[bp];
255                    } while (ch == 'u');
256                    int limit = bp + 3;
257                    if (limit < buflen) {
258                        int d = digit(16);
259                        int code = d;
260                        while (bp < limit && d >= 0) {
261                            bp++; ch = buf[bp];
262                            d = digit(16);
263                            code = (code << 4) + d;
264                        }
265                        if (d >= 0) {
266                            ch = (char)code;
267                            unicodeConversionBp = bp;
268                            return;
269                        }
270                    }
271                    lexError(bp, "illegal.unicode.esc");
272                } else {
273                    bp--;
274                    ch = '\\';
275                }
276            }
277        }
278    
279        /** Read next character.
280         */
281        private void scanChar() {
282            ch = buf[++bp];
283            if (ch == '\\') {
284                convertUnicode();
285            }
286        }
287    
288        /** Read next character in comment, skipping over double '\' characters.
289         */
290        private void scanCommentChar() {
291            scanChar();
292            if (ch == '\\') {
293                if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
294                    bp++;
295                } else {
296                    convertUnicode();
297                }
298            }
299        }
300    
301        /** Append a character to sbuf.
302         */
303        private void putChar(char ch) {
304            if (sp == sbuf.length) {
305                char[] newsbuf = new char[sbuf.length * 2];
306                System.arraycopy(sbuf, 0, newsbuf, 0, sbuf.length);
307                sbuf = newsbuf;
308            }
309            sbuf[sp++] = ch;
310        }
311    
312        /** For debugging purposes: print character.
313         */
314        private void dch() {
315            System.err.print(ch); System.out.flush();
316        }
317    
318        /** Read next character in character or string literal and copy into sbuf.
319         */
320        private void scanLitChar() {
321            if (ch == '\\') {
322                if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
323                    bp++;
324                    putChar('\\');
325                    scanChar();
326                } else {
327                    scanChar();
328                    switch (ch) {
329                    case '0': case '1': case '2': case '3':
330                    case '4': case '5': case '6': case '7':
331                        char leadch = ch;
332                        int oct = digit(8);
333                        scanChar();
334                        if ('0' <= ch && ch <= '7') {
335                            oct = oct * 8 + digit(8);
336                            scanChar();
337                            if (leadch <= '3' && '0' <= ch && ch <= '7') {
338                                oct = oct * 8 + digit(8);
339                                scanChar();
340                            }
341                        }
342                        putChar((char)oct);
343                        break;
344                    case 'b':
345                        putChar('\b'); scanChar(); break;
346                    case 't':
347                        putChar('\t'); scanChar(); break;
348                    case 'n':
349                        putChar('\n'); scanChar(); break;
350                    case 'f':
351                        putChar('\f'); scanChar(); break;
352                    case 'r':
353                        putChar('\r'); scanChar(); break;
354                    case '\'':
355                        putChar('\''); scanChar(); break;
356                    case '\"':
357                        putChar('\"'); scanChar(); break;
358                    case '\\':
359                        putChar('\\'); scanChar(); break;
360                    default:
361                        lexError(bp, "illegal.esc.char");
362                    }
363                }
364            } else if (bp != buflen) {
365                putChar(ch); scanChar();
366            }
367        }
368    
369        /** Read fractional part of hexadecimal floating point number.
370         */
371        private void scanHexExponentAndSuffix() {
372            if (ch == 'p' || ch == 'P') {
373                putChar(ch);
374                scanChar();
375                if (ch == '+' || ch == '-') {
376                    putChar(ch);
377                    scanChar();
378                }
379                if ('0' <= ch && ch <= '9') {
380                    do {
381                        putChar(ch);
382                        scanChar();
383                    } while ('0' <= ch && ch <= '9');
384                    if (!allowHexFloats) {
385                        lexError("unsupported.fp.lit");
386                        allowHexFloats = true;
387                    }
388                    else if (!hexFloatsWork)
389                        lexError("unsupported.cross.fp.lit");
390                } else
391                    lexError("malformed.fp.lit");
392            } else {
393                lexError("malformed.fp.lit");
394            }
395            if (ch == 'f' || ch == 'F') {
396                putChar(ch);
397                scanChar();
398                token = FLOATLITERAL;
399            } else {
400                if (ch == 'd' || ch == 'D') {
401                    putChar(ch);
402                    scanChar();
403                }
404                token = DOUBLELITERAL;
405            }
406        }
407    
408        /** Read fractional part of floating point number.
409         */
410        private void scanFraction() {
411            while (digit(10) >= 0) {
412                putChar(ch);
413                scanChar();
414            }
415            int sp1 = sp;
416            if (ch == 'e' || ch == 'E') {
417                putChar(ch);
418                scanChar();
419                if (ch == '+' || ch == '-') {
420                    putChar(ch);
421                    scanChar();
422                }
423                if ('0' <= ch && ch <= '9') {
424                    do {
425                        putChar(ch);
426                        scanChar();
427                    } while ('0' <= ch && ch <= '9');
428                    return;
429                }
430                lexError("malformed.fp.lit");
431                sp = sp1;
432            }
433        }
434    
435        /** Read fractional part and 'd' or 'f' suffix of floating point number.
436         */
437        private void scanFractionAndSuffix() {
438            this.radix = 10;
439            scanFraction();
440            if (ch == 'f' || ch == 'F') {
441                putChar(ch);
442                scanChar();
443                token = FLOATLITERAL;
444            } else {
445                if (ch == 'd' || ch == 'D') {
446                    putChar(ch);
447                    scanChar();
448                }
449                token = DOUBLELITERAL;
450            }
451        }
452    
453        /** Read fractional part and 'd' or 'f' suffix of floating point number.
454         */
455        private void scanHexFractionAndSuffix(boolean seendigit) {
456            this.radix = 16;
457            assert ch == '.';
458            putChar(ch);
459            scanChar();
460            while (digit(16) >= 0) {
461                seendigit = true;
462                putChar(ch);
463                scanChar();
464            }
465            if (!seendigit)
466                lexError("invalid.hex.number");
467            else
468                scanHexExponentAndSuffix();
469        }
470    
471        /** Read a number.
472         *  @param radix  The radix of the number; one of 8, 10, 16.
473         */
474        private void scanNumber(int radix) {
475            this.radix = radix;
476            // for octal, allow base-10 digit in case it's a float literal
477            int digitRadix = (radix <= 10) ? 10 : 16;
478            boolean seendigit = false;
479            while (digit(digitRadix) >= 0) {
480                seendigit = true;
481                putChar(ch);
482                scanChar();
483            }
484            if (radix == 16 && ch == '.') {
485                scanHexFractionAndSuffix(seendigit);
486            } else if (seendigit && radix == 16 && (ch == 'p' || ch == 'P')) {
487                scanHexExponentAndSuffix();
488            } else if (radix <= 10 && ch == '.') {
489                putChar(ch);
490                scanChar();
491                scanFractionAndSuffix();
492            } else if (radix <= 10 &&
493                       (ch == 'e' || ch == 'E' ||
494                        ch == 'f' || ch == 'F' ||
495                        ch == 'd' || ch == 'D')) {
496                scanFractionAndSuffix();
497            } else {
498                if (ch == 'l' || ch == 'L') {
499                    scanChar();
500                    token = LONGLITERAL;
501                } else {
502                    token = INTLITERAL;
503                }
504            }
505        }
506    
507        /** Read an identifier.
508         */
509        private void scanIdent() {
510            boolean isJavaIdentifierPart;
511            char high;
512            do {
513                if (sp == sbuf.length) putChar(ch); else sbuf[sp++] = ch;
514                // optimization, was: putChar(ch);
515    
516                scanChar();
517                switch (ch) {
518                case 'A': case 'B': case 'C': case 'D': case 'E':
519                case 'F': case 'G': case 'H': case 'I': case 'J':
520                case 'K': case 'L': case 'M': case 'N': case 'O':
521                case 'P': case 'Q': case 'R': case 'S': case 'T':
522                case 'U': case 'V': case 'W': case 'X': case 'Y':
523                case 'Z':
524                case 'a': case 'b': case 'c': case 'd': case 'e':
525                case 'f': case 'g': case 'h': case 'i': case 'j':
526                case 'k': case 'l': case 'm': case 'n': case 'o':
527                case 'p': case 'q': case 'r': case 's': case 't':
528                case 'u': case 'v': case 'w': case 'x': case 'y':
529                case 'z':
530                case '$': case '_':
531                case '0': case '1': case '2': case '3': case '4':
532                case '5': case '6': case '7': case '8': case '9':
533                case '\u0000': case '\u0001': case '\u0002': case '\u0003':
534                case '\u0004': case '\u0005': case '\u0006': case '\u0007':
535                case '\u0008': case '\u000E': case '\u000F': case '\u0010':
536                case '\u0011': case '\u0012': case '\u0013': case '\u0014':
537                case '\u0015': case '\u0016': case '\u0017':
538                case '\u0018': case '\u0019': case '\u001B':
539                case '\u007F':
540                    break;
541                case '\u001A': // EOI is also a legal identifier part
542                    if (bp >= buflen) {
543                        name = names.fromChars(sbuf, 0, sp);
544                        token = keywords.key(name);
545                        return;
546                    }
547                    break;
548                default:
549                    if (ch < '\u0080') {
550                        // all ASCII range chars already handled, above
551                        isJavaIdentifierPart = false;
552                    } else {
553                        high = scanSurrogates();
554                        if (high != 0) {
555                            if (sp == sbuf.length) {
556                                putChar(high);
557                            } else {
558                                sbuf[sp++] = high;
559                            }
560                            isJavaIdentifierPart = Character.isJavaIdentifierPart(
561                                Character.toCodePoint(high, ch));
562                        } else {
563                            isJavaIdentifierPart = Character.isJavaIdentifierPart(ch);
564                        }
565                    }
566                    if (!isJavaIdentifierPart) {
567                        name = names.fromChars(sbuf, 0, sp);
568                        token = keywords.key(name);
569                        return;
570                    }
571                }
572            } while (true);
573        }
574    
575        /** Are surrogates supported?
576         */
577        final static boolean surrogatesSupported = surrogatesSupported();
578        private static boolean surrogatesSupported() {
579            try {
580                Character.isHighSurrogate('a');
581                return true;
582            } catch (NoSuchMethodError ex) {
583                return false;
584            }
585        }
586    
587        /** Scan surrogate pairs.  If 'ch' is a high surrogate and
588         *  the next character is a low surrogate, then put the low
589         *  surrogate in 'ch', and return the high surrogate.
590         *  otherwise, just return 0.
591         */
592        private char scanSurrogates() {
593            if (surrogatesSupported && Character.isHighSurrogate(ch)) {
594                char high = ch;
595    
596                scanChar();
597    
598                if (Character.isLowSurrogate(ch)) {
599                    return high;
600                }
601    
602                ch = high;
603            }
604    
605            return 0;
606        }
607    
608        /** Return true if ch can be part of an operator.
609         */
610        private boolean isSpecial(char ch) {
611            switch (ch) {
612            case '!': case '%': case '&': case '*': case '?':
613            case '+': case '-': case ':': case '<': case '=':
614            case '>': case '^': case '|': case '~':
615            case '@':
616            case '`': /* emw4 */
617                return true;
618            default:
619                return false;
620            }
621        }
622    
623        /** Read longest possible sequence of special characters and convert
624         *  to token.
625         */
626        private void scanOperator() {
627            while (true) {
628                putChar(ch);
629                Name newname = names.fromChars(sbuf, 0, sp);
630                if (keywords.key(newname) == IDENTIFIER) {
631                    sp--;
632                    break;
633                }
634                name = newname;
635                token = keywords.key(newname);
636                scanChar();
637                if (!isSpecial(ch)) break;
638            }
639        }
640    
641        /**
642         * Scan a documention comment; determine if a deprecated tag is present.
643         * Called once the initial /, * have been skipped, positioned at the second *
644         * (which is treated as the beginning of the first line).
645         * Stops positioned at the closing '/'.
646         */
647        @SuppressWarnings("fallthrough")
648        private void scanDocComment() {
649            boolean deprecatedPrefix = false;
650    
651            forEachLine:
652            while (bp < buflen) {
653    
654                // Skip optional WhiteSpace at beginning of line
655                while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
656                    scanCommentChar();
657                }
658    
659                // Skip optional consecutive Stars
660                while (bp < buflen && ch == '*') {
661                    scanCommentChar();
662                    if (ch == '/') {
663                        return;
664                    }
665                }
666    
667                // Skip optional WhiteSpace after Stars
668                while (bp < buflen && (ch == ' ' || ch == '\t' || ch == FF)) {
669                    scanCommentChar();
670                }
671    
672                deprecatedPrefix = false;
673                // At beginning of line in the JavaDoc sense.
674                if (bp < buflen && ch == '@' && !deprecatedFlag) {
675                    scanCommentChar();
676                    if (bp < buflen && ch == 'd') {
677                        scanCommentChar();
678                        if (bp < buflen && ch == 'e') {
679                            scanCommentChar();
680                            if (bp < buflen && ch == 'p') {
681                                scanCommentChar();
682                                if (bp < buflen && ch == 'r') {
683                                    scanCommentChar();
684                                    if (bp < buflen && ch == 'e') {
685                                        scanCommentChar();
686                                        if (bp < buflen && ch == 'c') {
687                                            scanCommentChar();
688                                            if (bp < buflen && ch == 'a') {
689                                                scanCommentChar();
690                                                if (bp < buflen && ch == 't') {
691                                                    scanCommentChar();
692                                                    if (bp < buflen && ch == 'e') {
693                                                        scanCommentChar();
694                                                        if (bp < buflen && ch == 'd') {
695                                                            deprecatedPrefix = true;
696                                                            scanCommentChar();
697                                                        }}}}}}}}}}}
698                if (deprecatedPrefix && bp < buflen) {
699                    if (Character.isWhitespace(ch)) {
700                        deprecatedFlag = true;
701                    } else if (ch == '*') {
702                        scanCommentChar();
703                        if (ch == '/') {
704                            deprecatedFlag = true;
705                            return;
706                        }
707                    }
708                }
709    
710                // Skip rest of line
711                while (bp < buflen) {
712                    switch (ch) {
713                    case '*':
714                        scanCommentChar();
715                        if (ch == '/') {
716                            return;
717                        }
718                        break;
719                    case CR: // (Spec 3.4)
720                        scanCommentChar();
721                        if (ch != LF) {
722                            continue forEachLine;
723                        }
724                        /* fall through to LF case */
725                    case LF: // (Spec 3.4)
726                        scanCommentChar();
727                        continue forEachLine;
728                    default:
729                        scanCommentChar();
730                    }
731                } // rest of line
732            } // forEachLine
733            return;
734        }
735    
736        /** The value of a literal token, recorded as a string.
737         *  For integers, leading 0x and 'l' suffixes are suppressed.
738         */
739        public String stringVal() {
740            return new String(sbuf, 0, sp);
741        }
742    
743        /** Read token.
744         */
745        public void nextToken() {
746    
747            try {
748                prevEndPos = endPos;
749                sp = 0;
750    
751                while (true) {
752                    pos = bp;
753                    switch (ch) {
754                    case ' ': // (Spec 3.6)
755                    case '\t': // (Spec 3.6)
756                    case FF: // (Spec 3.6)
757                        do {
758                            scanChar();
759                        } while (ch == ' ' || ch == '\t' || ch == FF);
760                        endPos = bp;
761                        processWhiteSpace();
762                        break;
763                    case LF: // (Spec 3.4)
764                        scanChar();
765                        endPos = bp;
766                        processLineTerminator();
767                        break;
768                    case CR: // (Spec 3.4)
769                        scanChar();
770                        if (ch == LF) {
771                            scanChar();
772                        }
773                        endPos = bp;
774                        processLineTerminator();
775                        break;
776                    case 'A': case 'B': case 'C': case 'D': case 'E':
777                    case 'F': case 'G': case 'H': case 'I': case 'J':
778                    case 'K': case 'L': case 'M': case 'N': case 'O':
779                    case 'P': case 'Q': case 'R': case 'S': case 'T':
780                    case 'U': case 'V': case 'W': case 'X': case 'Y':
781                    case 'Z':
782                    case 'a': case 'b': case 'c': case 'd': case 'e':
783                    case 'f': case 'g': case 'h': case 'i': case 'j':
784                    case 'k': case 'l': case 'm': case 'n': case 'o':
785                    case 'p': case 'q': case 'r': case 's': case 't':
786                    case 'u': case 'v': case 'w': case 'x': case 'y':
787                    case 'z':
788                    case '$': case '_':
789                        scanIdent();
790                        return;
791                    case '0':
792                        scanChar();
793                        if (ch == 'x' || ch == 'X') {
794                            scanChar();
795                            if (ch == '.') {
796                                scanHexFractionAndSuffix(false);
797                            } else if (digit(16) < 0) {
798                                lexError("invalid.hex.number");
799                            } else {
800                                scanNumber(16);
801                            }
802                        } else {
803                            putChar('0');
804                            scanNumber(8);
805                        }
806                        return;
807                    case '1': case '2': case '3': case '4':
808                    case '5': case '6': case '7': case '8': case '9':
809                        scanNumber(10);
810                        return;
811                    case '.':
812                        scanChar();
813                        if ('0' <= ch && ch <= '9') {
814                            putChar('.');
815                            scanFractionAndSuffix();
816                        } else if (ch == '.') {
817                            putChar('.'); putChar('.');
818                            scanChar();
819                            if (ch == '.') {
820                                scanChar();
821                                putChar('.');
822                                token = ELLIPSIS;
823                            } else {
824                                lexError("malformed.fp.lit");
825                            }
826                        } else {
827                            token = DOT;
828                        }
829                        return;
830                    case ',':
831                        scanChar(); token = COMMA; return;
832                    case ';':
833                        scanChar(); token = SEMI; return;
834                    case '(':
835                        scanChar(); token = LPAREN; return;
836                    case ')':
837                        scanChar(); token = RPAREN; return;
838                    case '[':
839                        scanChar(); token = LBRACKET; return;
840                    case ']':
841                        scanChar(); token = RBRACKET; return;
842                    case '{':
843                        scanChar(); token = LBRACE; return;
844                    case '}':
845                        scanChar(); token = RBRACE; return;
846                    case '/':
847                        scanChar();
848                        if (ch == '/') {
849                            do {
850                                scanCommentChar();
851                            } while (ch != CR && ch != LF && bp < buflen);
852                            if (bp < buflen) {
853                                endPos = bp;
854                                processComment(CommentStyle.LINE);
855                            }
856                            break;
857                        } else if (ch == '*') {
858                            scanChar();
859                            CommentStyle style;
860                            if (ch == '*') {
861                                style = CommentStyle.JAVADOC;
862                                scanDocComment();
863                            } else {
864                                style = CommentStyle.BLOCK;
865                                while (bp < buflen) {
866                                    if (ch == '*') {
867                                        scanChar();
868                                        if (ch == '/') break;
869                                    } else {
870                                        scanCommentChar();
871                                    }
872                                }
873                            }
874                            if (ch == '/') {
875                                scanChar();
876                                endPos = bp;
877                                processComment(style);
878                                break;
879                            } else {
880                                lexError("unclosed.comment");
881                                return;
882                            }
883                        } else if (ch == '=') {
884                            name = names.slashequals;
885                            token = SLASHEQ;
886                            scanChar();
887                        } else {
888                            name = names.slash;
889                            token = SLASH;
890                        }
891                        return;
892                    case '\'':
893                        scanChar();
894                        if (ch == '\'') {
895                            lexError("empty.char.lit");
896                        } else {
897                            if (ch == CR || ch == LF)
898                                lexError(pos, "illegal.line.end.in.char.lit");
899                            scanLitChar();
900                            if (ch == '\'') {
901                                scanChar();
902                                token = CHARLITERAL;
903                            } else {
904                                lexError(pos, "unclosed.char.lit");
905                            }
906                        }
907                        return;
908                    case '\"':
909                        scanChar();
910                        while (ch != '\"' && ch != CR && ch != LF && bp < buflen)
911                            scanLitChar();
912                        if (ch == '\"') {
913                            token = STRINGLITERAL;
914                            scanChar();
915                        } else {
916                            lexError(pos, "unclosed.str.lit");
917                        }
918                        return;
919                    default:
920                        if (isSpecial(ch)) {
921                            scanOperator();
922                        } else {
923                            boolean isJavaIdentifierStart;
924                            if (ch < '\u0080') {
925                                // all ASCII range chars already handled, above
926                                isJavaIdentifierStart = false;
927                            } else {
928                                char high = scanSurrogates();
929                                if (high != 0) {
930                                    if (sp == sbuf.length) {
931                                        putChar(high);
932                                    } else {
933                                        sbuf[sp++] = high;
934                                    }
935    
936                                    isJavaIdentifierStart = Character.isJavaIdentifierStart(
937                                        Character.toCodePoint(high, ch));
938                                } else {
939                                    isJavaIdentifierStart = Character.isJavaIdentifierStart(ch);
940                                }
941                            }
942                            if (isJavaIdentifierStart) {
943                                scanIdent();
944                            } else if (bp == buflen || ch == EOI && bp+1 == buflen) { // JLS 3.5
945                                token = EOF;
946                                pos = bp = eofPos;
947                            } else {
948                                lexError("illegal.char", String.valueOf((int)ch));
949                                scanChar();
950                            }
951                        }
952                        return;
953                    }
954                }
955            } finally {
956                endPos = bp;
957                if (scannerDebug)
958                    System.out.println("nextToken(" + pos
959                                       + "," + endPos + ")=|" +
960                                       new String(getRawCharacters(pos, endPos))
961                                       + "|");
962            }
963        }
964    
965        /** Returns the scanner's current token, as stored by nextToken()
966         *  or by token(Token). */
967        public Token token() {
968            return this.token;
969        }
970    
971        /** Overwrites the current token.
972         *
973         *  @param token the token that token() should report from now on
974         */
975        public void token(Token token) { this.token = token; }
976    
977        /** Returns the position of the current token, expressed as a
978         *  0-based offset into the raw input stream, i.e. counted
979         *  before any unicode translation takes place.
980         */
981        public int pos() {
982            return this.pos;
983        }
984    
985        /** Returns the end position of the current token. nextToken() stores
986         *  bp here on exit and itself treats the value as the exclusive end
987         *  index when extracting the token's raw characters.
988         */
989        public int endPos() { return this.endPos; }
990    
991        /** Returns the end position that was recorded for the previous token.
992         */
993        public int prevEndPos() {
994            return this.prevEndPos;
995        }
996    
997        /** Return the position where a lexical error occurred;
998         */
999        public int errPos() {
1000            return errPos;
1001        }
1002    
1003        /** Set the position where a lexical error occurred;
1004         */
1005        public void errPos(int pos) {
1006            errPos = pos;
1007        }
1008    
1009        /** Returns the Name attached to the current token, i.e. the name
1010         *  of an identifier or of the token itself. */
1011        public Name name() {
1012            return this.name;
1013        }
1014    
1015        /** Returns the radix recorded for a numeric literal token.
1016         */
1017        public int radix() {
1018            return this.radix;
1019        }
1020    
1021        /** Tells whether a {@code @deprecated} tag was seen in the last doc
1022         *  comment. The client must clear it again via resetDeprecatedFlag.
1023         */
1024        public boolean deprecatedFlag() {
1025            return this.deprecatedFlag;
1026        }
1027    
1028        /** Clears the flag reported by deprecatedFlag().
1029         */
1030        public void resetDeprecatedFlag() { this.deprecatedFlag = false; }
1031    
1032        /** Returns the documentation string of the current token; this
1033         *  implementation always yields null. */
1034        public String docComment() {
1035            final String noDocComment = null;
1036            return noDocComment;
1037        }
1038    
1039        /** Returns a fresh copy of the first {@code buflen} characters of the
1040         *  input buffer. Unicode escape sequences are not translated.
1041         */
1042        public char[] getRawCharacters() {
1043            final int count = buflen;
1044            final char[] copy = new char[count];
1045            System.arraycopy(buf, 0, copy, 0, count);
1046            return copy;
1047        }
1048    
1049        /**
1050         * Returns a copy of a character array subset of the input buffer.
1051         * The returned array begins at the <code>beginIndex</code> and
1052         * extends to the character at index <code>endIndex - 1</code>, so its
1053         * length is <code>endIndex-beginIndex</code>. This behavior is like
1054         * <code>String.substring(beginIndex, endIndex)</code>.
1055         * Unicode escape sequences are not translated.
1056         *
1057         * @param beginIndex the beginning index, inclusive.
1058         * @param endIndex the ending index, exclusive.
1059         * @throws IndexOutOfBoundsException if either offset is outside of the
1060         *         array bounds, or if endIndex is smaller than beginIndex
1061         */
1062        public char[] getRawCharacters(int beginIndex, int endIndex) {
1063            if (beginIndex < 0 || endIndex < beginIndex || endIndex > buf.length) // checked before allocating
1064                throw new IndexOutOfBoundsException("begin " + beginIndex + ", end " + endIndex);
1065            char[] chars = new char[endIndex - beginIndex];
1066            System.arraycopy(buf, beginIndex, chars, 0, chars.length);
1067            return chars;
1068        }
1069    
1070        public enum CommentStyle {
1071            LINE,
1072            BLOCK,
1073            JAVADOC,
1074        }
1075    
1076        /**
1077         * Hook invoked once a complete comment has been scanned; pos and endPos
1078         * mark its boundary. This implementation only traces when scannerDebug is set.
1079         */
1080        protected void processComment(CommentStyle style) {
1081            if (!scannerDebug)
1082                return;
1083            String prefix = "processComment(" + pos + "," + endPos + "," + style + ")=|";
1084            String raw = new String(getRawCharacters(pos, endPos));
1085            System.out.println(prefix + raw + "|");
1086        }
1087    
1088        /**
1089         * Hook invoked once a complete whitespace run has been scanned; pos and endPos
1090         * mark its boundary. This implementation only traces when scannerDebug is set.
1091         */
1092        protected void processWhiteSpace() {
1093            if (!scannerDebug)
1094                return;
1095            String prefix = "processWhitespace(" + pos + "," + endPos + ")=|";
1096            String raw = new String(getRawCharacters(pos, endPos));
1097            System.out.println(prefix + raw + "|");
1098        }
1099    
1100        /**
1101         * Hook invoked once a line terminator has been processed; traces only in debug mode.
1102         */
1103        protected void processLineTerminator() {
1104            if (!scannerDebug)
1105                return;
1106            String prefix = "processTerminator(" + pos + "," + endPos + ")=|";
1107            String raw = new String(getRawCharacters(pos, endPos));
1108            System.out.println(prefix + raw + "|");
1109        }
1110    
1111        /** Creates a map for translating between line numbers and positions
1112         *  in the input, built from the input buffer and its length.
1113         *  @return a LineMap */
1114        public Position.LineMap getLineMap() {
1115            Position.LineMap lineMap = Position.makeLineMap(buf, buflen, false);
1116            return lineMap;
1117        }
1118    
1119    }