001    /*
002     * Copyright 2004-2008 Sun Microsystems, Inc.  All Rights Reserved.
003     * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004     *
005     * This code is free software; you can redistribute it and/or modify it
006     * under the terms of the GNU General Public License version 2 only, as
007     * published by the Free Software Foundation.  Sun designates this
008     * particular file as subject to the "Classpath" exception as provided
009     * by Sun in the LICENSE file that accompanied this code.
010     *
011     * This code is distributed in the hope that it will be useful, but WITHOUT
012     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013     * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
014     * version 2 for more details (a copy is included in the LICENSE file that
015     * accompanied this code).
016     *
017     * You should have received a copy of the GNU General Public License version
018     * 2 along with this work; if not, write to the Free Software Foundation,
019     * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020     *
021     * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022     * CA 95054 USA or visit www.sun.com if you need additional information or
023     * have any questions.
024     */
025    
026    package com.sun.tools.javac.parser;
027    
028    import java.nio.*;
029    
030    import com.sun.tools.javac.util.*;
031    import static com.sun.tools.javac.util.LayoutCharacters.*;
032    
033    /** An extension to the base lexical analyzer that captures
034     *  and processes the contents of doc comments.  It does so by
035     *  translating Unicode escape sequences and by stripping the
036     *  leading whitespace and starts from each line of the comment.
037     *
038     *  <p><b>This is NOT part of any API supported by Sun Microsystems.  If
039     *  you write code that depends on this, you do so at your own risk.
040     *  This code and its internal interfaces are subject to change or
041     *  deletion without notice.</b>
042     */
043    public class DocCommentScanner extends Scanner {
044    
045        /** A factory for creating scanners. */
046        public static class Factory extends Scanner.Factory {
047    
048            public static void preRegister(final Context context) {
049                context.put(scannerFactoryKey, new Context.Factory<Scanner.Factory>() {
050                    public Factory make() {
051                        return new Factory(context);
052                    }
053                });
054            }
055    
056            /** Create a new scanner factory. */
057            protected Factory(Context context) {
058                super(context);
059            }
060    
061            @Override
062            public Scanner newScanner(CharSequence input) {
063                if (input instanceof CharBuffer) {
064                    return new DocCommentScanner(this, (CharBuffer)input);
065                } else {
066                    char[] array = input.toString().toCharArray();
067                    return newScanner(array, array.length);
068                }
069            }
070    
071            @Override
072            public Scanner newScanner(char[] input, int inputLength) {
073                return new DocCommentScanner(this, input, inputLength);
074            }
075        }
076    
077    
078        /** Create a scanner from the input buffer.  buffer must implement
079         *  array() and compact(), and remaining() must be less than limit().
080         */
081        protected DocCommentScanner(Factory fac, CharBuffer buffer) {
082            super(fac, buffer);
083        }
084    
085        /** Create a scanner from the input array.  The array must have at
086         *  least a single character of extra space.
087         */
088        protected DocCommentScanner(Factory fac, char[] input, int inputLength) {
089            super(fac, input, inputLength);
090        }
091    
092        /** Starting position of the comment in original source
093         */
094        private int pos;
095    
096        /** The comment input buffer, index of next chacter to be read,
097         *  index of one past last character in buffer.
098         */
099        private char[] buf;
100        private int bp;
101        private int buflen;
102    
103        /** The current character.
104         */
105        private char ch;
106    
107        /** The column number position of the current character.
108         */
109        private int col;
110    
111        /** The buffer index of the last converted Unicode character
112         */
113        private int unicodeConversionBp = 0;
114    
115        /**
116         * Buffer for doc comment.
117         */
118        private char[] docCommentBuffer = new char[1024];
119    
120        /**
121         * Number of characters in doc comment buffer.
122         */
123        private int docCommentCount;
124    
125        /**
126         * Translated and stripped contents of doc comment
127         */
128        private String docComment = null;
129    
130    
131        /** Unconditionally expand the comment buffer.
132         */
133        private void expandCommentBuffer() {
134            char[] newBuffer = new char[docCommentBuffer.length * 2];
135            System.arraycopy(docCommentBuffer, 0, newBuffer,
136                             0, docCommentBuffer.length);
137            docCommentBuffer = newBuffer;
138        }
139    
140        /** Convert an ASCII digit from its base (8, 10, or 16)
141         *  to its value.
142         */
143        private int digit(int base) {
144            char c = ch;
145            int result = Character.digit(c, base);
146            if (result >= 0 && c > 0x7f) {
147                ch = "0123456789abcdef".charAt(result);
148            }
149            return result;
150        }
151    
152        /** Convert Unicode escape; bp points to initial '\' character
153         *  (Spec 3.3).
154         */
155        private void convertUnicode() {
156            if (ch == '\\' && unicodeConversionBp != bp) {
157                bp++; ch = buf[bp]; col++;
158                if (ch == 'u') {
159                    do {
160                        bp++; ch = buf[bp]; col++;
161                    } while (ch == 'u');
162                    int limit = bp + 3;
163                    if (limit < buflen) {
164                        int d = digit(16);
165                        int code = d;
166                        while (bp < limit && d >= 0) {
167                            bp++; ch = buf[bp]; col++;
168                            d = digit(16);
169                            code = (code << 4) + d;
170                        }
171                        if (d >= 0) {
172                            ch = (char)code;
173                            unicodeConversionBp = bp;
174                            return;
175                        }
176                    }
177                    // "illegal.Unicode.esc", reported by base scanner
178                } else {
179                    bp--;
180                    ch = '\\';
181                    col--;
182                }
183            }
184        }
185    
186    
187        /** Read next character.
188         */
189        private void scanChar() {
190            bp++;
191            ch = buf[bp];
192            switch (ch) {
193            case '\r': // return
194                col = 0;
195                break;
196            case '\n': // newline
197                if (bp == 0 || buf[bp-1] != '\r') {
198                    col = 0;
199                }
200                break;
201            case '\t': // tab
202                col = (col / TabInc * TabInc) + TabInc;
203                break;
204            case '\\': // possible Unicode
205                col++;
206                convertUnicode();
207                break;
208            default:
209                col++;
210                break;
211            }
212        }
213    
214        /**
215         * Read next character in doc comment, skipping over double '\' characters.
216         * If a double '\' is skipped, put in the buffer and update buffer count.
217         */
218        private void scanDocCommentChar() {
219            scanChar();
220            if (ch == '\\') {
221                if (buf[bp+1] == '\\' && unicodeConversionBp != bp) {
222                    if (docCommentCount == docCommentBuffer.length)
223                        expandCommentBuffer();
224                    docCommentBuffer[docCommentCount++] = ch;
225                    bp++; col++;
226                } else {
227                    convertUnicode();
228                }
229            }
230        }
231    
232        /* Reset doc comment before reading each new token
233         */
234        public void nextToken() {
235            docComment = null;
236            super.nextToken();
237        }
238    
239        /**
240         * Returns the documentation string of the current token.
241         */
242        public String docComment() {
243            return docComment;
244        }
245    
246        /**
247         * Process a doc comment and make the string content available.
248         * Strips leading whitespace and stars.
249         */
250        @SuppressWarnings("fallthrough")
251        protected void processComment(CommentStyle style) {
252            if (style != CommentStyle.JAVADOC) {
253                return;
254            }
255    
256            pos = pos();
257            buf = getRawCharacters(pos, endPos());
258            buflen = buf.length;
259            bp = 0;
260            col = 0;
261    
262            docCommentCount = 0;
263    
264            boolean firstLine = true;
265    
266            // Skip over first slash
267            scanDocCommentChar();
268            // Skip over first star
269            scanDocCommentChar();
270    
271            // consume any number of stars
272            while (bp < buflen && ch == '*') {
273                scanDocCommentChar();
274            }
275            // is the comment in the form /**/, /***/, /****/, etc. ?
276            if (bp < buflen && ch == '/') {
277                docComment = "";
278                return;
279            }
280    
281            // skip a newline on the first line of the comment.
282            if (bp < buflen) {
283                if (ch == LF) {
284                    scanDocCommentChar();
285                    firstLine = false;
286                } else if (ch == CR) {
287                    scanDocCommentChar();
288                    if (ch == LF) {
289                        scanDocCommentChar();
290                        firstLine = false;
291                    }
292                }
293            }
294    
295        outerLoop:
296    
297            // The outerLoop processes the doc comment, looping once
298            // for each line.  For each line, it first strips off
299            // whitespace, then it consumes any stars, then it
300            // puts the rest of the line into our buffer.
301            while (bp < buflen) {
302    
303                // The wsLoop consumes whitespace from the beginning
304                // of each line.
305            wsLoop:
306    
307                while (bp < buflen) {
308                    switch(ch) {
309                    case ' ':
310                        scanDocCommentChar();
311                        break;
312                    case '\t':
313                        col = ((col - 1) / TabInc * TabInc) + TabInc;
314                        scanDocCommentChar();
315                        break;
316                    case FF:
317                        col = 0;
318                        scanDocCommentChar();
319                        break;
320    // Treat newline at beginning of line (blank line, no star)
321    // as comment text.  Old Javadoc compatibility requires this.
322    /*---------------------------------*
323                    case CR: // (Spec 3.4)
324                        scanDocCommentChar();
325                        if (ch == LF) {
326                            col = 0;
327                            scanDocCommentChar();
328                        }
329                        break;
330                    case LF: // (Spec 3.4)
331                        scanDocCommentChar();
332                        break;
333    *---------------------------------*/
334                    default:
335                        // we've seen something that isn't whitespace;
336                        // jump out.
337                        break wsLoop;
338                    }
339                }
340    
341                // Are there stars here?  If so, consume them all
342                // and check for the end of comment.
343                if (ch == '*') {
344                    // skip all of the stars
345                    do {
346                        scanDocCommentChar();
347                    } while (ch == '*');
348    
349                    // check for the closing slash.
350                    if (ch == '/') {
351                        // We're done with the doc comment
352                        // scanChar() and breakout.
353                        break outerLoop;
354                    }
355                } else if (! firstLine) {
356                    //The current line does not begin with a '*' so we will indent it.
357                    for (int i = 1; i < col; i++) {
358                        if (docCommentCount == docCommentBuffer.length)
359                            expandCommentBuffer();
360                        docCommentBuffer[docCommentCount++] = ' ';
361                    }
362                }
363    
364                // The textLoop processes the rest of the characters
365                // on the line, adding them to our buffer.
366            textLoop:
367                while (bp < buflen) {
368                    switch (ch) {
369                    case '*':
370                        // Is this just a star?  Or is this the
371                        // end of a comment?
372                        scanDocCommentChar();
373                        if (ch == '/') {
374                            // This is the end of the comment,
375                            // set ch and return our buffer.
376                            break outerLoop;
377                        }
378                        // This is just an ordinary star.  Add it to
379                        // the buffer.
380                        if (docCommentCount == docCommentBuffer.length)
381                            expandCommentBuffer();
382                        docCommentBuffer[docCommentCount++] = '*';
383                        break;
384                    case ' ':
385                    case '\t':
386                        if (docCommentCount == docCommentBuffer.length)
387                            expandCommentBuffer();
388                        docCommentBuffer[docCommentCount++] = ch;
389                        scanDocCommentChar();
390                        break;
391                    case FF:
392                        scanDocCommentChar();
393                        break textLoop; // treat as end of line
394                    case CR: // (Spec 3.4)
395                        scanDocCommentChar();
396                        if (ch != LF) {
397                            // Canonicalize CR-only line terminator to LF
398                            if (docCommentCount == docCommentBuffer.length)
399                                expandCommentBuffer();
400                            docCommentBuffer[docCommentCount++] = (char)LF;
401                            break textLoop;
402                        }
403                        /* fall through to LF case */
404                    case LF: // (Spec 3.4)
405                        // We've seen a newline.  Add it to our
406                        // buffer and break out of this loop,
407                        // starting fresh on a new line.
408                        if (docCommentCount == docCommentBuffer.length)
409                            expandCommentBuffer();
410                        docCommentBuffer[docCommentCount++] = ch;
411                        scanDocCommentChar();
412                        break textLoop;
413                    default:
414                        // Add the character to our buffer.
415                        if (docCommentCount == docCommentBuffer.length)
416                            expandCommentBuffer();
417                        docCommentBuffer[docCommentCount++] = ch;
418                        scanDocCommentChar();
419                    }
420                } // end textLoop
421                firstLine = false;
422            } // end outerLoop
423    
424            if (docCommentCount > 0) {
425                int i = docCommentCount - 1;
426            trailLoop:
427                while (i > -1) {
428                    switch (docCommentBuffer[i]) {
429                    case '*':
430                        i--;
431                        break;
432                    default:
433                        break trailLoop;
434                    }
435                }
436                docCommentCount = i + 1;
437    
438                // Store the text of the doc comment
439                docComment = new String(docCommentBuffer, 0 , docCommentCount);
440            } else {
441                docComment = "";
442            }
443        }
444    
445        /** Build a map for translating between line numbers and
446         * positions in the input.
447         *
448         * @return a LineMap */
449        public Position.LineMap getLineMap() {
450            char[] buf = getRawCharacters();
451            return Position.makeLineMap(buf, buf.length, true);
452        }
453    }