/*
* Copyright (c) 1994, 2004, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package sun.tools.java;
import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
A Scanner for Java tokens. Errors are reported
to the environment object.
The scanner keeps track of the current token,
the value of the current token (if any), and the start
position of the current token.
The scan() method advances the scanner to the next
token in the input.
The match() method is used to quickly match opening
brackets (ie: '(', '{', or '[') with their closing
counter part. This is useful during error recovery.
A position consists of: ((linenr << WHEREOFFSETBITS) | offset);
this means that both the line number and the exact offset into
the file are encoded in each position value.
The compiler treats either "\n", "\r" or "\r\n" as the
end of a line.
WARNING: The contents of this source file are not part of any
supported API. Code that depends on them does so at its own risk:
they are subject to change or removal without notice.
Author: Arthur van Hoff
/**
* A Scanner for Java tokens. Errors are reported
* to the environment object.<p>
*
* The scanner keeps track of the current token,
* the value of the current token (if any), and the start
* position of the current token.<p>
*
* The scan() method advances the scanner to the next
* token in the input.<p>
*
* The match() method is used to quickly match opening
* brackets (ie: '(', '{', or '[') with their closing
* counter part. This is useful during error recovery.<p>
*
* A position consists of: ((linenr << WHEREOFFSETBITS) | offset);
* this means that both the line number and the exact offset into
* the file are encoded in each position value.<p>
*
* The compiler treats either "\n", "\r" or "\r\n" as the
* end of a line.<p>
*
* WARNING: The contents of this source file are not part of any
* supported API. Code that depends on them does so at its own risk:
* they are subject to change or removal without notice.
*
* @author Arthur van Hoff
*/
public
class Scanner implements Constants {
The increment for each character.
/**
 * The increment for each character: the amount added to a position
 * value to advance it by one character. The offset is stored in the
 * low-order bits of a position, so the increment is 1.
 */
public static final long OFFSETINC = 1;
The increment for each line.
/**
 * The increment for each line: the amount added to a position value to
 * advance it by one line. The line number is stored above the low
 * WHEREOFFSETBITS bits of a position, hence the shift.
 */
public static final long LINEINC = 1L << WHEREOFFSETBITS;
End of input
/**
 * End of input. The scanner compares the current character {@code ch}
 * against this value to detect the end of the input, and xscan() stores
 * it in {@code token} when that happens.
 */
public static final int EOF = -1;
Where errors are reported
/**
 * Where errors are reported. Every lexical error found while scanning
 * is passed to {@code env.error(position, messageKey)}.
 */
public Environment env;
Input reader
/**
 * Input reader. The scanner pulls characters from it one at a time with
 * {@code read()} and samples {@code in.pos} to obtain positions.
 */
protected ScannerInputReader in;
If true, present all comments as tokens.
Contents are not saved, but positions are recorded accurately,
so the comment can be recovered from the text.
Line terminations are also returned as comment tokens,
and may be distinguished by their start and end positions,
which are equal (meaning, these tokens contain no chars).
/**
 * If true, present all comments as tokens.
 * Contents are not saved, but positions are recorded accurately,
 * so the comment can be recovered from the text.
 * Line terminations are also returned as comment tokens,
 * and may be distinguished by their start and end positions,
 * which are equal (meaning, these tokens contain no chars).
 * Defaults to false; the flag is consulted by xscan().
 */
public boolean scanComments = false;
Current token
/**
 * Current token: the token code produced by the most recent scan.
 * It is {@link #EOF} once the end of the input has been reached.
 */
public int token;
The position of the current token
/**
 * The position of the current token. xscan() copies {@code in.pos}
 * into this field each time it starts examining a candidate token.
 */
public long pos;
The position of the previous token
/**
 * The position of the previous token. Set from {@code in.pos} at the
 * very start of each xscan() call, before any whitespace or comments
 * preceding the next token are skipped.
 */
public long prevPos;
The current character
/**
 * The current character: the most recently read character that has not
 * yet been consumed as part of a token, or {@link #EOF}.
 */
protected int ch;
/*
 * Token values. Which field is meaningful depends on the current token:
 * scanCharacter() sets charValue, scanNumber() sets intValue or
 * longValue (and radix), scanReal() sets floatValue or doubleValue,
 * scanString() sets stringValue and scanIdentifier() sets idValue.
 */
public char charValue;
public int intValue;
public long longValue;
public float floatValue;
public double doubleValue;
public String stringValue;
public Identifier idValue;
public int radix; // Radix (8, 10 or 16) of the last int or long literal read
/*
 * A doc comment preceding the most recent token, or null if there was
 * none. Reset to null at the start of every xscan() call.
 */
public String docComment;
/*
 * A growable character buffer. `count` is the number of valid characters
 * currently held in `buffer`; the array is doubled by growBuffer() when
 * it fills up.
 */
private int count;
private char buffer[] = new char[1024];
/**
 * Doubles the capacity of the character buffer, preserving everything
 * it currently holds.
 */
private void growBuffer() {
    final char[] old = buffer;
    final char[] doubled = new char[old.length * 2];
    System.arraycopy(old, 0, doubled, 0, old.length);
    buffer = doubled;
}
// NOTE: putc() and bufferString() have been hand-inlined in
// scanDocComment. If you change either of them, check whether
// scanDocComment needs the same modification.

/**
 * Appends one character to the character buffer, growing the buffer
 * first when it is full.
 *
 * @param ch the character to append; it is narrowed to a char
 */
private void putc(int ch) {
    final int slot = count;
    if (slot == buffer.length) {
        growBuffer();
    }
    buffer[slot] = (char) ch;
    count = slot + 1;
}
/**
 * Returns the characters accumulated in the buffer so far as a new
 * String. The buffer itself is left untouched.
 */
private String bufferString() {
    return String.valueOf(buffer, 0, count);
}
Create a scanner to scan an input stream.
/**
 * Create a scanner to scan an input stream. The first token is scanned
 * immediately (via useInputStream), so the scanner is positioned on it
 * when the constructor returns.
 *
 * @param env the environment that receives error reports
 * @param in  the stream to read source text from
 * @throws IOException if reading the input fails
 */
public Scanner(Environment env, InputStream in) throws IOException {
this.env = env;
useInputStream(in);
}
Setup input from the given input stream,
and scan the first token from it.
/**
 * Sets up input from the given stream and scans the first token from it.
 * <p>
 * If creating the reader fails for any reason, the environment's
 * character encoding is reset to null and the reader is created a
 * second time; a failure of that second attempt propagates to the caller.
 *
 * @param in the stream to read source text from
 * @throws IOException if reading the input fails
 */
protected void useInputStream(InputStream in) throws IOException {
    ScannerInputReader reader;
    try {
        reader = new ScannerInputReader(env, in);
    } catch (Exception e) {
        // Retry once with the character encoding cleared.
        env.setCharacterEncoding(null);
        reader = new ScannerInputReader(env, in);
    }
    this.in = reader;

    // Prime the one-character lookahead, then the first token.
    ch = reader.read();
    prevPos = reader.pos;
    scan();
}
Create a scanner to scan an input stream.
/**
 * Create a scanner that has no input yet. Only the environment is
 * recorded; nothing is read and no token is scanned. The subclass is
 * expected to call useInputStream itself when its input is ready.
 *
 * @param env the environment that receives error reports
 */
protected Scanner(Environment env) {
this.env = env;
// Expect the subclass to call useInputStream at the right time.
}
Define a keyword.
/**
 * Defines a keyword: looks up the identifier spelled
 * {@code opNames[val]} and tags it with {@code val} as its type, which
 * is what scanIdentifier() later reports as the token.
 *
 * @param val the token code of the keyword
 */
private static void defineKeyword(int val) {
    final Identifier keyword = Identifier.lookup(opNames[val]);
    keyword.setType(val);
}
Initialized keyword and token Hashtables
/**
* Initialized keyword and token Hashtables
*/
static {
// Statement keywords
defineKeyword(FOR);
defineKeyword(IF);
defineKeyword(ELSE);
defineKeyword(WHILE);
defineKeyword(DO);
defineKeyword(SWITCH);
defineKeyword(CASE);
defineKeyword(DEFAULT);
defineKeyword(BREAK);
defineKeyword(CONTINUE);
defineKeyword(RETURN);
defineKeyword(TRY);
defineKeyword(CATCH);
defineKeyword(FINALLY);
defineKeyword(THROW);
// Type defineKeywords
defineKeyword(BYTE);
defineKeyword(CHAR);
defineKeyword(SHORT);
defineKeyword(INT);
defineKeyword(LONG);
defineKeyword(FLOAT);
defineKeyword(DOUBLE);
defineKeyword(VOID);
defineKeyword(BOOLEAN);
// Expression keywords
defineKeyword(INSTANCEOF);
defineKeyword(TRUE);
defineKeyword(FALSE);
defineKeyword(NEW);
defineKeyword(THIS);
defineKeyword(SUPER);
defineKeyword(NULL);
// Declaration keywords
defineKeyword(IMPORT);
defineKeyword(CLASS);
defineKeyword(EXTENDS);
defineKeyword(IMPLEMENTS);
defineKeyword(INTERFACE);
defineKeyword(PACKAGE);
defineKeyword(THROWS);
// Modifier keywords
defineKeyword(PRIVATE);
defineKeyword(PUBLIC);
defineKeyword(PROTECTED);
defineKeyword(STATIC);
defineKeyword(TRANSIENT);
defineKeyword(SYNCHRONIZED);
defineKeyword(NATIVE);
defineKeyword(ABSTRACT);
defineKeyword(VOLATILE);
defineKeyword(FINAL);
defineKeyword(STRICTFP);
// reserved keywords
defineKeyword(CONST);
defineKeyword(GOTO);
}
Scan a comment. This method should be
called once the initial /, * and the next
character have been read.
/**
 * Skips the remainder of a non-doc block comment. Must be called after
 * the opening slash and star have been consumed and the character
 * following them is already in {@code ch}.
 * <p>
 * On return {@code ch} holds the character after the closing star-slash.
 * If the input ends first, "eof.in.comment" is reported and {@code ch}
 * is left at EOF.
 */
private void skipComment() throws IOException {
    for (;;) {
        if (ch == EOF) {
            env.error(pos, "eof.in.comment");
            return;
        }
        if (ch != '*') {
            ch = in.read();
            continue;
        }
        // Saw a star: the comment ends only if a slash follows directly.
        // Otherwise re-examine the character just read on the next pass.
        ch = in.read();
        if (ch == '/') {
            ch = in.read();
            return;
        }
    }
}
Scan a doc comment. This method should be called
once the initial /, * and * have been read. It gathers
the content of the comment (without leading spaces and '*'s)
in the string buffer.
/**
 * Scan a doc comment. This method should be called
 * once the initial /, * and * have been read. It gathers
 * the content of the comment (without leading spaces and '*'s)
 * in the string buffer.
 * <p>
 * On return {@code ch} holds the character following the closing
 * star-slash. If the input ends inside the comment, "eof.in.comment"
 * is reported, {@code ch} is set to EOF and the text gathered so far
 * is returned.
 *
 * @return the text of the comment with, on each line, leading spaces,
 *         tabs and stars removed, and with trailing spaces, tabs and
 *         stars removed from the end of the whole text; the empty
 *         string if nothing remains
 */
private String scanDocComment() throws IOException {
// Note: this method has been hand-optimized to yield
// better performance. This was done after it was noted
// that javadoc spent a great deal of its time here.
// This should also help the performance of the compiler
// as well -- it scans the doc comments to find
// @deprecated tags.
//
// The logic of the method has been completely rewritten
// to avoid the use of flags that need to be looked at
// for every character read. Members that are accessed
// more than once have been stored in local variables.
// The methods putc() and bufferString() have been
// inlined by hand. Extra cases have been added to
// switch statements to trick the compiler into generating
// a tableswitch instead of a lookupswitch.
//
// This implementation aims to preserve the previous
// behavior of this method.
int c;
// Put `in' in a local variable.
final ScannerInputReader in = this.in;
// We maintain the buffer locally rather than calling putc().
// Note that these locals shadow the fields of the same name;
// the `count' field is never written by this method.
char[] buffer = this.buffer;
int count = 0;
// We are called pointing at the second star of the doc
// comment:
//
// Input: /** the rest of the comment ... */
// ^
//
// We rely on this in the code below.
// Consume any number of stars.
while ((c = in.read()) == '*')
;
// Is the comment of the form /**/, /***/, /****/, etc.?
if (c == '/') {
// Set ch and return
ch = in.read();
return "";
}
// Skip a newline on the first line of the comment.
if (c == '\n') {
c = in.read();
}
outerLoop:
// The outerLoop processes the doc comment, looping once
// for each line. For each line, it first strips off
// whitespace, then it consumes any stars, then it
// puts the rest of the line into our buffer.
while (true) {
// The wsLoop consumes whitespace from the beginning
// of each line.
wsLoop:
while (true) {
switch (c) {
case ' ':
case '\t':
// We could check for other forms of whitespace
// as well, but this is left as is for minimum
// disturbance of functionality.
//
// Just skip whitespace.
c = in.read();
break;
// We have added extra cases here to trick the
// compiler into using a tableswitch instead of
// a lookupswitch. They can be removed without
// a change in meaning.
case 10: case 11: case 12: case 13: case 14: case 15:
case 16: case 17: case 18: case 19: case 20: case 21:
case 22: case 23: case 24: case 25: case 26: case 27:
case 28: case 29: case 30: case 31:
default:
// We've seen something that isn't whitespace,
// jump out. (EOF also lands here; it is dealt
// with by the textLoop below.)
break wsLoop;
}
} // end wsLoop.
// Are there stars here? If so, consume them all
// and check for the end of comment.
if (c == '*') {
// Skip all of the stars...
do {
c = in.read();
} while (c == '*');
// ...then check for the closing slash.
if (c == '/') {
// We're done with the doc comment.
// Set ch and break out.
ch = in.read();
break outerLoop;
}
}
// The textLoop processes the rest of the characters
// on the line, adding them to our buffer.
textLoop:
while (true) {
switch (c) {
case EOF:
// We've seen a premature EOF. Break out
// of the loop.
env.error(pos, "eof.in.comment");
ch = EOF;
break outerLoop;
case '*':
// Is this just a star? Or is this the
// end of a comment?
c = in.read();
if (c == '/') {
// This is the end of the comment,
// set ch and return our buffer.
ch = in.read();
break outerLoop;
}
// This is just an ordinary star. Add it to
// the buffer. The character read after it is
// examined on the next pass of this loop.
if (count == buffer.length) {
// growBuffer() replaces this.buffer, so the
// local alias must be refreshed afterwards.
growBuffer();
buffer = this.buffer;
}
buffer[count++] = '*';
break;
case '\n':
// We've seen a newline. Add it to our
// buffer and break out of this loop,
// starting fresh on a new line.
if (count == buffer.length) {
growBuffer();
buffer = this.buffer;
}
buffer[count++] = '\n';
c = in.read();
break textLoop;
// Again, the extra cases here are a trick
// to get the compiler to generate a tableswitch.
case 0: case 1: case 2: case 3: case 4: case 5:
case 6: case 7: case 8: case 11: case 12: case 13:
case 14: case 15: case 16: case 17: case 18: case 19:
case 20: case 21: case 22: case 23: case 24: case 25:
case 26: case 27: case 28: case 29: case 30: case 31:
case 32: case 33: case 34: case 35: case 36: case 37:
case 38: case 39: case 40:
default:
// Add the character to our buffer.
if (count == buffer.length) {
growBuffer();
buffer = this.buffer;
}
buffer[count++] = (char)c;
c = in.read();
break;
}
} // end textLoop
} // end outerLoop
// We have scanned our doc comment. It is stored in
// buffer. The previous implementation of scanDocComment
// stripped off all trailing spaces and stars from the comment.
// We will do this as well, so as to cause a minimum of
// disturbance. Is this what we want?
if (count > 0) {
int i = count - 1;
trailLoop:
while (i > -1) {
switch (buffer[i]) {
case ' ':
case '\t':
case '*':
i--;
break;
// And again, the extra cases here are a trick
// to get the compiler to generate a tableswitch.
case 0: case 1: case 2: case 3: case 4: case 5:
case 6: case 7: case 8: case 10: case 11: case 12:
case 13: case 14: case 15: case 16: case 17: case 18:
case 19: case 20: case 21: case 22: case 23: case 24:
case 25: case 26: case 27: case 28: case 29: case 30:
case 31: case 33: case 34: case 35: case 36: case 37:
case 38: case 39: case 40:
default:
break trailLoop;
}
}
// i is now the index of the last character to keep (or -1).
count = i + 1;
// Return the text of the doc comment.
return new String(buffer, 0, count);
} else {
return "";
}
}
Scan a number. The first digit of the number should be the current
character. We may be scanning hex, decimal, or octal at this point
/**
 * Scan a number. The first digit of the number should be the current
 * character. We may be scanning hex, decimal, or octal at this point.
 * <p>
 * A leading '0' selects radix 8, which is switched to 16 if an 'x' or
 * 'X' follows immediately; anything else selects radix 10. If a '.',
 * or (outside hex) one of d, D, e, E, f, F is met, scanning is handed
 * over to scanReal(). Otherwise the method sets {@code token} to INTVAL
 * or LONGVAL (when an 'l' or 'L' suffix is present), stores the value
 * in {@code intValue} or {@code longValue}, and leaves the radix in
 * {@code radix}.
 * <p>
 * Errors reported: "invalid.number", "invalid.octal.number",
 * "invalid.hex.number" and the "overflow.int.*" / "overflow.long.*"
 * family. After an error the stored value is 0.
 */
private void scanNumber() throws IOException {
boolean seenNonOctal = false;
boolean overflow = false;
boolean seenDigit = false; // used to detect invalid hex number 0xL
radix = (ch == '0' ? 8 : 10);
long value = ch - '0';
count = 0;
putc(ch); // save character in buffer
numberLoop:
for (;;) {
switch (ch = in.read()) {
case '.':
if (radix == 16)
break numberLoop; // an illegal character
scanReal();
return;
case '8': case '9':
// We can't yet throw an error if reading an octal. We might
// discover we're really reading a real.
seenNonOctal = true;
// fall through
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
seenDigit = true;
putc(ch);
if (radix == 10) {
// Overflow if multiplying by ten does not round-trip, or if
// the sum went negative. The second test deliberately lets
// Long.MIN_VALUE through (MIN_VALUE - 1 wraps to MAX_VALUE).
overflow = overflow || (value * 10)/10 != value;
value = (value * 10) + (ch - '0');
overflow = overflow || (value - 1 < -1);
} else if (radix == 8) {
// Overflow if any of the top three bits would be shifted out.
overflow = overflow || (value >>> 61) != 0;
value = (value << 3) + (ch - '0');
} else {
// Overflow if any of the top four bits would be shifted out.
overflow = overflow || (value >>> 60) != 0;
value = (value << 4) + (ch - '0');
}
break;
case 'd': case 'D': case 'e': case 'E': case 'f': case 'F':
// Outside hex these letters start an exponent or are a
// float/double suffix; in hex they are ordinary digits.
if (radix != 16) {
scanReal();
return;
}
// fall through
case 'a': case 'A': case 'b': case 'B': case 'c': case 'C':
seenDigit = true;
putc(ch);
if (radix != 16)
break numberLoop; // an illegal character
overflow = overflow || (value >>> 60) != 0;
value = (value << 4) + 10 +
Character.toLowerCase((char)ch) - 'a';
break;
case 'l': case 'L':
ch = in.read(); // skip over 'l'
longValue = value;
token = LONGVAL;
break numberLoop;
case 'x': case 'X':
// if the first character is a '0' and this is the second
// letter, then read in a hexadecimal number. Otherwise, error.
if (count == 1 && radix == 8) {
radix = 16;
seenDigit = false;
break;
} else {
// we'll get an illegal character error
break numberLoop;
}
default:
intValue = (int)value;
token = INTVAL;
break numberLoop;
}
} // end numberLoop
// We have just finished reading the number. The next thing better
// not be a letter or digit.
// Note: There will be deprecation warnings against these uses
// of Character.isJavaLetterOrDigit and Character.isJavaLetter.
// Do not fix them yet; allow the compiler to run on pre-JDK1.1 VMs.
if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
env.error(in.pos, "invalid.number");
// Swallow the rest of the malformed literal.
do { ch = in.read(); }
while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
intValue = 0;
token = INTVAL;
} else if (radix == 8 && seenNonOctal) {
// A bogus octal literal.
intValue = 0;
token = INTVAL;
env.error(pos, "invalid.octal.number");
} else if (radix == 16 && seenDigit == false) {
// A hex literal with no digits, 0xL, for example.
intValue = 0;
token = INTVAL;
env.error(pos, "invalid.hex.number");
} else {
if (token == INTVAL) {
// Check for overflow. Note that base 10 literals
// have different rules than base 8 and 16.
// A decimal value of exactly 2147483648 is not flagged here.
overflow = overflow ||
(value & 0xFFFFFFFF00000000L) != 0 ||
(radix == 10 && value > 2147483648L);
if (overflow) {
intValue = 0;
// Give a specific error message which tells
// the user the range.
switch (radix) {
case 8:
env.error(pos, "overflow.int.oct");
break;
case 10:
env.error(pos, "overflow.int.dec");
break;
case 16:
env.error(pos, "overflow.int.hex");
break;
default:
throw new CompilerError("invalid radix");
}
}
} else {
// LONGVAL: only the overflow detected while accumulating counts.
if (overflow) {
longValue = 0;
// Give a specific error message which tells
// the user the range.
switch (radix) {
case 8:
env.error(pos, "overflow.long.oct");
break;
case 10:
env.error(pos, "overflow.long.dec");
break;
case 16:
env.error(pos, "overflow.long.hex");
break;
default:
throw new CompilerError("invalid radix");
}
}
}
}
}
Scan a float. We are either looking at the decimal, or we have already
seen it and put it into the buffer. We haven't seen an exponent.
Scan a float. Should be called with the current character is either
the 'e', 'E' or '.'
/**
 * Scans the rest of a floating-point literal. The characters of the
 * literal seen so far are already in the buffer. The current character
 * is either the decimal point, the first digit after a decimal point
 * that is already buffered, or one of e, E, d, D, f, F.
 * <p>
 * Sets {@code token} to FLOATVAL (when an 'f' or 'F' suffix is present)
 * or DOUBLEVAL, and stores the parsed value in {@code floatValue} or
 * {@code doubleValue}. Reports "invalid.number", "float.format",
 * "overflow.float", "underflow.float", "overflow.double" or
 * "underflow.double" as appropriate.
 */
private void scanReal() throws IOException {
    boolean exponentSeen = false;
    boolean floatSuffix = false;

    if (ch == '.') {
        putc(ch);
        ch = in.read();
    }

    scanning:
    while (true) {
        switch (ch) {
          case '0': case '1': case '2': case '3': case '4':
          case '5': case '6': case '7': case '8': case '9':
            putc(ch);
            break;

          case 'e': case 'E':
            if (exponentSeen) {
                break scanning;     // a second exponent: format error
            }
            putc(ch);
            exponentSeen = true;
            break;

          case '+': case '-': {
            // A sign belongs to the literal only right after the 'e'.
            final char previous = buffer[count - 1];
            if (previous != 'e' && previous != 'E') {
                break scanning;     // not an error: an operator follows
            }
            putc(ch);
            break;
          }

          case 'f': case 'F':
            floatSuffix = true;
            ch = in.read();         // skip over the suffix
            break scanning;

          case 'd': case 'D':
            ch = in.read();         // skip over the suffix
            break scanning;

          default:
            break scanning;
        }
        ch = in.read();
    }

    // The literal is complete; it must not run straight into a letter,
    // a digit or another '.'.
    if (Character.isJavaLetterOrDigit((char)ch) || ch == '.') {
        env.error(in.pos, "invalid.number");
        do {
            ch = in.read();
        } while (Character.isJavaLetterOrDigit((char)ch) || ch == '.');
        doubleValue = 0;
        token = DOUBLEVAL;
        return;
    }

    token = floatSuffix ? FLOATVAL : DOUBLEVAL;
    try {
        final char last = buffer[count - 1];
        final boolean danglingExponent =
            last == 'e' || last == 'E' || last == '+' || last == '-';
        if (danglingExponent) {
            env.error(in.pos -1, "float.format");
        } else if (floatSuffix) {
            final String text = bufferString();
            floatValue = Float.valueOf(text).floatValue();
            if (Float.isInfinite(floatValue)) {
                env.error(pos, "overflow.float");
            } else if (floatValue == 0 && !looksLikeZero(text)) {
                env.error(pos, "underflow.float");
            }
        } else {
            final String text = bufferString();
            doubleValue = Double.valueOf(text).doubleValue();
            if (Double.isInfinite(doubleValue)) {
                env.error(pos, "overflow.double");
            } else if (doubleValue == 0 && !looksLikeZero(text)) {
                env.error(pos, "underflow.double");
            }
        }
    } catch (NumberFormatException ee) {
        env.error(pos, "float.format");
        doubleValue = 0;
        floatValue = 0;
    }
}
/**
 * Decides whether a floating-point literal that parsed to zero was
 * really written as zero, i.e. whether its mantissa contains no
 * non-zero digit. Used to tell a genuine zero from an underflow.
 * <p>
 * Only the mantissa is inspected: scanning stops at the first exponent
 * marker or suffix letter.
 *
 * @param token the text of the literal
 * @return false if a digit 1-9 appears before any e, E, f or F;
 *         true otherwise (including for the empty string)
 */
private static boolean looksLikeZero(String token) {
    final int length = token.length();
    for (int i = 0; i < length; i++) {
        switch (token.charAt(i)) {
          case '0': case '.':
            // The digit zero and the decimal point say nothing yet.
            // (This label used to be the NUL character, 0, so zeros were
            // skipped only because no label matched them.)
            continue;
          case '1': case '2': case '3': case '4': case '5':
          case '6': case '7': case '8': case '9':
            return false;
          case 'e': case 'E': case 'f': case 'F':
            // End of the mantissa without a non-zero digit.
            return true;
          default:
            // Any other character is ignored.
            break;
        }
    }
    return true;
}
Scan an escape character.
Returns: the character or -1 if it escaped an
end-of-line.
/**
 * Scans an escape sequence. Called with the backslash as the current
 * character; on return {@code ch} holds the character following the
 * escape.
 * <p>
 * Octal escapes of one to three digits are accepted, and
 * "invalid.escape.char" is reported if the value exceeds 0xFF (the
 * value is still returned). The single-letter escapes r, n, f, b, t
 * and the escaped backslash, double quote and single quote are
 * recognised.
 *
 * @return the escaped character, or -1 if the escape was not recognised
 *         (in which case "invalid.escape.char" has been reported)
 */
private int scanEscapeChar() throws IOException {
    final long start = in.pos;
    ch = in.read();

    if (ch >= '0' && ch <= '7') {
        int value = ch - '0';
        int remaining = 2;          // at most two further octal digits
        while (remaining > 0) {
            ch = in.read();
            if (ch < '0' || ch > '7') {
                break;
            }
            value = (value << 3) + ch - '0';
            remaining--;
        }
        if (remaining == 0) {
            // All three digits consumed: advance past the last one.
            ch = in.read();
        }
        if (value > 0xFF) {
            env.error(start, "invalid.escape.char");
        }
        return value;
    }

    int decoded;
    switch (ch) {
      case 'r':  decoded = '\r'; break;
      case 'n':  decoded = '\n'; break;
      case 'f':  decoded = '\f'; break;
      case 'b':  decoded = '\b'; break;
      case 't':  decoded = '\t'; break;
      case '\\': decoded = '\\'; break;
      case '\"': decoded = '\"'; break;
      case '\'': decoded = '\''; break;
      default:
        env.error(start, "invalid.escape.char");
        ch = in.read();
        return -1;
    }
    ch = in.read();
    return decoded;
}
Scan a string. The current character
should be the opening " of the string.
/**
 * Scans a string literal. The current character should be the opening
 * double quote. Sets {@code token} to STRINGVAL and {@code stringValue}
 * to the decoded contents.
 * <p>
 * If the input ends, or a line ends, before the closing quote,
 * "eof.in.string" or "newline.in.string" is reported and the characters
 * gathered so far become the value.
 */
private void scanString() throws IOException {
    token = STRINGVAL;
    count = 0;
    ch = in.read();

    collecting:
    for (;;) {
        switch (ch) {
          case '\\': {
            final int escaped = scanEscapeChar();
            if (escaped >= 0) {
                putc((char)escaped);
            }
            continue collecting;
          }

          case '"':
            ch = in.read();
            break collecting;

          case '\r':
          case '\n':
            ch = in.read();
            env.error(pos, "newline.in.string");
            break collecting;

          case EOF:
            env.error(pos, "eof.in.string");
            break collecting;

          default:
            putc(ch);
            ch = in.read();
        }
    }

    stringValue = bufferString();
}
Scan a character. The current character should be
the opening ' of the character constant.
/**
 * Scans a character literal. The current character should be the
 * opening single quote. Sets {@code token} to CHARVAL and
 * {@code charValue} to the character (0 after certain errors).
 * <p>
 * "invalid.char.constant" is reported for an empty constant, for a
 * constant interrupted by a line end, and for a constant whose closing
 * quote is missing. In the last case input is skipped up to a closing
 * quote (which is consumed), or up to a semicolon, newline or EOF
 * (which are not).
 */
private void scanCharacter() throws IOException {
    token = CHARVAL;
    ch = in.read();

    if (ch == '\'') {
        // Either the empty constant '' or the malformed '''. Swallow
        // the whole run of single quotes and complain once.
        charValue = 0;
        env.error(pos, "invalid.char.constant");
        do {
            ch = in.read();
        } while (ch == '\'');
        return;
    }
    if (ch == '\r' || ch == '\n') {
        charValue = 0;
        env.error(pos, "invalid.char.constant");
        return;
    }

    if (ch == '\\') {
        final int escaped = scanEscapeChar();
        charValue = (char)((escaped >= 0) ? escaped : 0);
    } else {
        charValue = (char)ch;
        ch = in.read();
    }

    if (ch == '\'') {
        ch = in.read();
        return;
    }

    // Missing closing quote: resynchronise.
    env.error(pos, "invalid.char.constant");
    while (ch != '\'' && ch != ';' && ch != '\n' && ch != EOF) {
        ch = in.read();
    }
    if (ch == '\'') {
        ch = in.read();
    }
}
Scan an Identifier. The current character should
be the first character of the identifier.
/**
 * Scans an identifier or keyword. The current character should be the
 * first character of the identifier. Sets {@code idValue} to the
 * identifier found and {@code token} to that identifier's type.
 */
private void scanIdentifier() throws IOException {
    count = 0;
    boolean more;
    do {
        putc(ch);
        ch = in.read();
        // ASCII letters, digits, '$' and '_' are tested directly; every
        // other character is classified by the Character class.
        more = (ch >= 'a' && ch <= 'z')
            || (ch >= 'A' && ch <= 'Z')
            || (ch >= '0' && ch <= '9')
            || ch == '$'
            || ch == '_'
            || Character.isJavaLetterOrDigit((char)ch);
    } while (more);

    idValue = Identifier.lookup(bufferString());
    token = idValue.getType();
}
The ending position of the current token
/**
 * The ending position of the current token.
 *
 * @return the input reader's current position, {@code in.pos}
 */
// Note: This should be part of the pos itself.
public long getEndPos() {
return in.pos;
}
If the current token is IDENT, return the identifier occurrence.
It will be freshly allocated.
/**
 * Returns the current identifier occurrence as a freshly allocated
 * IdentifierToken built from {@code pos} and {@code idValue}.
 *
 * @return the new token, or null if the current token is not IDENT
 */
public IdentifierToken getIdToken() {
    if (token == IDENT) {
        return new IdentifierToken(pos, idValue);
    }
    return null;
}
Scan the next token.
Returns: the position of the previous token.
/**
 * Scan the next token. This simply delegates to xscan().
 *
 * @return the position of the previous token.
 * @throws IOException if reading the input fails
 */
public long scan() throws IOException {
return xscan();
}
/**
 * Scans the next token, skipping whitespace and (unless
 * {@code scanComments} is set) comments. On return {@code token},
 * {@code pos} and the relevant value field describe the new token, and
 * {@code ch} holds the first character after it.
 * <p>
 * {@code docComment} is reset to null on entry and set to the text of
 * any doc comment encountered during this call.
 * <p>
 * When {@code scanComments} is true, every comment and every newline is
 * returned as a COMMENT token. This includes block and doc comments,
 * which previously returned without updating {@code token}.
 *
 * @return the value {@code pos} had on entry, i.e. the position of the
 *         token that was current before this call
 * @throws IOException if reading the input fails
 */
protected long xscan() throws IOException {
    final ScannerInputReader in = this.in;
    long retPos = pos;
    prevPos = in.pos;
    docComment = null;
    while (true) {
        pos = in.pos;

        switch (ch) {
          case EOF:
            token = EOF;
            return retPos;

          case '\n':
            if (scanComments) {
                ch = ' ';
                // Avoid this path the next time around.
                // Do not just call in.read; we want to present
                // a null token (and also avoid read-ahead).
                token = COMMENT;
                return retPos;
            }
            // fall through: a newline is ordinary whitespace otherwise
          case ' ':
          case '\t':
          case '\f':
            ch = in.read();
            break;

          case '/':
            switch (ch = in.read()) {
              case '/':
                // Skip a // comment up to, but not including, the
                // newline or EOF that ends it.
                do {
                    ch = in.read();
                } while (ch != EOF && ch != '\n');
                if (scanComments) {
                    token = COMMENT;
                    return retPos;
                }
                break;

              case '*':
                ch = in.read();
                if (ch == '*') {
                    docComment = scanDocComment();
                } else {
                    skipComment();
                }
                if (scanComments) {
                    // Present the block comment as a COMMENT token, as
                    // the other comment paths do, instead of leaving
                    // the previous token code in place.
                    token = COMMENT;
                    return retPos;
                }
                break;

              case '=':
                ch = in.read();
                token = ASGDIV;
                return retPos;

              default:
                token = DIV;
                return retPos;
            }
            break;

          case '"':
            scanString();
            return retPos;

          case '\'':
            scanCharacter();
            return retPos;

          case '0': case '1': case '2': case '3': case '4':
          case '5': case '6': case '7': case '8': case '9':
            scanNumber();
            return retPos;

          case '.':
            switch (ch = in.read()) {
              case '0': case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                // A real literal starting with the decimal point.
                count = 0;
                putc('.');
                scanReal();
                break;
              default:
                token = FIELD;
            }
            return retPos;

          case '{':
            ch = in.read();
            token = LBRACE;
            return retPos;

          case '}':
            ch = in.read();
            token = RBRACE;
            return retPos;

          case '(':
            ch = in.read();
            token = LPAREN;
            return retPos;

          case ')':
            ch = in.read();
            token = RPAREN;
            return retPos;

          case '[':
            ch = in.read();
            token = LSQBRACKET;
            return retPos;

          case ']':
            ch = in.read();
            token = RSQBRACKET;
            return retPos;

          case ',':
            ch = in.read();
            token = COMMA;
            return retPos;

          case ';':
            ch = in.read();
            token = SEMICOLON;
            return retPos;

          case '?':
            ch = in.read();
            token = QUESTIONMARK;
            return retPos;

          case '~':
            ch = in.read();
            token = BITNOT;
            return retPos;

          case ':':
            ch = in.read();
            token = COLON;
            return retPos;

          case '-':
            switch (ch = in.read()) {
              case '-':
                ch = in.read();
                token = DEC;
                return retPos;

              case '=':
                ch = in.read();
                token = ASGSUB;
                return retPos;
            }
            token = SUB;
            return retPos;

          case '+':
            switch (ch = in.read()) {
              case '+':
                ch = in.read();
                token = INC;
                return retPos;

              case '=':
                ch = in.read();
                token = ASGADD;
                return retPos;
            }
            token = ADD;
            return retPos;

          case '<':
            switch (ch = in.read()) {
              case '<':
                if ((ch = in.read()) == '=') {
                    ch = in.read();
                    token = ASGLSHIFT;
                    return retPos;
                }
                token = LSHIFT;
                return retPos;

              case '=':
                ch = in.read();
                token = LE;
                return retPos;
            }
            token = LT;
            return retPos;

          case '>':
            switch (ch = in.read()) {
              case '>':
                switch (ch = in.read()) {
                  case '=':
                    ch = in.read();
                    token = ASGRSHIFT;
                    return retPos;

                  case '>':
                    if ((ch = in.read()) == '=') {
                        ch = in.read();
                        token = ASGURSHIFT;
                        return retPos;
                    }
                    token = URSHIFT;
                    return retPos;
                }
                token = RSHIFT;
                return retPos;

              case '=':
                ch = in.read();
                token = GE;
                return retPos;
            }
            token = GT;
            return retPos;

          case '|':
            switch (ch = in.read()) {
              case '|':
                ch = in.read();
                token = OR;
                return retPos;

              case '=':
                ch = in.read();
                token = ASGBITOR;
                return retPos;
            }
            token = BITOR;
            return retPos;

          case '&':
            switch (ch = in.read()) {
              case '&':
                ch = in.read();
                token = AND;
                return retPos;

              case '=':
                ch = in.read();
                token = ASGBITAND;
                return retPos;
            }
            token = BITAND;
            return retPos;

          case '=':
            if ((ch = in.read()) == '=') {
                ch = in.read();
                token = EQ;
                return retPos;
            }
            token = ASSIGN;
            return retPos;

          case '%':
            if ((ch = in.read()) == '=') {
                ch = in.read();
                token = ASGREM;
                return retPos;
            }
            token = REM;
            return retPos;

          case '^':
            if ((ch = in.read()) == '=') {
                ch = in.read();
                token = ASGBITXOR;
                return retPos;
            }
            token = BITXOR;
            return retPos;

          case '!':
            if ((ch = in.read()) == '=') {
                ch = in.read();
                token = NE;
                return retPos;
            }
            token = NOT;
            return retPos;

          case '*':
            if ((ch = in.read()) == '=') {
                ch = in.read();
                token = ASGMUL;
                return retPos;
            }
            token = MUL;
            return retPos;

          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
          case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
          case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
          case 's': case 't': case 'u': case 'v': case 'w': case 'x':
          case 'y': case 'z':
          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
          case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
          case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
          case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
          case 'Y': case 'Z':
          case '$': case '_':
            scanIdentifier();
            return retPos;

          case '\u001a':
            // Our one concession to DOS: a trailing control-Z is
            // accepted as end of file.
            if ((ch = in.read()) == EOF) {
                token = EOF;
                return retPos;
            }
            env.error(pos, "funny.char");
            ch = in.read();
            break;

          default:
            // Non-ASCII letters may also start an identifier.
            if (Character.isJavaLetter((char)ch)) {
                scanIdentifier();
                return retPos;
            }
            env.error(pos, "funny.char");
            ch = in.read();
            break;
        }
    }
}
Scan to a matching '}', ']' or ')'. The current token must be
a '{', '[' or '(';
/**
 * Scans forward to the bracket that closes the one the scanner is
 * currently positioned on, keeping track of nested pairs. The current
 * token must be a '{', '[' or '('.
 * <p>
 * If the input ends first, "unbalanced.paren" is reported and the
 * method returns with the scanner at EOF.
 *
 * @param open  the token code of the opening bracket
 * @param close the token code of the matching closing bracket
 * @throws IOException if reading the input fails
 */
public void match(int open, int close) throws IOException {
    int nesting = 1;
    for (;;) {
        scan();
        if (token == open) {
            nesting++;
            continue;
        }
        if (token == close) {
            nesting--;
            if (nesting == 0) {
                return;
            }
            continue;
        }
        if (token == EOF) {
            env.error(pos, "unbalanced.paren");
            return;
        }
    }
}
}