/*
 * FindBugs - Find bugs in Java programs
 * Copyright (C) 2003,2004 University of Maryland
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package edu.umd.cs.findbugs;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.BitSet;

// A simple tokenizer for Java source text. This is not intended to be a compliant lexer; instead, it is for quick and dirty scanning.
// Author: David Hovemeyer
// See also: Token
/** * A simple tokenizer for Java source text. This is not intended to be a * compliant lexer; instead, it is for quick and dirty scanning. * * @author David Hovemeyer * @see Token */
public class Tokenizer { private static final BitSet whiteSpace = new BitSet(); static { whiteSpace.set(' '); whiteSpace.set('\t'); whiteSpace.set('\r'); whiteSpace.set('\f'); } private static final BitSet single = new BitSet(); static { single.set('!'); single.set('%'); single.set('^'); single.set('&'); single.set('*'); single.set('('); single.set(')'); single.set('-'); single.set('+'); single.set('='); single.set('['); single.set(']'); single.set('{'); single.set('}'); single.set('|'); single.set(':'); single.set(';'); single.set(','); single.set('.'); single.set('<'); single.set('>'); single.set('?'); single.set('~'); } private final PushbackReader reader;
Constructor.
Params:
  • reader – the Reader for the Java source text
/** * Constructor. * * @param reader * the Reader for the Java source text */
public Tokenizer(Reader reader) { this.reader = new PushbackReader(reader); }
Get the next Token in the stream.
Returns:the Token
/** * Get the next Token in the stream. * * @return the Token */
public Token next() throws IOException { skipWhitespace(); int c = reader.read(); if (c < 0) { return new Token(Token.EOF); } else if (c == '\n') { return new Token(Token.EOL); } else if (c == '\'' || c == '"') { return munchString(c); } else if (c == '/') { return maybeComment(); } else if (single.get(c)) { return new Token(Token.SINGLE, String.valueOf((char) c)); } else { reader.unread(c); return parseWord(); } } private void skipWhitespace() throws IOException { for (;;) { int c = reader.read(); if (c < 0) { break; } if (!whiteSpace.get(c)) { reader.unread(c); break; } } } private Token munchString(int delimiter) throws IOException { final int SCAN = 0; final int ESCAPE = 1; final int DONE = 2; StringBuilder result = new StringBuilder(); result.append((char) delimiter); int state = SCAN; while (state != DONE) { int c = reader.read(); if (c < 0) { break; } result.append((char) c); switch (state) { case SCAN: if (c == delimiter) { state = DONE; } else if (c == '\\') { state = ESCAPE; } break; case ESCAPE: state = SCAN; break; default: break; } } return new Token(Token.STRING, result.toString()); } private Token maybeComment() throws IOException { int c = reader.read(); if (c == '/') { // Single line comment StringBuilder result = new StringBuilder(); result.append("//"); for (;;) { c = reader.read(); if (c < 0) { break; } else if (c == '\n') { reader.unread(c); break; } result.append((char) c); } return new Token(Token.COMMENT, result.toString()); } else if (c == '*') { // C-style multiline comment StringBuilder result = new StringBuilder(); result.append("/*"); final int SCAN = 0; final int STAR = 1; final int DONE = 2; int state = SCAN; while (state != DONE) { c = reader.read(); if (c < 0) { state = DONE; } else { result.append((char) c); } switch (state) { case SCAN: if (c == '*') { state = STAR; } break; case STAR: if (c == '/') { state = DONE; } else if (c != '*') { state = SCAN; } break; case DONE: break; } } return new Token(Token.COMMENT, 
result.toString()); } else { if (c >= 0) { reader.unread(c); } return new Token(Token.SINGLE, "/"); } } private Token parseWord() throws IOException { StringBuilder result = new StringBuilder(); for (;;) { int c = reader.read(); if (c < 0) { break; } if (whiteSpace.get(c) || c == '\n' || single.get(c)) { reader.unread(c); break; } result.append((char) c); } return new Token(Token.WORD, result.toString()); } }