org.jruby/jruby-core/9.2.9.0 : org/jruby/ext/ripper/RipperLexer.java

RipperLexer
https://github.com/jruby/jruby/jruby-core: JRuby is the effort to recreate the Ruby (https://www.ruby-lang.org) interpreter in Java. (JRuby)
headius
enebo
wmeissner
BanzaiMan
mkristian
/*
 ***** BEGIN LICENSE BLOCK *****
 * Version: EPL 2.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v20.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2013-2017 The JRuby Team (jruby@jruby.org)
 * 
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/

package org.jruby.ext.ripper;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.HashMap;
import org.jcodings.Encoding;
import org.jruby.Ruby;
import org.jruby.lexer.LexerSource;
import org.jruby.lexer.LexingCommon;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.jruby.util.RegexpOptions;
import org.jruby.util.SafeDoubleParser;
import org.jruby.util.StringSupport;
import org.jruby.util.cli.Options;

import static org.jruby.ext.ripper.RipperParser.tSP;

/**
 *
 * @author enebo
 */
public class RipperLexer extends LexingCommon {
    /** Maps each Ruby keyword's source spelling to its {@link Keyword} descriptor. */
    private static final HashMap<String, Keyword> map;

    static {
        // Each Keyword constant records its own spelling in its public `name` field,
        // so the table is filled by walking the enum in declaration order.
        map = new HashMap<>();
        for (Keyword keyword : Keyword.values()) {
            map.put(keyword.name, keyword);
        }
    }

    /**
     * Dispatches an {@code on_operator_ambiguous} event to the parser.
     *
     * @param op  operator text, passed on as a Ruby symbol
     * @param syn second event argument, passed on as a Ruby string
     */
    protected void ambiguousOperator(String op, String syn) {
        parser.dispatch(
                "on_operator_ambiguous",
                getRuntime().newSymbol(op),
                getRuntime().newString(syn));
    }

    /**
     * Handles a magic comment: delegates to the superclass implementation and
     * then dispatches an {@code on_magic_comment} event with the name and value.
     *
     * @param name  magic comment name
     * @param value magic comment value
     * @return whatever the superclass implementation returned
     */
    protected boolean onMagicComment(String name, ByteList value) {
        // The superclass runs first; its verdict is returned unchanged after the dispatch.
        final boolean handled = super.onMagicComment(name, value);
        parser.dispatch("on_magic_comment", getRuntime().newString(name), getRuntime().newString(value));
        return handled;
    }

    /**
     * Range-checks a float or rational literal and picks the token to return.
     *
     * @param number text of the numeric literal
     * @param suffix bit flags; {@code SUFFIX_R} selects the rational path and
     *               {@code SUFFIX_I} turns the result into {@code tIMAGINARY}
     * @return {@code tRATIONAL} or {@code tFLOAT}, or {@code tIMAGINARY} when the
     *         imaginary suffix bit is set
     */
    private int getFloatToken(String number, int suffix) {
        if ((suffix & SUFFIX_R) != 0) {
            BigDecimal bd = new BigDecimal(number);
            BigDecimal denominator = BigDecimal.ONE.scaleByPowerOfTen(bd.scale());
            BigDecimal numerator = bd.multiply(denominator);

            try {
                // Called only for the range check; both parts must fit in a long.
                numerator.longValueExact();
                denominator.longValueExact();
            } catch (ArithmeticException ae) {
                compile_error("Rational (" + numerator + "/" + denominator + ") out of range.");
            }
            return considerComplex(RipperParser.tRATIONAL, suffix);
        }

        try {
            // The parsed value is not needed here; parsing only detects out-of-range literals.
            SafeDoubleParser.parseDouble(number);
        } catch (NumberFormatException e) {
            warn("Float " + number + " out of range.");
        }

        return considerComplex(RipperParser.tFLOAT, suffix);
    }

    /**
     * Replaces a numeric token with {@code tIMAGINARY} when the imaginary
     * suffix bit is present.
     *
     * @param token  token chosen for the literal so far
     * @param suffix suffix bit flags of the literal
     * @return {@code RipperParser.tIMAGINARY} if {@code SUFFIX_I} is set, otherwise {@code token}
     */
    private int considerComplex(int token, int suffix) {
        return (suffix & SUFFIX_I) != 0 ? RipperParser.tIMAGINARY : token;
    }


    /**
     * @return the verbose flag of the runtime obtained from the parser
     */
    public boolean isVerbose() {
        final boolean verbose = parser.getRuntime().isVerbose();
        return verbose;
    }

    /**
     * Dispatches a {@code warn} event carrying the message as a Ruby string.
     *
     * @param message warning text
     */
    public void warn(String message) {
        final String event = "warn";
        parser.dispatch(event, getRuntime().newString(message));
    }

    /**
     * Dispatches a {@code warning} event with a single string argument.
     *
     * @param fmt warning text, passed on as a Ruby string
     */
    public void warning(String fmt) {
        final String event = "warning";
        parser.dispatch(event, getRuntime().newString(fmt));
    }
    /**
     * Dispatches a {@code warning} event with two string arguments.
     *
     * @param fmt first event argument, passed on as a Ruby string
     * @param arg second event argument, passed on as a Ruby string
     */
    public void warning(String fmt, String arg) {
        final String event = "warning";
        parser.dispatch(event, getRuntime().newString(fmt), getRuntime().newString(arg));
    }
    
    /**
     * Ruby reserved words known to the lexer. Each constant records its source
     * spelling, two parser token ids and a lexer state constant.
     */
    public enum Keyword {
        END("end", RipperParser.keyword_end, RipperParser.keyword_end, EXPR_END),
        ELSE("else", RipperParser.keyword_else, RipperParser.keyword_else, EXPR_BEG),
        CASE("case", RipperParser.keyword_case, RipperParser.keyword_case, EXPR_BEG),
        ENSURE("ensure", RipperParser.keyword_ensure, RipperParser.keyword_ensure, EXPR_BEG),
        MODULE("module", RipperParser.keyword_module, RipperParser.keyword_module, EXPR_BEG),
        ELSIF("elsif", RipperParser.keyword_elsif, RipperParser.keyword_elsif, EXPR_BEG),
        DEF("def", RipperParser.keyword_def, RipperParser.keyword_def, EXPR_FNAME),
        RESCUE("rescue", RipperParser.keyword_rescue, RipperParser.modifier_rescue, EXPR_MID),
        NOT("not", RipperParser.keyword_not, RipperParser.keyword_not, EXPR_ARG),
        THEN("then", RipperParser.keyword_then, RipperParser.keyword_then, EXPR_BEG),
        YIELD("yield", RipperParser.keyword_yield, RipperParser.keyword_yield, EXPR_ARG),
        FOR("for", RipperParser.keyword_for, RipperParser.keyword_for, EXPR_BEG),
        SELF("self", RipperParser.keyword_self, RipperParser.keyword_self, EXPR_END),
        FALSE("false", RipperParser.keyword_false, RipperParser.keyword_false, EXPR_END),
        RETRY("retry", RipperParser.keyword_retry, RipperParser.keyword_retry, EXPR_END),
        RETURN("return", RipperParser.keyword_return, RipperParser.keyword_return, EXPR_MID),
        TRUE("true", RipperParser.keyword_true, RipperParser.keyword_true, EXPR_END),
        IF("if", RipperParser.keyword_if, RipperParser.modifier_if, EXPR_BEG),
        DEFINED_P("defined?", RipperParser.keyword_defined, RipperParser.keyword_defined, EXPR_ARG),
        SUPER("super", RipperParser.keyword_super, RipperParser.keyword_super, EXPR_ARG),
        UNDEF("undef", RipperParser.keyword_undef, RipperParser.keyword_undef, EXPR_FNAME | EXPR_FITEM),
        BREAK("break", RipperParser.keyword_break, RipperParser.keyword_break, EXPR_MID),
        IN("in", RipperParser.keyword_in, RipperParser.keyword_in, EXPR_BEG),
        DO("do", RipperParser.keyword_do, RipperParser.keyword_do, EXPR_BEG),
        NIL("nil", RipperParser.keyword_nil, RipperParser.keyword_nil, EXPR_END),
        UNTIL("until", RipperParser.keyword_until, RipperParser.modifier_until, EXPR_BEG),
        UNLESS("unless", RipperParser.keyword_unless, RipperParser.modifier_unless, EXPR_BEG),
        OR("or", RipperParser.keyword_or, RipperParser.keyword_or, EXPR_BEG),
        NEXT("next", RipperParser.keyword_next, RipperParser.keyword_next, EXPR_MID),
        WHEN("when", RipperParser.keyword_when, RipperParser.keyword_when, EXPR_BEG),
        REDO("redo", RipperParser.keyword_redo, RipperParser.keyword_redo, EXPR_END),
        AND("and", RipperParser.keyword_and, RipperParser.keyword_and, EXPR_BEG),
        BEGIN("begin", RipperParser.keyword_begin, RipperParser.keyword_begin, EXPR_BEG),
        __LINE__("__LINE__", RipperParser.keyword__LINE__, RipperParser.keyword__LINE__, EXPR_END),
        CLASS("class", RipperParser.keyword_class, RipperParser.keyword_class, EXPR_CLASS),
        __FILE__("__FILE__", RipperParser.keyword__FILE__, RipperParser.keyword__FILE__, EXPR_END),
        LEND("END", RipperParser.keyword_END, RipperParser.keyword_END, EXPR_END),
        LBEGIN("BEGIN", RipperParser.keyword_BEGIN, RipperParser.keyword_BEGIN, EXPR_END),
        WHILE("while", RipperParser.keyword_while, RipperParser.modifier_while, EXPR_BEG),
        ALIAS("alias", RipperParser.keyword_alias, RipperParser.keyword_alias, EXPR_FNAME | EXPR_FITEM),
        __ENCODING__("__ENCODING__", RipperParser.keyword__ENCODING__, RipperParser.keyword__ENCODING__, EXPR_END);

        /** The keyword exactly as spelled in Ruby source. */
        public final String name;
        /** First parser token id for this keyword. */
        public final int id0;
        /**
         * Second parser token id. It equals {@link #id0} except for rescue, if,
         * until, unless and while, where it is the {@code modifier_*} token.
         */
        public final int id1;
        /** {@code EXPR_*} lexer state constant(s) associated with this keyword. */
        public final int state;

        Keyword(String name, int id0, int id1, int state) {
            this.name = name;
            this.id0 = id0;
            this.id1 = id1;
            this.state = state;
        }
    }
    
    /**
     * Looks up a keyword by its source spelling.
     *
     * @param str candidate keyword text
     * @return the matching {@link Keyword}, or {@code null} if the table has no entry for {@code str}
     */
    public static Keyword getKeyword(String str) {
        // The table is already typed as String -> Keyword, so no cast is needed.
        return map.get(str);
    }
    
    // MRI can seek directly in the source, but we cannot, so the most recent
    // identifier is stored here for the parser to inspect on demand (e.g. to
    // check whether it is a valid identifier).  A single field should be safe
    // because all identifier logic should run sequentially.
    String identValue;

    // Used for a tiny smidgen of grammar in the lexer (see setParser()).
    private RipperParserBase parser = null;

    // Current string terminator; set to null by the constructor and exposed
    // through getStrTerm()/setStrTerm().
    private StrTerm lex_strterm;

    // Heredoc flag bit: the identifier was written as <<-EOF (the '-'), so
    // indentation before the terminating identifier is allowed.  The lexer
    // also sets this bit for the <<~EOF form.
    static final int STR_FUNC_INDENT=0x20;

    /**
     * Creates a lexer that reads from {@code src} and dispatches events to {@code parser}.
     *
     * @param parser parser that receives the dispatched events
     * @param src    source passed to the superclass constructor; its encoding
     *               becomes the current encoding
     */
    public RipperLexer(RipperParserBase parser, LexerSource src) {
        super(src);
        this.parser = parser;
        // Start in lex state 0 with no string terminator.
        setState(0);
        lex_strterm = null;
        // FIXME: Do we need to parser_prepare like normal lexer?
        setCurrentEncoding(src.getEncoding());
        reset();
    }

    // Bytes held back for a later dispatch; null when nothing is pending.
    // Filled by addDelayedToken() and cleared by dispatchDelayedToken().
    protected ByteList delayed = null;
    // ruby_sourceline and column (offset from lex_pbeg) captured when `delayed` was created.
    private int delayed_line = 0;
    private int delayed_col = 0;
    // Set once cr() has warned about a stray '\r'; reset by nextLine() when a line is read from src.
    private boolean cr_seen = false;

    /**
     * Has lexing started yet?
     */
    public boolean hasStarted() {
        // Reported purely from whether a source object is attached.
        final boolean sourceAttached = src != null;
        return sourceAttached;
    }
    
    /**
     * If delayed content is pending, appends the not-yet-consumed bytes between
     * {@code tokp} and {@code lex_p} to it, dispatches it as
     * {@code tSTRING_CONTENT} and moves {@code tokp} up to {@code lex_p}.
     * Does nothing when no delayed content exists.
     *
     * @param encoding encoding applied to the delayed buffer before appending
     */
    protected void flush_string_content(Encoding encoding) {
        if (delayed == null) return;

        final int pending = lex_p - tokp;
        if (pending > 0) {
            delayed.setEncoding(encoding);
            delayed.append(lexb.makeShared(tokp, pending));
        }

        dispatchDelayedToken(RipperParser.tSTRING_CONTENT);
        tokp = lex_p;
    }

    /**
     * Appends the bytes {@code [tok, end)} of the current line buffer to the
     * delayed buffer, creating it (and recording its starting line and column)
     * if necessary, then advances {@code tokp} to {@code end}.
     * An empty or inverted range is ignored.
     *
     * @param tok start offset (inclusive) in the current line buffer
     * @param end end offset (exclusive) in the current line buffer
     */
    public void addDelayedToken(int tok, int end) {
        // Nothing left over: leave all state untouched.
        if (tok >= end) return;

        if (delayed == null) {
            delayed = new ByteList();
            delayed.setEncoding(getEncoding());
            delayed_line = ruby_sourceline;
            delayed_col = tok - lex_pbeg;
        }

        delayed.append(lexb, tok, end - tok);
        tokp = end;
    }

    /**
     * Installs the next input line as the current line buffer.
     *
     * A line queued in {@code lex_nextline} is used first; otherwise one is read
     * from {@code src}. Undispatched text left on the old line is moved into the
     * delayed buffer before the switch.
     *
     * @return {@code true} if end of input was reached, {@code false} if a new
     *         line was installed
     */
    private boolean nextLine() {
        line_offset += lex_pend;

        ByteList line = lex_nextline;
        lex_nextline = null;

        if (line == null) {
            if (eofp) return true;

            if (src != null) line = src.gets();

            if (line == null) {
                eofp = true;
                lex_goto_eol();
                return true;
            }
            cr_seen = false;
        }

        // Whatever lies between tokp and the end of the old line is kept for later dispatch.
        addDelayedToken(tokp, lex_pend);

        // Continue line numbering after a heredoc body if one was just skipped.
        if (heredoc_end > 0) {
            ruby_sourceline = heredoc_end;
            heredoc_end = 0;
        }
        ruby_sourceline++;
        line_count++;

        lex_pbeg = 0;
        lex_p = 0;
        lex_pend = line.length();
        lexb = line;
        flush();
        lex_lastline = line;

        return false;
    }

    /**
     * Handles a carriage return that was just read. A following newline is
     * consumed and the pair is reported as {@code '\n'}; otherwise a warning is
     * issued (only while {@code cr_seen} is unset) and {@code c} is returned as is.
     *
     * @param c the character just read
     * @return {@code '\n'} when a newline followed, otherwise {@code c}
     */
    private int cr(int c) {
        if (peek('\n')) {
            lex_p++;
            return '\n';
        }

        if (!cr_seen) {
            cr_seen = true;
            warn("encountered \\\\r in middle of line, treated as a mere space");
        }
        return c;
    }

    /**
     * Reads the next character, fetching a new line first when the current one
     * is exhausted, end of input was flagged, or a line is queued.
     *
     * @return the next character ({@code "\r\n"} is folded into {@code '\n'}),
     *         or {@code EOF} when no further line is available
     */
    public int nextc() {
        final boolean needsLine = lex_p == lex_pend || eofp || lex_nextline != null;
        if (needsLine && nextLine()) return EOF;

        final int c = p(lex_p);
        lex_p++;

        return c == '\r' ? cr(c) : c;
    }
    
    /**
     * Dispatches any pending delayed content as {@code tSTRING_CONTENT}, moves
     * to the end of the current line and dispatches {@code tHEREDOC_END},
     * discarding that event's result.
     */
    public void dispatchHeredocEnd() {
        final boolean contentPending = delayed != null;
        if (contentPending) dispatchDelayedToken(RipperParser.tSTRING_CONTENT);

        lex_goto_eol();
        dispatchIgnoredScanEvent(RipperParser.tHEREDOC_END);
    }
    
    /**
     * Reports a compile error: calls {@code parser.error()} and then dispatches
     * a {@code compile_error} event carrying the message. This method does not
     * itself throw, so callers continue after it returns.
     *
     * @param message error description
     */
    public void compile_error(String message) {
        parser.error();
        parser.dispatch(
                "compile_error",
                getRuntime().newString(message));
    }

    /**
     * Records the current token text as the latest identifier (interned) and,
     * unless the previous state was {@code EXPR_DOT} or {@code EXPR_FNAME},
     * switches to {@code EXPR_END} when the current scope reports the name as defined.
     *
     * @param result token id to hand back
     * @return {@code result}, unchanged
     */
    public int tokenize_ident(int result) {
        final String text = createTokenString();

        final boolean afterDotOrFname = isLexState(last_state, EXPR_DOT|EXPR_FNAME);
        if (!afterDotOrFname && parser.getCurrentScope().isDefined(text) >= 0) {
            setState(EXPR_END);
        }

        identValue = text.intern();
        return result;
    }
    
    /**
     * Makes the line stored in the heredoc term the current line again,
     * positioned at the term's saved offset. The current line number is
     * remembered in {@code heredoc_end} and replaced by the term's line.
     *
     * @param here heredoc term supplying {@code lastLine}, {@code nth} and {@code line}
     */
    public void heredoc_restore(HeredocTerm here) {
        final ByteList savedLine = here.lastLine;

        // The buffer starts at offset 0, so the end and position need no base offset.
        lex_lastline = savedLine;
        lex_pbeg = 0;
        lex_pend = savedLine.length();
        lex_p = here.nth;
        lexb = savedLine;

        heredoc_end = ruby_sourceline;
        ruby_sourceline = here.line;
        flush();
    }

    /**
     * Lexes the next token and dispatches its event: the delayed text if any is
     * pending, otherwise a scan event for the token (none for {@code EOF}).
     *
     * @return the token id, with {@code EOF} reported as {@code 0}
     * @throws IOException declared for the underlying {@code yylex()} call
     */
    public int nextToken() throws IOException { //mri: yylex
        token = yylex();

        if (delayed != null) {
            dispatchDelayedToken(token);
        } else if (token != EOF) {
            dispatchScanEvent(token);
        }

        return token == EOF ? 0 : token;
    }
    
    /**
     * @return the identifier text most recently stored in {@code identValue}
     */
    public String getIdent() {
        return this.identValue;
    }
    
    /**
     * @return the runtime of the parser's context
     */
    public Ruby getRuntime() {
        final Ruby runtime = parser.context.getRuntime();
        return runtime;
    }

    /**
     * The parser must pass its support object for some checks at the bottom of
     * yylex().  Ruby does it this way as well (i.e. a little parsing
     * logic in the lexer).
     *
     * @param parserSupport the parser object this lexer dispatches to
     */
    public void setParser(RipperParserBase parserSupport) {
        // Replaces the parser supplied at construction time.
        parser = parserSupport;
    }

    /**
     * Only warns when the option appears after a token has been seen;
     * otherwise nothing is done with it here.
     *
     * @param name  option name, used in the warning
     * @param value option value (unused)
     */
    @Override
    protected void setCompileOptionFlag(String name, ByteList value) {
        if (!tokenSeen) return;

        warning("`%s' is ignored after any tokens", name);
    }

    /**
     * Parses regexp option flags and reports any unknown ones as a compile error.
     *
     * @return the parsed options (returned even when unknown flags were reported)
     * @throws IOException declared for the underlying flag parser
     */
    @Override
    protected RegexpOptions parseRegexpFlags() throws IOException {
        final StringBuilder unknownFlags = new StringBuilder(10);
        final RegexpOptions options = parseRegexpFlags(unknownFlags);

        final int unknownCount = unknownFlags.length();
        if (unknownCount > 0) {
            final String plural = unknownCount > 1 ? "s" : "";
            compile_error("unknown regexp option" + plural + " - " + unknownFlags);
        }

        return options;
    }

    /**
     * Reports, as a compile error, that a regexp's encoding option disagrees
     * with the source encoding.
     *
     * @param optionEncoding encoding selected by the regexp option
     * @param encoding       source encoding
     */
    @Override
    protected void mismatchedRegexpEncodingError(Encoding optionEncoding, Encoding encoding) {
        final String message = "regexp encoding option '" + optionsEncodingChar(optionEncoding) +
                "' differs from source encoding '" + encoding + "'";
        compile_error(message);
    }

    /**
     * Intentionally a no-op in this lexer.
     *
     * @param name  unused
     * @param value unused
     */
    @Override
    protected void setTokenInfo(String name, ByteList value) {
        // nothing to do
    }

    /**
     * Switches the lexer to the named encoding. An unknown name or an encoding
     * that is not ASCII compatible is reported as a compile error and the
     * encoding is left unchanged.
     *
     * @param name encoding name to load through the runtime's encoding service
     */
    protected void setEncoding(ByteList name) {
        final Encoding newEncoding = parser.getRuntime().getEncodingService().loadEncoding(name);

        if (newEncoding == null) {
            compile_error("unknown encoding name: " + name.toString());
        } else if (!newEncoding.isAsciiCompatible()) {
            compile_error(name.toString() + " is not ASCII compatible");
        } else {
            setEncoding(newEncoding);
        }
    }

    /**
     * @return the current string terminator, or {@code null} if none is set
     */
    public StrTerm getStrTerm() {
        return this.lex_strterm;
    }
    
    /**
     * @param strterm the string terminator to make current
     */
    public void setStrTerm(StrTerm strterm) {
        lex_strterm = strterm;
    }

    // STR_NEW3/parser_str_new
    /**
     * Wraps the buffer in a Ruby string. For non-regexp content in an
     * ASCII-compatible encoding, a buffer with characters outside the 7-bit
     * range is re-associated with ASCII-8BIT when the lexer encoding is
     * US-ASCII and the buffer is not UTF-8.
     *
     * @param buffer string bytes; its encoding may be changed as described above
     * @param flags  string function flags; {@code STR_FUNC_REGEXP} disables the re-association
     * @return a new Ruby string built from {@code buffer}
     */
    public IRubyObject createStr(ByteList buffer, int flags) {
        final Encoding bufferEncoding = buffer.getEncoding();
        final int codeRange = StringSupport.codeRangeScan(bufferEncoding, buffer);

        // 7-bit content is left alone, like MRI.
        final boolean switchToBinary = (flags & STR_FUNC_REGEXP) == 0
                && bufferEncoding.isAsciiCompatible()
                && codeRange != StringSupport.CR_7BIT
                && getEncoding() == USASCII_ENCODING
                && bufferEncoding != UTF8_ENCODING;

        if (switchToBinary) {
            RipperParserBase.associateEncoding(buffer, ASCII8BIT_ENCODING, codeRange);
        }

        return getRuntime().newString(buffer);
    }
    
    /**
     * What type/kind of quote are we dealing with?
     *
     * @param c first character of the quote construct
     * @return a token that specifies the quote type
     */
    private int parseQuote(int c) throws IOException {
        // On success this also installs a matching StringTerm in lex_strterm.
        // Error paths report through compile_error() and return EOF or -1.
        int begin, end;

        // Short-hand (e.g. %{,%.,%!,... versus %Q{).
        if (!Character.isLetterOrDigit(c)) {
            begin = c;
            c = 'Q';

        // Long-hand (e.g. %Q{}).
        } else {
            begin = nextc();
            if (Character.isLetterOrDigit(begin) || !isASCII()) {
                compile_error("unknown type of %string");
                return EOF;
            }
        }
        if (c == EOF || begin == EOF) {
            compile_error("unterminated quoted string meets end of file");
            return EOF;
        }

        // Figure end-char.  '\0' is special to indicate begin=end and that no nesting?
        switch(begin) {
        case '(': end = ')'; break;
        case '[': end = ']'; break;
        case '{': end = '}'; break;
        case '<': end = '>'; break;
        default:
            end = begin;
            begin = '\0';
        }

        switch (c) {
        case 'Q':
            lex_strterm = new StringTerm(str_dquote, begin ,end);
            return RipperParser.tSTRING_BEG;

        case 'q':
            lex_strterm = new StringTerm(str_squote, begin, end);
            return RipperParser.tSTRING_BEG;

        case 'W':
            lex_strterm = new StringTerm(str_dword, begin, end);
            return RipperParser.tWORDS_BEG;

        case 'w':
            lex_strterm = new StringTerm(str_sword, begin, end);
            return RipperParser.tQWORDS_BEG;

        case 'x':
            lex_strterm = new StringTerm(str_xquote, begin, end);
            return RipperParser.tXSTRING_BEG;

        case 'r':
            lex_strterm = new StringTerm(str_regexp, begin, end);
            return RipperParser.tREGEXP_BEG;

        case 's':
            lex_strterm = new StringTerm(str_ssym, begin, end);
            setState(EXPR_FNAME|EXPR_FITEM);
            return RipperParser.tSYMBEG;

        case 'I':
            lex_strterm = new StringTerm(str_dword, begin, end);
            return RipperParser.tSYMBOLS_BEG;

        case 'i':
            lex_strterm = new StringTerm(str_sword, begin, end);
            return RipperParser.tQSYMBOLS_BEG;
        default:
            // c is an int: cast it so the message shows the character, not its numeric code.
            compile_error("Unknown type of %string. Expected 'Q', 'q', 'W', 'w', 'x', 'r', 's', 'I', 'i' or any non letter character, but found '" + (char) c + "'.");
            // compile_error() has its throw commented out, so control can get here.
            return -1;
        }
    }
    
    /**
     * Tries to read a heredoc identifier at the current position and, on
     * success, installs a {@code HeredocTerm} in {@code lex_strterm}.
     * NOTE(review): presumably called right after {@code <<} has been consumed - confirm against caller.
     *
     * @return {@code tXSTRING_BEG} for a backtick-quoted identifier,
     *         {@code tSTRING_BEG} for any other identifier, {@code 0} when no
     *         identifier follows (the consumed characters are pushed back), or
     *         {@code EOF} when {@code tokenAddMBC} fails
     * @throws IOException declared by this method's signature
     */
    private int hereDocumentIdentifier() throws IOException {
        int c = nextc(); 
        int term;

        int func = 0;
        if (c == '-') {
            // <<- form
            c = nextc();
            func = STR_FUNC_INDENT;
        } else if (c == '~') {
            // <<~ form: same flag, plus the indent trackers are reset
            c = nextc();
            func = STR_FUNC_INDENT;
            heredoc_indent = Integer.MAX_VALUE;
            heredoc_line_indent = 0;
        }

        ByteList markerValue;
        if (c == '\'' || c == '"' || c == '`') {
            // Quoted identifier: the quote character selects the string kind.
            if (c == '\'') {
                func |= str_squote;
            } else if (c == '"') {
                func |= str_dquote;
            } else {
                func |= str_xquote; 
            }

            markerValue = new ByteList();
            markerValue.setEncoding(getEncoding());
            term = c;
            // Collect everything up to the matching closing quote.
            while ((c = nextc()) != EOF && c != term) {
                if (!tokenAddMBC(c, markerValue)) return EOF;
            }
            // The error is reported, but processing continues below.
            if (c == EOF) compile_error("unterminated here document identifier");
        } else {
            if (!isIdentifierChar(c)) {
                // Not a heredoc after all: put back what was consumed here.
                pushback(c);
                if ((func & STR_FUNC_INDENT) != 0) {
                    pushback(heredoc_indent > 0 ? '~' : '-');
                }
                return 0;
            }
            // Bare identifier: handled as if it were double-quoted.
            markerValue = new ByteList();
            markerValue.setEncoding(getEncoding());
            term = '"';
            func |= str_dquote;
            do {
                if (!tokenAddMBC(c, markerValue)) return EOF;
            } while ((c = nextc()) != EOF && isIdentifierChar(c));

            pushback(c);
        }

        dispatchScanEvent(RipperParser.tHEREDOC_BEG);
        // Offset within the current line just past the identifier; handed to the HeredocTerm.
        int len = lex_p - lex_pbeg;        
        lex_goto_eol();
        lex_strterm = new HeredocTerm(markerValue, func, len, ruby_sourceline, lex_lastline);

        flush();
        return term == '`' ? RipperParser.tXSTRING_BEG : RipperParser.tSTRING_BEG;
    }
    
    /**
     * Dispatches an {@code on_arg_ambiguous} event.
     *
     * @return always {@code true}
     */
    private boolean arg_ambiguous() {
        final String event = "on_arg_ambiguous";
        parser.dispatch(event);
        return true;
    }

    /*
     * Not normally used, but is left in here since it can be useful in debugging
     * grammar and lexing problems.
     *
     * Prints a short label for the token to System.err. Labels are
     * comma-terminated, except that a newline and EOF are printed on lines of
     * their own and unknown tokens are printed as 'char'[code].
     */
    private void printToken(int token) {
        switch (token) {
            case RipperParser.yyErrorCode: System.err.print("yyErrorCode,"); break;
            // missing some RipperParser.
            case RipperParser.tIDENTIFIER: System.err.print("tIDENTIFIER["+ value() + "],"); break;
            case RipperParser.tFID: System.err.print("tFID[" + value() + "],"); break;
            case RipperParser.tGVAR: System.err.print("tGVAR[" + value() + "],"); break;
            case RipperParser.tIVAR: System.err.print("tIVAR[" + value() +"],"); break;
            case RipperParser.tCONSTANT: System.err.print("tCONSTANT["+ value() +"],"); break;
            case RipperParser.tCVAR: System.err.print("tCVAR,"); break;
            case RipperParser.tINTEGER: System.err.print("tINTEGER,"); break;
            case RipperParser.tFLOAT: System.err.print("tFLOAT,"); break;
            case RipperParser.tSTRING_CONTENT: System.err.print("tSTRING_CONTENT[" +  value() + "],"); break;
            case RipperParser.tSTRING_BEG: System.err.print("tSTRING_BEG,"); break;
            case RipperParser.tSTRING_END: System.err.print("tSTRING_END,"); break;
            case RipperParser.tSTRING_DBEG: System.err.print("tSTRING_DBEG,"); break;
            case RipperParser.tSTRING_DVAR: System.err.print("tSTRING_DVAR,"); break;
            case RipperParser.tXSTRING_BEG: System.err.print("tXSTRING_BEG,"); break;
            case RipperParser.tREGEXP_BEG: System.err.print("tREGEXP_BEG,"); break;
            case RipperParser.tREGEXP_END: System.err.print("tREGEXP_END,"); break;
            case RipperParser.tWORDS_BEG: System.err.print("tWORDS_BEG,"); break;
            case RipperParser.tQWORDS_BEG: System.err.print("tQWORDS_BEG,"); break;
            case RipperParser.tBACK_REF: System.err.print("tBACK_REF,"); break;
            case RipperParser.tBACK_REF2: System.err.print("tBACK_REF2,"); break;
            case RipperParser.tNTH_REF: System.err.print("tNTH_REF,"); break;
            case RipperParser.tUPLUS: System.err.print("tUPLUS,"); break;
            case RipperParser.tUMINUS: System.err.print("tUMINUS,"); break;
            case RipperParser.tPOW: System.err.print("tPOW,"); break;
            case RipperParser.tCMP: System.err.print("tCMP,"); break;
            case RipperParser.tEQ: System.err.print("tEQ,"); break;
            case RipperParser.tEQQ: System.err.print("tEQQ,"); break;
            case RipperParser.tNEQ: System.err.print("tNEQ,"); break;
            case RipperParser.tGEQ: System.err.print("tGEQ,"); break;
            case RipperParser.tLEQ: System.err.print("tLEQ,"); break;
            case RipperParser.tANDOP: System.err.print("tANDOP,"); break;
            case RipperParser.tOROP: System.err.print("tOROP,"); break;
            case RipperParser.tMATCH: System.err.print("tMATCH,"); break;
            case RipperParser.tNMATCH: System.err.print("tNMATCH,"); break;
            case RipperParser.tDOT: System.err.print("tDOT,"); break;
            case RipperParser.tDOT2: System.err.print("tDOT2,"); break;
            case RipperParser.tDOT3: System.err.print("tDOT3,"); break;
            case RipperParser.tAREF: System.err.print("tAREF,"); break;
            case RipperParser.tASET: System.err.print("tASET,"); break;
            case RipperParser.tLSHFT: System.err.print("tLSHFT,"); break;
            case RipperParser.tRSHFT: System.err.print("tRSHFT,"); break;
            case RipperParser.tCOLON2: System.err.print("tCOLON2,"); break;
            case RipperParser.tCOLON3: System.err.print("tCOLON3,"); break;
            case RipperParser.tOP_ASGN: System.err.print("tOP_ASGN,"); break;
            case RipperParser.tASSOC: System.err.print("tASSOC,"); break;
            case RipperParser.tLPAREN: System.err.print("tLPAREN,"); break;
            case RipperParser.tLPAREN2: System.err.print("tLPAREN2,"); break;
            case RipperParser.tLPAREN_ARG: System.err.print("tLPAREN_ARG,"); break;
            case RipperParser.tLBRACK: System.err.print("tLBRACK,"); break;
            case RipperParser.tRBRACK: System.err.print("tRBRACK,"); break;
            case RipperParser.tLBRACE: System.err.print("tLBRACE,"); break;
            case RipperParser.tLBRACE_ARG: System.err.print("tLBRACE_ARG,"); break;
            case RipperParser.tSTAR: System.err.print("tSTAR,"); break;
            case RipperParser.tSTAR2: System.err.print("tSTAR2,"); break;
            case RipperParser.tAMPER: System.err.print("tAMPER,"); break;
            case RipperParser.tAMPER2: System.err.print("tAMPER2,"); break;
            case RipperParser.tSYMBEG: System.err.print("tSYMBEG,"); break;
            case RipperParser.tTILDE: System.err.print("tTILDE,"); break;
            case RipperParser.tPERCENT: System.err.print("tPERCENT,"); break;
            case RipperParser.tDIVIDE: System.err.print("tDIVIDE,"); break;
            case RipperParser.tPLUS: System.err.print("tPLUS,"); break;
            case RipperParser.tMINUS: System.err.print("tMINUS,"); break;
            case RipperParser.tLT: System.err.print("tLT,"); break;
            case RipperParser.tGT: System.err.print("tGT,"); break;
            case RipperParser.tCARET: System.err.print("tCARET,"); break;
            case RipperParser.tBANG: System.err.print("tBANG,"); break;
            case RipperParser.tLCURLY: System.err.print("tLCURLY,"); break;
            case RipperParser.tRCURLY: System.err.print("tRCURLY,"); break;
            case RipperParser.tPIPE: System.err.print("tPIPE,"); break;
            case RipperParser.tLAMBDA: System.err.print("tLAMBDA,"); break;
            case RipperParser.tLAMBEG: System.err.print("tLAMBEG,"); break;
            case RipperParser.tRPAREN: System.err.print("tRPAREN,"); break;
            case RipperParser.tLABEL: System.err.print("tLABEL("+ value() +":),"); break;
            case RipperParser.tLABEL_END: System.err.print("tLABEL_END,"); break;
            case '\n': System.err.println("NL"); break;
            // Same stream as every other case, so the output stays in order.
            case EOF: System.err.println("EOF"); break;
            case RipperParser.tDSTAR: System.err.print("tDSTAR,"); break;
            case RipperParser.tSTRING_DEND: System.err.print("tSTRING_DEND,"); break;
            default: System.err.print("'" + (char)token + "'[" + token + "]"); break;
        }
    }
    
    /**
     * Tells whether there is unreported text between the token start
     * ({@code tokp}) and the current position ({@code lex_p}).
     *
     * @return {@code true} if {@code lex_p} is past {@code tokp}
     */
    public boolean hasScanEvent() {
        // The token start must never be ahead of the read position.
        if (tokp > lex_p) {
            throw parser.getRuntime().newRuntimeError("lex_p < tokp");
        }

        return tokp < lex_p;
    }
    
    /**
     * Dispatches the event for {@code token} with the delayed text (or nil when
     * there is none) as its value, stores the result in {@code yaccValue} and
     * clears the delayed buffer. While dispatching, the line number and token
     * start are set to where the delayed text began; both are restored afterwards.
     *
     * @param token token id used to select the event
     */
    public void dispatchDelayedToken(int token) { //mri: ripper_dispatch_delayed_token
        final int lineBefore = ruby_sourceline;
        final int tokpBefore = tokp;

        ruby_sourceline = delayed_line;
        tokp = lex_pbeg + delayed_col;

        final String event = tokenToEventId(token);
        final IRubyObject value;
        if (delayed == null) {
            value = parser.context.nil;
        } else {
            value = parser.getRuntime().newString(delayed.dup());
        }

        yaccValue = parser.dispatch(event, value);
        delayed = null;

        ruby_sourceline = lineBefore;
        tokp = tokpBefore;
    }
    
    /**
     * Dispatches a scan event for the pending text, if any, and discards the
     * result (unlike {@code dispatchScanEvent}, {@code yaccValue} is not touched).
     *
     * @param token token id used to select the event
     */
    public void dispatchIgnoredScanEvent(int token) {
        if (hasScanEvent()) {
            scanEventValue(token);
        }
    }
    
    /**
     * Dispatches a scan event for the pending text, if any, and stores the
     * result in {@code yaccValue}.
     *
     * @param token token id used to select the event
     */
    public void dispatchScanEvent(int token) { //mri: ripper_dispatch_scan_event
        if (hasScanEvent()) {
            yaccValue = scanEventValue(token);
        }
    }
    
    /**
     * Dispatches the event for {@code token} with the text between {@code tokp}
     * and {@code lex_p} as its value, then calls {@code flush()}.
     *
     * @param token token id used to select the event
     * @return the value returned by the dispatch
     */
    private IRubyObject scanEventValue(int token) { // mri: ripper_scane_event_val
        final int length = lex_p - tokp;
        final IRubyObject text = parser.getRuntime().newString(lexb.makeShared(tokp, length));

        final IRubyObject result = parser.dispatch(tokenToEventId(token), text);
        flush();

        return result;
    }
    
    /**
     * Maps a lexer token id to the name of the scanner event dispatched for it.
     * Tokens are grouped by event; anything without an explicit mapping is reported
     * as {@code "on_CHAR"}.
     *
     * @param token a RipperParser token constant, a ripper-only token, or a literal character
     * @return the event name, e.g. {@code "on_kw"} or {@code "on_op"}
     */
    private String tokenToEventId(int token) {
        switch (token) {
            // ---- operators ----
            case RipperParser.tBANG: case RipperParser.tPERCENT: case RipperParser.tANDDOT:
            case RipperParser.tAMPER2: case RipperParser.tSTAR2: case RipperParser.tPLUS:
            case RipperParser.tMINUS: case RipperParser.tDIVIDE: case RipperParser.tLT:
            case '=': case RipperParser.tGT: case '?':
            case RipperParser.tCARET: case RipperParser.tPIPE: case RipperParser.tTILDE:
            case ':':
            case RipperParser.tAMPER: case RipperParser.tANDOP: case RipperParser.tAREF:
            case RipperParser.tASET: case RipperParser.tASSOC: case RipperParser.tCMP:
            case RipperParser.tCOLON2: case RipperParser.tCOLON3:
            case RipperParser.tDOT2: case RipperParser.tDOT3:
            case RipperParser.tEQ: case RipperParser.tEQQ: case RipperParser.tGEQ:
            case RipperParser.tLEQ: case RipperParser.tLSHFT: case RipperParser.tMATCH:
            case RipperParser.tNEQ: case RipperParser.tNMATCH: case RipperParser.tOP_ASGN:
            case RipperParser.tOROP: case RipperParser.tPOW: case RipperParser.tRSHFT:
            case RipperParser.tSTAR: case RipperParser.tDSTAR:
            case RipperParser.tUMINUS: case RipperParser.tUMINUS_NUM: case RipperParser.tUPLUS:
                return "on_op";

            // ---- keywords ----
            case RipperParser.keyword_alias: case RipperParser.keyword_and:
            case RipperParser.keyword_begin: case RipperParser.keyword_break:
            case RipperParser.keyword_case: case RipperParser.keyword_class:
            case RipperParser.keyword_def: case RipperParser.keyword_defined:
            case RipperParser.keyword_do: case RipperParser.keyword_do_block:
            case RipperParser.keyword_do_cond: case RipperParser.keyword_else:
            case RipperParser.keyword_elsif: case RipperParser.keyword_end:
            case RipperParser.keyword_ensure: case RipperParser.keyword_false:
            case RipperParser.keyword_for: case RipperParser.keyword_if:
            case RipperParser.modifier_if: case RipperParser.keyword_in:
            case RipperParser.keyword_module: case RipperParser.keyword_next:
            case RipperParser.keyword_nil: case RipperParser.keyword_not:
            case RipperParser.keyword_or: case RipperParser.keyword_redo:
            case RipperParser.keyword_rescue: case RipperParser.modifier_rescue:
            case RipperParser.keyword_retry: case RipperParser.keyword_return:
            case RipperParser.keyword_self: case RipperParser.keyword_super:
            case RipperParser.keyword_then: case RipperParser.keyword_true:
            case RipperParser.keyword_undef: case RipperParser.keyword_unless:
            case RipperParser.modifier_unless: case RipperParser.keyword_until:
            case RipperParser.modifier_until: case RipperParser.keyword_when:
            case RipperParser.keyword_while: case RipperParser.modifier_while:
            case RipperParser.keyword_yield: case RipperParser.keyword__FILE__:
            case RipperParser.keyword__LINE__: case RipperParser.keyword__ENCODING__:
            case RipperParser.keyword_BEGIN: case RipperParser.keyword_END:
            case RipperParser.keyword_do_lambda:
                return "on_kw";

            // ---- separators and punctuation ----
            case ' ':
                return "on_words_sep";
            case ',':
                return "on_comma";
            case '.':
            case RipperParser.tDOT:
                return "on_period";
            case ';':
                return "on_semicolon";
            case '\n':
                return "on_nl";
            case RipperParser.tBACK_REF2:
            case RipperParser.tXSTRING_BEG:
                return "on_backtick";

            // ---- brackets ----
            case RipperParser.tLBRACE:
            case RipperParser.tLBRACE_ARG:
            case RipperParser.tLCURLY:
                return "on_lbrace";
            case RipperParser.tRCURLY:
                return "on_rbrace";
            case RipperParser.tLBRACK:
            case '[':
                return "on_lbracket";
            case RipperParser.tRBRACK:
                return "on_rbracket";
            case RipperParser.tLPAREN:
            case RipperParser.tLPAREN_ARG:
            case RipperParser.tLPAREN2:
                return "on_lparen";
            case ')': // ENEBO: don't think this can happen.
            case RipperParser.tRPAREN:
                return "on_rparen";

            // ---- identifiers, variables and literals ----
            case RipperParser.tBACK_REF:
            case RipperParser.tNTH_REF:
                return "on_backref";
            case RipperParser.tCONSTANT:
                return "on_const";
            case RipperParser.tCVAR:
                return "on_cvar";
            case RipperParser.tFID:
            case RipperParser.tIDENTIFIER:
                return "on_ident";
            case RipperParser.tFLOAT:
                return "on_float";
            case RipperParser.tGVAR:
                return "on_gvar";
            case RipperParser.tIMAGINARY:
                return "on_imaginary";
            case RipperParser.tINTEGER:
                return "on_int";
            case RipperParser.tIVAR:
                return "on_ivar";
            case RipperParser.tRATIONAL:
                return "on_rational";
            case RipperParser.tLABEL:
                return "on_label";
            case RipperParser.tLABEL_END:
                return "on_label_end";
            case RipperParser.tLAMBDA:
                return "on_tlambda";
            case RipperParser.tLAMBEG:
                return "on_tlambeg";

            // ---- strings, symbols, word lists and regexps ----
            case RipperParser.tQSYMBOLS_BEG:
                return "on_qsymbols_beg";
            case RipperParser.tSYMBOLS_BEG:
                return "on_symbols_beg";
            case RipperParser.tQWORDS_BEG:
                return "on_qwords_beg";
            case RipperParser.tWORDS_BEG:
                return "on_words_beg";
            case RipperParser.tREGEXP_BEG:
                return "on_regexp_beg";
            case RipperParser.tREGEXP_END:
                return "on_regexp_end";
            case RipperParser.tSTRING_BEG:
                return "on_tstring_beg";
            case RipperParser.tSTRING_CONTENT:
                return "on_tstring_content";
            case RipperParser.tSTRING_END:
                return "on_tstring_end";
            case RipperParser.tSTRING_DBEG:
                return "on_embexpr_beg";
            case RipperParser.tSTRING_DEND:
                return "on_embexpr_end";
            case RipperParser.tSTRING_DVAR:
                return "on_embvar";
            case RipperParser.tSYMBEG:
                return "on_symbeg";

            // ---- ripper specific tokens ----
            case RipperParser.tIGNORED_NL:
                return "on_ignored_nl";
            case RipperParser.tCOMMENT:
                return "on_comment";
            case RipperParser.tEMBDOC_BEG:
                return "on_embdoc_beg";
            case RipperParser.tEMBDOC:
                return "on_embdoc";
            case RipperParser.tEMBDOC_END:
                return "on_embdoc_end";
            case tSP:
                return "on_sp";
            case RipperParser.tHEREDOC_BEG:
                return "on_heredoc_beg";
            case RipperParser.tHEREDOC_END:
                return "on_heredoc_end";
            case RipperParser.k__END__:
                return "on___end__";

            // ---- single characters ----
            case RipperParser.tCHAR:
            default: // Weird catchall but we will try and not use < 256 value trick like MRI
                return "on_CHAR";
        }
    }
    
    // DEBUGGING HELP 
    /**
     * Debugging wrapper: fetches a token, prints it via {@code printToken}, and returns it.
     * Any {@link Exception} is printed to standard output and reported as {@code EOF}.
     *
     * NOTE(review): as written this method calls itself unconditionally, so invoking it would
     * recurse until the stack overflows (a StackOverflowError is not an Exception and would not
     * be caught below). Presumably it is meant to be enabled by swapping the names of this
     * method and yylex() while debugging -- confirm before relying on it.
     *
     * @return the token that was lexed, or {@code EOF} if an exception was caught
     */
    private int yylex2() throws IOException {
        try {
        // Self-call: see the NOTE(review) in the method comment.
        int currentToken = yylex2();

            printToken(currentToken);
        
        return currentToken;
        } catch (Exception e) {
            // Debug-only reporting; the exception is swallowed and EOF is returned instead.
            System.out.println("FFUFUFUFUFUFUFUF: " + e);
            return EOF;
        }
    }

    /**
     *  Returns the next token. Also sets yyVal as needed.
     *
     * @return    the next token
     */
    private int yylex() throws IOException {
        int c;
        // Set once whitespace (or a backslash-newline) has been skipped before the current token.
        boolean spaceSeen = false;
        boolean commandState;
        // Local snapshot of the field; written back when a comment or newline is seen below.
        boolean tokenSeen = this.tokenSeen;

        // While a string terminator is active, lexing is delegated to it entirely.
        if (lex_strterm != null) return lex_strterm.parseString(this, src);

        // Remember whether this token begins a command, then clear the flag for the next token.
        commandState = commandStart;
        commandStart = false;
        this.tokenSeen = true;
        // True when the '\n' case is entered by falling through from the comment case.
        boolean fallthru = false;

        // Each iteration reads one character; cases either return a token or 'continue' to
        // skip ignorable input (whitespace, comments, ignored newlines, embedded documents).
        loop: for(;;) {
            last_state = lex_state;
            c = nextc();
            switch(c) {
            case '\000': /* NUL */
            case '\004': /* ^D */
            case '\032': /* ^Z */
            case EOF:	 /* end of script. */
                return -1;
           
                /* white spaces */
            case ' ': case '\t': case '\f': case '\r':
            case '\13': /* '\v' */ {
                // NOTE(review): whitespaceBuf is appended to but never read within this method.
                ByteList whitespaceBuf = new ByteList(); // FIXME: bytelist encoding hookedup
                whitespaceBuf.append(c);
                boolean looping = true;
                spaceSeen = true;
                // Consume the rest of the whitespace run; stops on EOF or the first other char.
                while (looping && (c = nextc()) != EOF) {
                    switch (c) {
                        case ' ': case '\t': case '\f': case '\r':
                        case '\13': /* '\v' */
                            whitespaceBuf.append(c);
                            break;
                        default:
                            looping = false;
                            break;
                    }
                }
                // Return the terminating character (or EOF) and report the run as one tSP event.
                pushback(c);
                dispatchScanEvent(tSP);
                continue;
            }
            case '#': { /* it's a comment */
                this.tokenSeen = tokenSeen;
                // Try the rest of the line as a magic comment first; otherwise, if the comment
                // is at the top, let it set the file encoding.
                if (!parser_magic_comment(lexb.makeShared(lex_p, lex_pend - lex_p))) {
                    if (comment_at_top()) set_file_encoding(lex_p, lex_pend);
                }
                // Skip to the end of the line and report everything since tokp as a comment.
                lex_p = lex_pend;
                dispatchScanEvent(RipperParser.tCOMMENT);

                // Tells the newline case below not to dispatch tIGNORED_NL as well.
                fallthru = true;
            }
            /* fall through */
            case '\n': {
                this.tokenSeen = tokenSeen;
                // In these states the newline does not end a statement and is skipped.
                boolean normalArg = isLexState(lex_state, EXPR_BEG | EXPR_CLASS | EXPR_FNAME | EXPR_DOT) &&
                        !isLexState(lex_state, EXPR_LABELED);
                if (normalArg || isLexStateAll(lex_state, EXPR_ARG | EXPR_LABELED)) {
                    if (!fallthru) dispatchScanEvent(RipperParser.tIGNORED_NL);
                    fallthru = false;
                    // After a label inside a keyword-argument context the newline is significant.
                    if (!normalArg && inKwarg) {
                        commandStart = true;
                        setState(EXPR_BEG);
                        return '\n';
                    }
                    continue loop;
                }

                // Look at the start of the following line to see whether it continues this
                // statement with a leading '.' or '&.'.
                boolean done = false;
                while (!done) {
                    c = nextc();

                    switch (c) {
                        case ' ': case '\t': case '\f': case '\r':
                        case '\13': /* '\v' */
                            spaceSeen = true;
                            continue;
                        case '&':
                        case '.': {
                            dispatchDelayedToken(RipperParser.tIGNORED_NL);
                            // True for "&." (c is '&' and next is '.') and for a '.' that is not
                            // followed by another '.'; the newline is then ignored.
                            if (peek('.') == (c == '&')) {
                                pushback(c);

                                dispatchScanEvent(tSP);
                                continue loop;
                            }
                        }
                        // Intentional fall through: not a continuation line after all.
                        default:
                            ruby_sourceline--;
                            lex_nextline = lex_lastline;
                        // Intentional fall through into the EOF handling.
                        case -1:        // EOF (ENEBO: After default?
                            lex_goto_eol();
                            if (c != -1) tokp = lex_p;
                            done = true;
                    }
                }

                // A significant newline: the next token starts a new command.
                commandStart = true;
                setState(EXPR_BEG);
                return '\n';
            }
            case '*':
                return star(spaceSeen);
            case '!':
                return bang();
            case '=':
                // documentation nodes
                // An embedded document is only recognized when was_bol() holds and the text
                // after '=' matches BEGIN_DOC_MARKER followed by whitespace.
                if (was_bol()) {
                    // NOTE(review): the offset 5 presumably equals BEGIN_DOC_MARKER.length() -- confirm.
                    if (strncmp(lexb.makeShared(lex_p, lex_pend - lex_p), BEGIN_DOC_MARKER, BEGIN_DOC_MARKER.length()) && 
                            Character.isWhitespace(p(lex_p + 5))) {
                        // Suppresses the tEMBDOC event on the first pass (the begin line was
                        // already reported as tEMBDOC_BEG).
                        boolean first_p = true;
                        
                        lex_goto_eol();
                        
                        dispatchScanEvent(RipperParser.tEMBDOC_BEG);
                        // One iteration per line until a line starting with '=' + END_DOC_MARKER.
                        for (;;) {
                            lex_goto_eol();
                            
                            if (!first_p) dispatchScanEvent(RipperParser.tEMBDOC);
                            first_p = false;
                            
                            c = nextc();

                            if (c == EOF) {
                                compile_error("embedded document meets end of file");
                                return EOF;
                            }

                            if (c != '=') continue;

                            // The end marker must be followed by end of buffer or whitespace.
                            if (strncmp(lexb.makeShared(lex_p, lex_pend - lex_p), END_DOC_MARKER, END_DOC_MARKER.length()) &&
                                    (lex_p + 3 == lex_pend || Character.isWhitespace(p(lex_p + 3)))) {
                                break;
                            }
                        }
                        lex_goto_eol();
                        dispatchScanEvent(RipperParser.tEMBDOC_END);
                        
                        continue loop;
                    }
                }


                setState(isAfterOperator() ? EXPR_ARG : EXPR_BEG);

                // Distinguish "===", "==", "=~", "=>" and plain "=".
                c = nextc();
                if (c == '=') {
                    c = nextc();
                    if (c == '=') {
                        return RipperParser.tEQQ;
                    }

                    pushback(c);
                    return RipperParser.tEQ;
                }
                if (c == '~') {
                    return RipperParser.tMATCH;
                } else if (c == '>') {
                    return RipperParser.tASSOC;
                }
                pushback(c);
                return '=';
                
            case '<':
                return lessThan(spaceSeen);
            case '>':
                return greaterThan();
            case '"':
                return doubleQuote(commandState);
            case '`':
                return backtick(commandState);
            case '\'':
                return singleQuote(commandState);
            case '?':
                return questionMark();
            case '&':
                return ampersand(spaceSeen);
            case '|':
                return pipe();
            case '+':
                return plus(spaceSeen);
            case '-':
                return minus(spaceSeen);
            case '.':
                return dot();
            case '0' : case '1' : case '2' : case '3' : case '4' :
            case '5' : case '6' : case '7' : case '8' : case '9' :
                return parseNumber(c);
            case ')':
                return rightParen();
            case ']':
                return rightBracket();
            case '}':
                return rightCurly();
            case ':':
                return colon(spaceSeen);
            case '/':
                return slash(spaceSeen);
            case '^':
                return caret();
            case ';':
                // A semicolon ends the statement; the next token starts a new command.
                commandStart = true;
                setState(EXPR_BEG);
                return ';';
            case ',':
                return comma(c);
            case '~':
                return tilde();
            case '(':
                return leftParen(spaceSeen);
            case '[':
                return leftBracket(spaceSeen);
            case '{':
            	return leftCurly();
            case '\\':
                // Backslash followed by a newline is reported as whitespace (tSP) and skipped;
                // any other backslash is returned as a token of its own.
                c = nextc();
                if (c == '\n') {
                    spaceSeen = true;
                    dispatchScanEvent(tSP);
                    continue;
                }
                pushback(c);
                return '\\';
            case '%':
                return percent(spaceSeen);
            case '$':
                return dollar();
            case '@':
                return at();
            case '_':
                // A whole-line END_MARKER stops lexing: the line is reported as k__END__ and
                // EOF is returned.
                if (was_bol() && whole_match_p(END_MARKER, false)) {
                    __end__seen = true;
                    eofp = true;
                    
                    lex_goto_eol();
                    dispatchScanEvent(RipperParser.k__END__);
                    return EOF;
                }
                return identifier(c, commandState);
            default:
                return identifier(c, commandState);
            }
        }
    }

    /**
     * Finishes an identifier-like token: records the interned text in {@code identValue} and
     * returns {@code result} unchanged. When the token is a plain identifier that was not lexed
     * in a DOT/FNAME state and the current scope reports it as defined, the lexer state is
     * switched to {@code EXPR_END|EXPR_LABEL}.
     *
     * @param last_state lexer state in effect before the identifier was lexed
     * @param result     token id to return
     * @param value      identifier text
     * @return {@code result}
     */
    private int identifierToken(int last_state, int result, String value) {
        boolean plainIdentifier = result == RipperParser.tIDENTIFIER;

        if (plainIdentifier && !isLexState(last_state, EXPR_DOT|EXPR_FNAME)) {
            if (parser.getCurrentScope().isDefined(value) >= 0) {
                setState(EXPR_END|EXPR_LABEL);
            }
        }

        identValue = value.intern();
        return result;
    }
    
    /**
     * Lexes a token that starts with '&amp;': {@code &&}, {@code &&=}, {@code &=}, {@code &.},
     * or a lone ampersand (reported as tAMPER in argument-prefix/BEG position, else tAMPER2).
     *
     * @param spaceSeen whether whitespace preceded the '&amp;'
     * @return the token id
     */
    private int ampersand(boolean spaceSeen) throws IOException {
        int next = nextc();

        if (next == '&') {
            setState(EXPR_BEG);
            int following = nextc();
            if (following == '=') {
                setState(EXPR_BEG);
                return RipperParser.tOP_ASGN;
            }
            pushback(following);
            return RipperParser.tANDOP;
        }
        if (next == '=') {
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        }
        if (next == '.') {
            setState(EXPR_DOT);
            return RipperParser.tANDDOT;
        }

        // A lone '&': give the lookahead character back and classify the ampersand.
        pushback(next);

        int token;
        if (isSpaceArg(next, spaceSeen)) {
            if (isVerbose()) warning("`&' interpreted as argument prefix");
            token = RipperParser.tAMPER;
        } else {
            token = isBEG() ? RipperParser.tAMPER : RipperParser.tAMPER2;
        }

        setState(isAfterOperator() ? EXPR_ARG : EXPR_BEG);

        return token;
    }

    /**
     * Lexes a token that starts with '@': an instance variable ({@code @name}) or a class
     * variable ({@code @@name}).
     *
     * Exactly one error is reported for a malformed name. Previously the class-variable
     * message was also emitted for an instance variable whenever {@code compile_error}
     * returned normally, because the two messages were not in an if/else.
     *
     * @return the token produced by {@code tokenize_ident}, or {@code EOF} when the
     *         identifier characters could not be added
     */
    private int at() throws IOException {
        newtok(true);
        int c = nextc();
        int result;
        if (c == '@') {
            c = nextc();
            result = RipperParser.tCVAR;
        } else {
            result = RipperParser.tIVAR;
        }

        if (c == EOF || isSpace(c)) {
            // Nothing usable follows the sigil.
            if (result == RipperParser.tIVAR) {
                compile_error("`@' without identifiers is not allowed as an instance variable name");
            } else {
                compile_error("`@@' without identifiers is not allowed as a class variable name");
            }
        } else if (Character.isDigit(c) || !isIdentifierChar(c)) {
            // The name starts with a digit or a non-identifier character.
            pushback(c);
            if (result == RipperParser.tIVAR) {
                compile_error("`@" + ((char) c) + "' is not allowed as an instance variable name");
            } else {
                compile_error("`@@" + ((char) c) + "' is not allowed as a class variable name");
            }
        }

        if (!tokadd_ident(c)) return EOF;

        last_state = lex_state;
        setState(EXPR_END);

        return tokenize_ident(result);
    }
    
    /**
     * Lexes a backtick. In FNAME or DOT state it is a method name (tBACK_REF2); otherwise it
     * opens a command string and installs the matching string terminator.
     *
     * @param commandState whether the token begins a command
     * @return tBACK_REF2 or tXSTRING_BEG
     */
    private int backtick(boolean commandState) throws IOException {
        boolean definingMethodName = isLexState(lex_state, EXPR_FNAME);

        if (definingMethodName || isLexState(lex_state, EXPR_DOT)) {
            if (definingMethodName) {
                setState(EXPR_ENDFN);
            } else {
                setState(commandState ? EXPR_CMDARG : EXPR_ARG);
            }
            return RipperParser.tBACK_REF2;
        }

        lex_strterm = new StringTerm(str_xquote, '\0', '`');
        return RipperParser.tXSTRING_BEG;
    }
    
    /**
     * Lexes a token that starts with '!': {@code !=}, {@code !~}, {@code !@} (only after an
     * operator) or a plain {@code !}.
     *
     * @return tNEQ, tNMATCH or tBANG
     */
    private int bang() throws IOException {
        int next = nextc();

        boolean afterOperator = isAfterOperator();
        setState(afterOperator ? EXPR_ARG : EXPR_BEG);

        // "!@" is the method-name spelling of unary bang; the '@' stays consumed.
        if (afterOperator && next == '@') return RipperParser.tBANG;

        if (next == '=') return RipperParser.tNEQ;
        if (next == '~') return RipperParser.tNMATCH;

        // Just a plain bang
        pushback(next);
        return RipperParser.tBANG;
    }
    
    /**
     * Lexes a token that starts with '^': either {@code ^=} or a plain {@code ^}.
     *
     * @return tOP_ASGN or tCARET
     */
    private int caret() throws IOException {
        int next = nextc();

        if (next != '=') {
            setState(isAfterOperator() ? EXPR_ARG : EXPR_BEG);
            pushback(next);
            return RipperParser.tCARET;
        }

        setState(EXPR_BEG);
        return RipperParser.tOP_ASGN;
    }

    /**
     * Lexes a token that starts with ':': {@code ::} (tCOLON3 or tCOLON2 depending on
     * position), a bare {@code :}, or the start of a symbol (tSYMBEG), installing a string
     * terminator when the symbol is quoted.
     *
     * @param spaceSeen whether whitespace preceded the ':'
     * @return the token id
     */
    private int colon(boolean spaceSeen) throws IOException {
        int next = nextc();

        if (next == ':') {
            boolean leadingScope = isBEG() || isLexState(lex_state, EXPR_CLASS) || (isARG() && spaceSeen);
            if (!leadingScope) {
                setState(EXPR_DOT);
                return RipperParser.tCOLON2;
            }
            setState(EXPR_BEG);
            return RipperParser.tCOLON3;
        }

        // A ':' at an end position, or followed by whitespace or a comment, is a bare colon.
        if (isEND() || Character.isWhitespace(next) || next == '#') {
            pushback(next);
            setState(EXPR_BEG);
            return ':';
        }

        // Symbol start: quoted symbols get a string terminator, others keep the lookahead.
        if (next == '\'') {
            lex_strterm = new StringTerm(str_ssym, '\0', next);
        } else if (next == '"') {
            lex_strterm = new StringTerm(str_dsym, '\0', next);
        } else {
            pushback(next);
        }

        setState(EXPR_FNAME);
        return RipperParser.tSYMBEG;
    }

    /**
     * Handles a comma: moves the lexer to {@code EXPR_BEG|EXPR_LABEL} and returns the
     * character itself as the token.
     *
     * @param c the comma character as read by the caller
     * @return {@code c}
     */
    private int comma(int c) throws IOException {
        final int stateAfterComma = EXPR_BEG|EXPR_LABEL;
        setState(stateAfterComma);
        return c;
    }

    /**
     * Picks which flavor of the {@code do} keyword token to return, checked in this order:
     * lambda body, loop condition, block, plain {@code do}.
     *
     * @param state lexer state that was in effect when the keyword was encountered
     * @return keyword_do_lambda, keyword_do_cond, keyword_do_block or keyword_do
     */
    private int doKeyword(int state) {
        int lambdaParenLevel = getLeftParenBegin();
        boolean opensLambdaBody = lambdaParenLevel > 0 && lambdaParenLevel == parenNest;

        if (opensLambdaBody) {
            setLeftParenBegin(0);
            parenNest--;
            return RipperParser.keyword_do_lambda;
        }

        if (conditionState.isInState()) {
            return RipperParser.keyword_do_cond;
        }

        if ((cmdArgumentState.isInState() && !isLexState(state, EXPR_CMDARG))
                || isLexState(state, EXPR_BEG|EXPR_ENDARG)) {
            return RipperParser.keyword_do_block;
        }

        return RipperParser.keyword_do;
    }
    
    /**
     * Lexes a token that starts with '$': a global variable ({@code $name}, {@code $-x} or one
     * of the punctuation globals), a back reference ({@code $&}, {@code $`}, {@code $'},
     * {@code $+}) or a numbered match reference ({@code $1} ...).
     *
     * The generic {@code $name} branch now returns {@code EOF} when {@code tokadd_ident}
     * fails, consistent with the {@code $_name} branch and with {@code at()}; previously its
     * result was ignored there.
     *
     * @return tGVAR, tBACK_REF, tNTH_REF, {@code '$'} (for a "$-" not followed by an
     *         identifier character) or {@code EOF} on error
     */
    private int dollar() throws IOException {
        setState(EXPR_END);
        newtok(true);
        int c = nextc();

        switch (c) {
        case '_':       /* $_: last read line string */
            c = nextc();
            if (isIdentifierChar(c)) {
                // "$_foo" is an ordinary global whose name happens to start with '_'.
                if (!tokadd_ident(c)) return EOF;

                last_state = lex_state;
                setState(EXPR_END);
                identValue = createTokenString().intern();
                return RipperParser.tGVAR;
            }
            pushback(c);
            c = '_';

            // fall through
        case '~':       /* $~: match-data */
        case '*':       /* $*: argv */
        case '$':       /* $$: pid */
        case '?':       /* $?: last status */
        case '!':       /* $!: error string */
        case '@':       /* $@: error position */
        case '/':       /* $/: input record separator */
        case '\\':      /* $\: output record separator */
        case ';':       /* $;: field separator */
        case ',':       /* $,: output field separator */
        case '.':       /* $.: last read line number */
        case '=':       /* $=: ignorecase */
        case ':':       /* $:: load path */
        case '<':       /* $<: reading filename */
        case '>':       /* $>: default output handle */
        case '\"':      /* $": already loaded files */
            identValue = "$" + (char) c;
            return RipperParser.tGVAR;

        case '-':
            c = nextc();
            if (isIdentifierChar(c)) {
                if (!tokadd_mbchar(c)) return EOF;
            } else {
                // "$-" without a following identifier character: give both characters back
                // and report the lone '$'.
                pushback(c);
                pushback('-');
                return '$';
            }
            identValue = createTokenString().intern();
            /* xxx shouldn't check if valid option variable */
            return RipperParser.tGVAR;

        case '&':       /* $&: last match */
        case '`':       /* $`: string before last match */
        case '\'':      /* $': string after last match */
        case '+':       /* $+: string matches last paren. */
            identValue = "$" + (char) c;
            // Explicit reference to these vars as symbols...
            return last_state == EXPR_FNAME ? RipperParser.tGVAR : RipperParser.tBACK_REF;

        case '1': case '2': case '3': case '4': case '5': case '6':
        case '7': case '8': case '9':
            // Consume the remaining digits of the match-group number.
            do {
                c = nextc();
            } while (Character.isDigit(c));
            pushback(c);
            if (last_state == EXPR_FNAME) {
                identValue = createTokenString().intern();
                return RipperParser.tGVAR;
            }

            String refAsString = createTokenString();

            // Parsed only to detect overflow; the numeric value itself is not used here.
            try {
                Integer.parseInt(refAsString.substring(1));
            } catch (NumberFormatException e) {
                warn("`" + refAsString + "' is too big for a number variable, always nil");
            }

            identValue = createTokenString().intern();
            return RipperParser.tNTH_REF;
        case '0':
            setState(EXPR_END);

            return identifierToken(last_state, RipperParser.tGVAR, ("$" + (char) c).intern());
        default:
            if (!isIdentifierChar(c)) {
                if (c == EOF || isSpace(c)) {
                    compile_error("`$' without identifiers is not allowed as a global variable name");
                } else {
                    pushback(c);
                    compile_error("`$" + ((char) c) + "' is not allowed as a global variable name");
                }
                return EOF;
            }

            last_state = lex_state;
            setState(EXPR_END);

            // Abort on failure instead of building a token from a partial identifier.
            if (!tokadd_ident(c)) return EOF;

            return identifierToken(last_state, RipperParser.tGVAR, createTokenString().intern()); // $blah
        }
    }
    
    /**
     * Lexes a token that starts with '.': {@code ...}, {@code ..} or a single {@code .}.
     * A single dot directly followed by a digit additionally reports a compile error.
     *
     * @return tDOT3, tDOT2 or tDOT
     */
    private int dot() throws IOException {
        setState(EXPR_BEG);

        int next = nextc();
        if (next == '.') {
            int third = nextc();
            if (third == '.') {
                return RipperParser.tDOT3;
            }
            pushback(third);
            return RipperParser.tDOT2;
        }

        pushback(next);
        if (Character.isDigit(next)) {
            compile_error("no .<digit> floating literal anymore; put 0 before dot");
        }

        setState(EXPR_DOT);
        return RipperParser.tDOT;
    }
    
    /**
     * Starts a double-quoted string: installs the string terminator (adding the label flag
     * when a label is possible at this position) and returns tSTRING_BEG.
     *
     * @param commandState whether the token begins a command
     * @return tSTRING_BEG
     */
    private int doubleQuote(boolean commandState) throws IOException {
        int flags = str_dquote;
        if (isLabelPossible(commandState)) {
            flags |= str_label;
        }

        lex_strterm = new StringTerm(flags, '\0', '"');
        return RipperParser.tSTRING_BEG;
    }
    
    /**
     * Lexes a token that starts with '&gt;': {@code >=}, {@code >>}, {@code >>=} or a plain
     * {@code >}.
     *
     * @return tGEQ, tRSHFT, tOP_ASGN or tGT
     */
    private int greaterThan() throws IOException {
        setState(isAfterOperator() ? EXPR_ARG : EXPR_BEG);

        int next = nextc();

        if (next == '=') {
            return RipperParser.tGEQ;
        }

        if (next != '>') {
            pushback(next);
            return RipperParser.tGT;
        }

        // Saw ">>": either a shift or a shift-assignment.
        int afterShift = nextc();
        if (afterShift == '=') {
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        }
        pushback(afterShift);

        return RipperParser.tRSHFT;
    }

    /**
     * Lexes an identifier-like token beginning with {@code c}: a constant, a local/method
     * identifier, a method name ending in '!' or '?', a label, or a keyword.
     *
     * @param c            first character of the token (already consumed)
     * @param commandState whether the token begins a command
     * @return the token id, or {@code EOF} when a character could not be added to the token
     */
    private int identifier(int c, boolean commandState) throws IOException {
        // A non-identifier character is reported as an error; lexing then continues below.
        if (!isIdentifierChar(c)) {
            String badChar = "\\" + Integer.toOctalString(c & 0xff);
            compile_error("Invalid char `" + badChar + "' ('" + (char) c + "') in expression");
        }
        // FIXME: on_kw: will return BOM as part of the ident string "\xfeffclass" on MRI and Yard also seems
        // to need this to properly parse.  So I record where token should really start so I can extract as
        // a proper ident for keyword check but createTempValue below will end up creating the bom + kw.  This feels like
        // an MRI bug but it is well baked into libraries at this point???  newtok+tokadd is still different from MRI
        // and does not construct a temp buf.  Once I convert that to be the same I think I can do exactly what MRI does
        // and this hack can disappear.
        int whereKeywordShouldStart = lex_p - 1;

        // Consume the run of identifier characters; c ends up as the first character after it.
        do {
            if (!tokadd_mbchar(c)) return EOF;
            c = nextc();
        } while (isIdentifierChar(c));

        boolean lastBangOrPredicate = false;

        // methods 'foo!' and 'foo?' are possible but if followed by '=' it is relop
        if (c == '!' || c == '?') {
            if (!peek('=')) {
                // The '!' / '?' stays consumed and becomes part of the token.
                lastBangOrPredicate = true;
            } else {
                pushback(c);
            }
        } else {
            pushback(c);
        }
        
        // 0 means "not classified yet".
        int result = 0;

        last_state = lex_state;
        String tempVal;
        if (lastBangOrPredicate) {
            result = RipperParser.tFID;
            tempVal = createTokenString();
        } else {
            // In FNAME state a trailing '=' is kept as part of the name (it stays consumed)
            // unless it begins "=~", "=>" or "==" (where "==" is not followed by '>').
            if (isLexState(lex_state, EXPR_FNAME)) {
                if ((c = nextc()) == '=') { 
                    int c2 = nextc();

                    if (c2 != '~' && c2 != '>' &&
                            (c2 != '=' || peek('>'))) {
                        result = RipperParser.tIDENTIFIER;
                        pushback(c2);
                    } else { 
                        pushback(c2);
                        pushback(c);
                    }
                } else {
                    pushback(c);
                }
            }
            tempVal = createTokenString();

            // Unclassified names starting with an upper-case letter are constants; everything
            // else (including the '=' names classified above) is an identifier.
            if (result == 0 && Character.isUpperCase(tempVal.charAt(0))) {
                result = RipperParser.tCONSTANT;
            } else {
                result = RipperParser.tIDENTIFIER;
            }
        }

        // "name:" in a position where a label is allowed: consume the suffix character and
        // return a label token.
        if (isLabelPossible(commandState)) {
            if (isLabelSuffix()) {
                setState(EXPR_ARG|EXPR_LABELED);
                nextc();
                identValue = tempVal.intern();
                return RipperParser.tLABEL;
            }
        }

        // Keyword lookup is skipped when the state is exactly EXPR_DOT.
        if (lex_state != EXPR_DOT) {
            Keyword keyword = getKeyword(createTokenString(whereKeywordShouldStart)); // Is it is a keyword?

            if (keyword != null) {
                int state = lex_state; // Save state at time keyword is encountered
                setState(keyword.state);

                // In FNAME state the keyword token is returned along with its text.
                if (isLexState(state, EXPR_FNAME)) {
                    identValue = tempVal;
                    return keyword.id0;
                }

                if (isLexState(lex_state, EXPR_BEG)) commandStart = true;

                if (keyword.id0 == RipperParser.keyword_do) return doKeyword(state);

                // id0 is returned when the keyword was met in a BEG/LABELED state, id1 otherwise;
                // when the two differ the state moves to EXPR_BEG|EXPR_LABEL.
                if (isLexState(state, EXPR_BEG|EXPR_LABELED)) {
                    return keyword.id0;
                } else {
                    if (keyword.id0 != keyword.id1) setState(EXPR_BEG|EXPR_LABEL);
                    return keyword.id1;
                }
            }
        }

        // Not a keyword: choose the state that follows an ordinary identifier.
        if (isLexState(lex_state, EXPR_BEG_ANY|EXPR_ARG_ANY|EXPR_DOT)) {
            setState(commandState ? EXPR_CMDARG : EXPR_ARG);
        } else if (lex_state == EXPR_FNAME) {
            setState(EXPR_ENDFN);
        } else {
            setState(EXPR_END);
        }
        
        return identifierToken(last_state, result, tempVal.intern());
    }

    /**
     * Lexes a '[' that has just been consumed.
     *
     * After an operator this is either the start of the {@code []} / {@code []=} method names
     * (tAREF / tASET) or a plain '['. Otherwise the token is tLBRACK when the lexer is at an
     * expression beginning, or in an argument position preceded by a space or a label, and a
     * plain '[' in every other case.
     *
     * @param spaceSeen whether whitespace preceded the bracket
     * @return the token for this bracket
     */
    private int leftBracket(boolean spaceSeen) throws IOException {
        parenNest++;

        if (isAfterOperator()) {
            int next = nextc();
            if (next != ']') {
                pushback(next);
                setState(EXPR_ARG|EXPR_LABEL);
                return '[';
            }

            setState(EXPR_ARG);
            if (!peek('=')) return RipperParser.tAREF;

            nextc(); // consume the '=' of "[]="
            return RipperParser.tASET;
        }

        // Decide the token before the state is changed below; the predicates read lex_state.
        boolean opensLiteral = isBEG() || (isARG() && (spaceSeen || isLexState(lex_state, EXPR_LABELED)));
        int token = opensLiteral ? RipperParser.tLBRACK : '[';

        setState(EXPR_BEG|EXPR_LABEL);
        conditionState.stop();
        cmdArgumentState.stop();
        yaccValue = "[";
        return token;
    }

    /**
     * Lexes a '{' that has just been consumed and classifies it.
     *
     * If the recorded left-paren-begin level is positive and equals the current paren nesting,
     * the brace is reported as tLAMBEG. Otherwise the current lex state selects between
     * tLBRACE, tLCURLY and tLBRACE_ARG.
     *
     * @return the token for this brace
     */
    private int leftCurly() {
        braceNest++;

        int lambdaParenLevel = getLeftParenBegin();
        boolean opensLambdaBody = lambdaParenLevel > 0 && lambdaParenLevel == parenNest;
        if (opensLambdaBody) {
            setState(EXPR_BEG);
            setLeftParenBegin(0);
            parenNest--;
            conditionState.stop();
            cmdArgumentState.stop();
            return RipperParser.tLAMBEG;
        }

        final int token;
        if (isLexState(lex_state, EXPR_LABELED)) {
            token = RipperParser.tLBRACE;          // hash
        } else if (isLexState(lex_state, EXPR_ARG_ANY|EXPR_END|EXPR_ENDFN)) {
            token = RipperParser.tLCURLY;          // block (primary)
        } else if (isLexState(lex_state, EXPR_ENDARG)) {
            token = RipperParser.tLBRACE_ARG;      // block (expr)
        } else {
            token = RipperParser.tLBRACE;          // hash
        }

        conditionState.stop();
        cmdArgumentState.stop();
        // NOTE(review): this first setState looks redundant with the one right after it -- confirm.
        setState(EXPR_BEG);
        if (token == RipperParser.tLBRACE_ARG) {
            setState(EXPR_BEG);
        } else {
            setState(EXPR_BEG|EXPR_LABEL);
        }
        if (token != RipperParser.tLBRACE) commandStart = true;

        return token;
    }

    /**
     * Lexes a '(' that has just been consumed.
     *
     * Yields tLPAREN at an expression beginning, tLPAREN_ARG when isSpaceArg accepts it,
     * and tLPAREN2 otherwise.
     *
     * @param spaceSeen whether whitespace preceded the parenthesis
     * @return the token for this parenthesis
     */
    private int leftParen(boolean spaceSeen) throws IOException {
        final int token = isBEG() ? RipperParser.tLPAREN
                : isSpaceArg('(', spaceSeen) ? RipperParser.tLPAREN_ARG
                : RipperParser.tLPAREN2;

        parenNest++;
        conditionState.stop();
        cmdArgumentState.stop();
        setState(EXPR_BEG|EXPR_LABEL);

        return token;
    }
    
    /**
     * Lexes input following a '&lt;': a here-document identifier, {@code <=>}, {@code <=},
     * {@code <<=}, {@code <<} or a plain {@code <}.
     *
     * @param spaceSeen whether whitespace preceded the '&lt;'
     * @return the token that was recognized
     */
    private int lessThan(boolean spaceSeen) throws IOException {
        last_state = lex_state;
        int first = nextc();

        boolean heredocPossible = first == '<'
                && !isLexState(lex_state, EXPR_DOT|EXPR_CLASS)
                && !isEND()
                && (!isARG() || isLexState(lex_state, EXPR_LABELED) || spaceSeen);
        if (heredocPossible) {
            int heredocToken = hereDocumentIdentifier();

            // 0 means no here-document identifier was found; continue as an operator.
            if (heredocToken != 0) return heredocToken;
        }

        if (isAfterOperator()) {
            setState(EXPR_ARG);
        } else {
            setState(EXPR_BEG);
        }

        if (first == '=') {
            int second = nextc();
            if (second == '>') return RipperParser.tCMP;

            pushback(second);
            return RipperParser.tLEQ;
        }

        if (first == '<') {
            int second = nextc();
            if (second == '=') {
                setState(EXPR_BEG);
                return RipperParser.tOP_ASGN;
            }

            pushback(second);
            warn_balanced(second, spaceSeen, "<<", "here document");
            return RipperParser.tLSHFT;
        }

        pushback(first);
        return RipperParser.tLT;
    }
    
    /**
     * Lexes input following a '-': {@code -@}, binary minus, {@code -=}, the lambda arrow
     * {@code ->}, or a unary minus (tUMINUS_NUM when a digit follows).
     *
     * @param spaceSeen whether whitespace preceded the '-'
     * @return the token that was recognized
     */
    private int minus(boolean spaceSeen) throws IOException {
        int next = nextc();

        if (isAfterOperator()) {
            setState(EXPR_ARG);
            if (next == '@') return RipperParser.tUMINUS;

            pushback(next);
            return RipperParser.tMINUS;
        }

        switch (next) {
        case '=':
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        case '>':
            setState(EXPR_ENDFN);
            return RipperParser.tLAMBDA;
        }

        // Evaluated before the state change below, since the predicates read lex_state.
        boolean unary = isBEG() || (isSpaceArg(next, spaceSeen) && arg_ambiguous());

        setState(EXPR_BEG);
        pushback(next);

        if (unary) {
            return Character.isDigit(next) ? RipperParser.tUMINUS_NUM : RipperParser.tUMINUS;
        }

        warn_balanced(next, spaceSeen, "-", "unary operator");
        return RipperParser.tMINUS;
    }

    /**
     * Lexes input following a '%': a percent-quoted literal (delegated to parseQuote),
     * {@code %=}, or the plain tPERCENT operator.
     *
     * @param spaceSeen whether whitespace preceded the '%'
     * @return the token that was recognized
     */
    private int percent(boolean spaceSeen) throws IOException {
        boolean atBeginning = isBEG();
        int next = nextc();

        if (atBeginning) return parseQuote(next);

        if (next == '=') {
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        }

        boolean quoteStart = isSpaceArg(next, spaceSeen)
                || (isLexState(lex_state, EXPR_FITEM) && next == 's');
        if (quoteStart) return parseQuote(next);

        if (isAfterOperator()) {
            setState(EXPR_ARG);
        } else {
            setState(EXPR_BEG);
        }

        pushback(next);
        warn_balanced(next, spaceSeen, "%", "string literal");
        return RipperParser.tPERCENT;
    }

    /**
     * Lexes input following a '|': {@code ||=}, {@code ||}, {@code |=} or a single {@code |}.
     *
     * @return tOP_ASGN, tOROP or tPIPE
     */
    private int pipe() throws IOException {
        int next = nextc();

        if (next == '|') {
            setState(EXPR_BEG);

            int afterOr = nextc();
            if (afterOr == '=') {
                setState(EXPR_BEG);
                return RipperParser.tOP_ASGN;
            }

            pushback(afterOr);
            return RipperParser.tOROP;
        }

        if (next == '=') {
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        }

        if (isAfterOperator()) {
            setState(EXPR_ARG);
        } else {
            setState(EXPR_BEG|EXPR_LABEL);
        }
        pushback(next);

        return RipperParser.tPIPE;
    }
    
    /**
     * Lexes input following a '+': {@code +@}, binary plus, {@code +=}, a signed numeric
     * literal (delegated to parseNumber), or a unary plus.
     *
     * @param spaceSeen whether whitespace preceded the '+'
     * @return the token that was recognized
     */
    private int plus(boolean spaceSeen) throws IOException {
        int next = nextc();

        if (isAfterOperator()) {
            setState(EXPR_ARG);
            if (next != '@') {
                pushback(next);
                return RipperParser.tPLUS;
            }
            return RipperParser.tUPLUS;
        }

        if (next == '=') {
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        }

        // Evaluated before the state change below, since the predicates read lex_state.
        boolean unary = isBEG() || (isSpaceArg(next, spaceSeen) && arg_ambiguous());

        setState(EXPR_BEG);
        pushback(next);

        if (!unary) {
            warn_balanced(next, spaceSeen, "+", "unary operator");
            return RipperParser.tPLUS;
        }

        // The digit was pushed back above; parseNumber re-reads it after seeing the sign.
        if (Character.isDigit(next)) return parseNumber('+');

        return RipperParser.tUPLUS;
    }

    // FIXME: This is a bit different than regular parser but the problem
    // I ran into was not returning the '?' with the char it is finding.
    // This in part must be some difference between MRI and our lexer impls
    // doing things a little differently.
    /**
     * Lexes input following a '?': either the ternary operator '?' or a character literal.
     *
     * When a raw whitespace character follows the '?' (outside an argument position), a warning
     * suggests the equivalent escape, e.g. {@code ?\s} for a space.
     *
     * @return '?', RipperParser.tCHAR, RipperParser.tINTEGER (for the {@code \\u} form), or EOF
     */
    private int questionMark() throws IOException {
        if (isEND()) {
            setState(EXPR_VALUE);

            return '?';
        }

        int c = nextc();
        if (c == EOF) {
            compile_error("incomplete character syntax");
            return EOF;
        }

        if (Character.isWhitespace(c)) {
            if (!isARG()) {
                // Kept as a char so the warning prints the letter rather than its numeric code.
                char escapeLetter = 0;
                switch (c) {
                case ' ':
                    escapeLetter = 's';
                    break;
                case '\n':
                    escapeLetter = 'n';
                    break;
                case '\t':
                    escapeLetter = 't';
                    break;
                case 0x0B: // vertical tab, C's '\v'
                    escapeLetter = 'v';
                    break;
                case '\r':
                    escapeLetter = 'r';
                    break;
                case '\f':
                    escapeLetter = 'f';
                    break;
                }
                if (escapeLetter != 0) {
                    warn("invalid character syntax; use ?\\" + escapeLetter);
                }
            }
            pushback(c);
            setState(EXPR_VALUE);
            return '?';
        } else if (isASCII()) {
            // NOTE(review): every branch below is skipped whenever isASCII() is true; confirm
            // whether this condition was meant to be negated.
            ByteList buffer = new ByteList(1);
            if (!tokenAddMBC(c, buffer)) return EOF;

            setState(EXPR_END);
            return RipperParser.tCHAR;
        } else if (isIdentifierChar(c) && !peek('\n') && isNext_identchar()) {
            // '?' directly followed by a multi-character identifier: treat as ternary.
            pushback(c);
            setState(EXPR_VALUE);

            return '?';
        } else if (c == '\\') {
            if (peek('u')) {
                nextc(); // Eat 'u'
                ByteList oneCharBL = new ByteList(2);
                c = readUTFEscape(oneCharBL, false, false);

                if (c >= 0x80) {
                    tokenAddMBC(c, oneCharBL);
                } else {
                    oneCharBL.append(c);
                }

                setState(EXPR_END);

                return RipperParser.tINTEGER; // FIXME: This should be something else like a tCHAR in 1.9/2.0
            } else {
                c = readEscape();
            }
        }

        setState(EXPR_END);
        // TODO: this isn't handling multibyte yet
        ByteList oneCharBL = new ByteList(1);
        oneCharBL.append(c);
        return RipperParser.tCHAR;
    }

    /**
     * Handles a ']': decrements the paren nesting, restarts the condition and command-argument
     * state, and moves to EXPR_END.
     *
     * @return RipperParser.tRBRACK
     */
    private int rightBracket() {
        parenNest -= 1;

        conditionState.restart();
        cmdArgumentState.restart();

        setState(EXPR_END);

        return RipperParser.tRBRACK;
    }

    /**
     * Handles a '}': restarts the condition and command-argument state and moves to EXPR_END.
     *
     * @return RipperParser.tSTRING_DEND when braceNest is 0 on entry, RipperParser.tRCURLY
     *         otherwise; braceNest is decremented in both cases
     */
    private int rightCurly() {
        conditionState.restart();
        cmdArgumentState.restart();
        setState(EXPR_END);

        final boolean noOpenBrace = braceNest == 0;
        braceNest--;

        if (noOpenBrace) return RipperParser.tSTRING_DEND;
        return RipperParser.tRCURLY;
    }

    /**
     * Handles a ')': decrements the paren nesting, restarts the condition and command-argument
     * state, and moves to EXPR_ENDFN.
     *
     * @return RipperParser.tRPAREN
     */
    private int rightParen() {
        parenNest -= 1;

        conditionState.restart();
        cmdArgumentState.restart();

        setState(EXPR_ENDFN);

        return RipperParser.tRPAREN;
    }
    
    /**
     * Begins a single-quoted string by installing a StringTerm terminated by {@code '}.
     * The str_label flag is added when a label is possible at this position.
     *
     * @param commandState passed through to isLabelPossible
     * @return RipperParser.tSTRING_BEG
     */
    private int singleQuote(boolean commandState) throws IOException {
        int flags = str_squote;
        if (isLabelPossible(commandState)) flags |= str_label;

        lex_strterm = new StringTerm(flags, '\0', '\'');
        return RipperParser.tSTRING_BEG;
    }
    
    /**
     * Lexes input following a '/': the start of a regexp literal, {@code /=}, or division.
     *
     * @param spaceSeen whether whitespace preceded the '/'
     * @return RipperParser.tREGEXP_BEG, tOP_ASGN or tDIVIDE
     */
    private int slash(boolean spaceSeen) throws IOException {
        if (isBEG()) {
            lex_strterm = new StringTerm(str_regexp, '\0', '/');
            return RipperParser.tREGEXP_BEG;
        }

        int next = nextc();
        if (next == '=') {
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        }
        pushback(next);

        if (isSpaceArg(next, spaceSeen)) {
            arg_ambiguous();
            lex_strterm = new StringTerm(str_regexp, '\0', '/');
            return RipperParser.tREGEXP_BEG;
        }

        if (isAfterOperator()) {
            setState(EXPR_ARG);
        } else {
            setState(EXPR_BEG);
        }

        warn_balanced(next, spaceSeen, "/", "regexp literal");
        return RipperParser.tDIVIDE;
    }

    /**
     * Lexes input following a '*': {@code *=}, {@code **=}, the splat / double-splat argument
     * prefixes (tSTAR / tDSTAR), or the binary operators (tSTAR2 / tPOW).
     *
     * @param spaceSeen whether whitespace preceded the '*'
     * @return the token that was recognized
     */
    private int star(boolean spaceSeen) throws IOException {
        int next = nextc();
        final int token;

        if (next == '=') {
            setState(EXPR_BEG);
            return RipperParser.tOP_ASGN;
        }

        if (next == '*') {
            int afterStars = nextc();
            if (afterStars == '=') {
                setState(EXPR_BEG);
                return RipperParser.tOP_ASGN;
            }
            pushback(afterStars);

            if (isSpaceArg(afterStars, spaceSeen)) {
                if (isVerbose() && Options.PARSER_WARN_ARGUMENT_PREFIX.load()) warning("`**' interpreted as argument prefix");
                token = RipperParser.tDSTAR;
            } else if (isBEG()) {
                token = RipperParser.tDSTAR;
            } else {
                warn_balanced(afterStars, spaceSeen, "**", "argument prefix");
                token = RipperParser.tPOW;
            }
        } else {
            pushback(next);

            if (isSpaceArg(next, spaceSeen)) {
                if (isVerbose() && Options.PARSER_WARN_ARGUMENT_PREFIX.load()) warning("`*' interpreted as argument prefix");
                token = RipperParser.tSTAR;
            } else if (isBEG()) {
                token = RipperParser.tSTAR;
            } else {
                warn_balanced(next, spaceSeen, "*", "argument prefix");
                token = RipperParser.tSTAR2;
            }
        }

        // Only the non-assignment forms reach this point.
        if (isAfterOperator()) {
            setState(EXPR_ARG);
        } else {
            setState(EXPR_BEG);
        }
        return token;
    }

    /**
     * Lexes a '~'. After an operator an optional trailing '@' is consumed (the {@code ~@}
     * method name) and the state becomes EXPR_ARG; otherwise the state becomes EXPR_BEG.
     *
     * @return RipperParser.tTILDE
     */
    private int tilde() throws IOException {
        if (!isAfterOperator()) {
            setState(EXPR_BEG);
            return RipperParser.tTILDE;
        }

        int next = nextc();
        if (next != '@') pushback(next);
        setState(EXPR_ARG);

        return RipperParser.tTILDE;
    }

    // Scratch buffer for the text of the numeric literal being scanned. parseNumber resets its
    // size to zero on entry and appends the sign and digits it accepts.
    private ByteList numberBuffer = new ByteList(10); // ascii is good enough.
    /**
     * Parse a number from the input stream.
     *
     * @param c The first character of the number.
     * @return An int constant which represents a token.
     */
    private int parseNumber(int c) throws IOException {
        // Scans a numeric literal whose first character is c (a sign, or the first digit).
        // A leading "0" selects the prefixed forms (0x, 0b, 0d, 0o / bare octal), which are
        // always integers; everything else goes through the decimal/float loop at the bottom.
        // The returned token comes from setIntegerLiteral or getNumberLiteral.
        setState(EXPR_END);

        // The buffer is shared between calls, so drop whatever the previous literal left in it.
        numberBuffer.setRealSize(0);

        if (c == '-') {
            numberBuffer.append((char) c);
            c = nextc();
        } else if (c == '+') {
            // We don't append '+' since Java number parser gets confused
            c = nextc();
        }

        // Last separator-like character ('_', exponent marker or exponent sign) that has not
        // yet been followed by a digit; '\0' when the previous character was a digit.
        int nondigit = 0;

        if (c == '0') {
            int startLen = numberBuffer.length();

            switch (c = nextc()) {
                case 'x' :
                case 'X' : //  hexadecimal
                    c = nextc();
                    if (isHexChar(c)) {
                        for (;; c = nextc()) {
                            if (c == '_') {
                                if (nondigit != '\0') break;
                                nondigit = c;
                            } else if (isHexChar(c)) {
                                nondigit = '\0';
                                numberBuffer.append((char) c);
                            } else {
                                break;
                            }
                        }
                    }
                    pushback(c);

                    if (numberBuffer.length() == startLen) {
                        compile_error("Hexadecimal number without hex-digits.");
                    } else if (nondigit != '\0') {
                        compile_error("Trailing '_' in number.");
                    }
                    return setIntegerLiteral(numberLiteralSuffix(SUFFIX_ALL));
                case 'b' :
                case 'B' : // binary
                    c = nextc();
                    if (c == '0' || c == '1') {
                        for (;; c = nextc()) {
                            if (c == '_') {
                                if (nondigit != '\0') break;
                                nondigit = c;
                            } else if (c == '0' || c == '1') {
                                nondigit = '\0';
                                numberBuffer.append((char) c);
                            } else {
                                break;
                            }
                        }
                    }
                    pushback(c);

                    if (numberBuffer.length() == startLen) {
                        compile_error("Binary number without digits.");
                    } else if (nondigit != '\0') {
                        compile_error("Trailing '_' in number.");
                    }
                    return setIntegerLiteral(numberLiteralSuffix(SUFFIX_ALL));
                case 'd' :
                case 'D' : // decimal
                    c = nextc();
                    if (Character.isDigit(c)) {
                        for (;; c = nextc()) {
                            if (c == '_') {
                                if (nondigit != '\0') break;
                                nondigit = c;
                            } else if (Character.isDigit(c)) {
                                nondigit = '\0';
                                numberBuffer.append((char) c);
                            } else {
                                break;
                            }
                        }
                    }
                    pushback(c);

                    if (numberBuffer.length() == startLen) {
                        // This branch handles the 0d prefix, so report it as decimal.
                        compile_error("Decimal number without digits.");
                    } else if (nondigit != '\0') {
                        compile_error("Trailing '_' in number.");
                    }
                    return setIntegerLiteral(numberLiteralSuffix(SUFFIX_ALL));
                case 'o':
                case 'O':
                    c = nextc();
                    // fall through: "0o" continues exactly like a bare leading-zero octal
                case '0': case '1': case '2': case '3': case '4': //Octal
                case '5': case '6': case '7': case '_':
                    for (;; c = nextc()) {
                        if (c == '_') {
                            if (nondigit != '\0') break;

                            nondigit = c;
                        } else if (c >= '0' && c <= '7') {
                            nondigit = '\0';
                            numberBuffer.append((char) c);
                        } else {
                            break;
                        }
                    }
                    if (numberBuffer.length() > startLen) {
                        pushback(c);

                        if (nondigit != '\0') compile_error("Trailing '_' in number.");

                        return setIntegerLiteral(numberLiteralSuffix(SUFFIX_ALL));
                    }
                    // fall through: no octal digit was read
                case '8' :
                case '9' :
                    compile_error("Illegal octal digit.");
                    // fall through
                case '.' :
                case 'e' :
                case 'E' :
                    // Keep the leading zero and let the general loop below handle c.
                    numberBuffer.append('0');
                    break;
                default :
                    pushback(c);
                    numberBuffer.append('0');
                    return setIntegerLiteral(numberLiteralSuffix(SUFFIX_ALL));
            }
        }

        boolean seen_point = false;
        boolean seen_e = false;

        for (;; c = nextc()) {
            switch (c) {
                case '0' :
                case '1' :
                case '2' :
                case '3' :
                case '4' :
                case '5' :
                case '6' :
                case '7' :
                case '8' :
                case '9' :
                    nondigit = '\0';
                    numberBuffer.append((char) c);
                    break;
                case '.' :
                    if (nondigit != '\0') {
                        pushback(c);
                        compile_error("Trailing '_' in number.");
                    } else if (seen_point || seen_e) {
                        // A second '.' or a '.' after the exponent ends the literal.
                        pushback(c);
                        return getNumberLiteral(numberBuffer.toString(), seen_e, seen_point, nondigit);
                    } else {
                        int c2;
                        if (!Character.isDigit(c2 = nextc())) {
                            // "1.foo": the dot is not part of the number, so put both back.
                            pushback(c2);
                            pushback('.');
                            return getNumberLiteral(numberBuffer.toString(), seen_e, seen_point, nondigit);
                        } else {
                            numberBuffer.append('.');
                            numberBuffer.append((char) c2);
                            seen_point = true;
                            nondigit = '\0';
                        }
                    }
                    break;
                case 'e' :
                case 'E' :
                    if (nondigit != '\0') {
                        compile_error("Trailing '_' in number.");
                    } else if (seen_e) {
                        pushback(c);
                        return getNumberLiteral(numberBuffer.toString(), seen_e, seen_point, nondigit);
                    } else {
                        numberBuffer.append((char) c);
                        seen_e = true;
                        nondigit = c;
                        c = nextc();
                        if (c == '-' || c == '+') {
                            numberBuffer.append((char) c);
                            nondigit = c;
                        } else {
                            pushback(c);
                        }
                    }
                    break;
                case '_' : //  '_' in number just ignored
                    if (nondigit != '\0') {
                        compile_error("Trailing '_' in number.");
                    }
                    nondigit = c;
                    break;
                default :
                    pushback(c);
                    return getNumberLiteral(numberBuffer.toString(), seen_e, seen_point, nondigit);
            }
        }
    }

    // MRI: This is decode_num: chunk
    /**
     * Finishes a decimal literal scanned by parseNumber and picks its token.
     *
     * @param number     the literal text; only used for the float token
     * @param seen_e     whether an exponent was seen
     * @param seen_point whether a decimal point was seen
     * @param nondigit   non-zero when the literal ended on a separator, which is reported
     * @return the token produced by setNumberLiteral / setIntegerLiteral
     */
    private int getNumberLiteral(String number, boolean seen_e, boolean seen_point, int nondigit) throws IOException {
        if (nondigit != '\0') compile_error("Trailing '_' in number.");

        if (!seen_e && !seen_point) {
            return setIntegerLiteral(numberLiteralSuffix(SUFFIX_ALL));
        }

        // With an exponent only SUFFIX_I is requested; otherwise all suffixes are.
        int allowedSuffixes = seen_e ? SUFFIX_I : SUFFIX_ALL;
        int suffix = numberLiteralSuffix(allowedSuffixes);
        int floatToken = getFloatToken(number, suffix);

        return setNumberLiteral(floatToken, suffix);
    }

    /**
     * Moves the lexer to EXPR_END|EXPR_ENDARG and returns the literal's token.
     *
     * @param type   the token chosen so far
     * @param suffix suffix flags; when SUFFIX_I is set the token becomes tIMAGINARY
     * @return the final token
     */
    private int setNumberLiteral(int type, int suffix) {
        boolean imaginary = (suffix & SUFFIX_I) != 0;
        int token = imaginary ? RipperParser.tIMAGINARY : type;

        setState(EXPR_END|EXPR_ENDARG);
        return token;
    }

    /**
     * Picks tRATIONAL when SUFFIX_R is set and tINTEGER otherwise, then defers to
     * setNumberLiteral.
     *
     * @param suffix suffix flags for the literal
     * @return the final token
     */
    private int setIntegerLiteral(int suffix) {
        boolean rational = (suffix & SUFFIX_R) != 0;

        if (rational) return setNumberLiteral(RipperParser.tRATIONAL, suffix);
        return setNumberLiteral(RipperParser.tINTEGER, suffix);
    }

    // Note: parser_tokadd_utf8 variant just for regexp literal parsing.  This variant is to be
    // called when string_literal and regexp_literal.
    /**
     * Copies a Unicode escape into buffer in its source form: the backslash, the 'u', and then
     * either a braced list of codepoints or up to four hex digits.
     *
     * @param buffer receives the escape text
     */
    public void readUTFEscapeRegexpLiteral(ByteList buffer) throws IOException {
        buffer.append('\\');
        buffer.append('u');

        if (!peek('{')) { // handle \\uxxxx
            scanHexLiteral(buffer, 4, true, "Invalid Unicode escape");
            return;
        }

        // handle \\u{...}: each pass copies the '{' or separating whitespace, then one codepoint
        do {
            buffer.append(nextc());

            int codepoint = scanHexLiteral(buffer, 6, false, "invalid Unicode escape");
            if (codepoint > 0x10ffff) {
                compile_error("invalid Unicode codepoint (too large)");
            }
        } while (peek(' ') || peek('\t'));

        int closer = nextc();
        if (closer != '}') {
            compile_error("unterminated Unicode escape");
        }
        buffer.append((char) closer);
    }

    // mri: parser_tokadd_mbchar
    // This is different than MRI in that we return a boolean since we only care whether it was added
    // or not.  The MRI version returns the byte supplied which is never used as a value.
    /**
     * Delegates to tokadd_mbchar.
     *
     * @param first_byte value handed to tokadd_mbchar
     * @param buffer     buffer handed to tokadd_mbchar
     * @return whatever tokadd_mbchar reports
     */
    public boolean tokenAddMBC(int first_byte, ByteList buffer) {
        final boolean added = tokadd_mbchar(first_byte, buffer);
        return added;
    }

    // MRI: parser_tokadd_utf8 sans regexp literal parsing
    /**
     * Reads a Unicode escape after the 'u': either a braced list of codepoints or up to four
     * hex digits. Each codepoint is passed to readUTF8EscapeIntoBuffer when buffer is non-null.
     *
     * @param buffer        destination buffer, or null to only scan
     * @param stringLiteral passed through to readUTF8EscapeIntoBuffer
     * @param symbolLiteral not used by this method
     * @return the last codepoint that was read
     */
    public int readUTFEscape(ByteList buffer, boolean stringLiteral, boolean symbolLiteral) throws IOException {
        if (!peek('{')) { // handle \\uxxxx
            int single = scanHex(4, true, "Invalid Unicode escape");
            if (buffer != null) readUTF8EscapeIntoBuffer(single, buffer, stringLiteral);
            return single;
        }

        // handle \\u{...}
        int last;
        do {
            nextc(); // Eat curly or whitespace
            last = scanHex(6, false, "invalid Unicode escape");
            if (last > 0x10ffff) {
                compile_error("invalid Unicode codepoint (too large)");
            }
            if (buffer != null) readUTF8EscapeIntoBuffer(last, buffer, stringLiteral);
        } while (peek(' ') || peek('\t'));

        int closer = nextc();
        if (closer != '}') {
            compile_error("unterminated Unicode escape");
        }

        return last;
    }
    
    /**
     * Records one escaped codepoint in buffer. Codepoints of 0x80 and above switch the buffer's
     * encoding to UTF8_ENCODING; the character itself is only added for string literals.
     *
     * @param codepoint     the codepoint that was read
     * @param buffer        destination buffer
     * @param stringLiteral whether the character should be appended
     */
    private void readUTF8EscapeIntoBuffer(int codepoint, ByteList buffer, boolean stringLiteral) {
        final boolean multibyte = codepoint >= 0x80;

        if (multibyte) buffer.setEncoding(UTF8_ENCODING);
        if (!stringLiteral) return;

        if (multibyte) {
            tokenAddMBC(codepoint, buffer);
        } else {
            buffer.append((char) codepoint);
        }
    }
    
    /**
     * Reads the character(s) following a backslash and returns the value of the escape.
     *
     * Handles the single-letter escapes, octal and {@code \x} hex constants, the meta form
     * {@code \M-x} (sets bit 0x80) and the control forms {@code \C-x} / {@code \cx}
     * (masks with 0x9f; {@code ?} gives 0x7F). Any other character is returned unchanged.
     *
     * @return the value of the escape
     */
    public int readEscape() throws IOException {
        final String invalidSyntax = "Invalid escape character syntax";
        int ch = nextc();

        switch (ch) {
            case 'n' : return '\n';   // newline
            case 't' : return '\t';   // horizontal tab
            case 'r' : return '\r';   // carriage return
            case 'f' : return '\f';   // form feed
            case 'v' : return 0x0B;   // vertical tab
            case 'a' : return 0x07;   // alarm(bell)
            case 'e' : return 0x1B;   // escape
            case 'b' : return '\b';   // backspace
            case 's' : return ' ';    // space

            case '0' : case '1' : case '2' : case '3' : // octal constant
            case '4' : case '5' : case '6' : case '7' :
                pushback(ch);
                return scanOct(3);

            case 'x' : // hex constant
                return scanHex(2, false, invalidSyntax);

            case 'M' : {
                int meta = nextc();
                if (meta != '-') {
                    compile_error(invalidSyntax);
                } else {
                    meta = nextc();
                    if (meta == '\\') return (char) (readEscape() | 0x80);
                    if (meta == EOF) compile_error(invalidSyntax);
                }
                return (char) ((meta & 0xff) | 0x80);
            }

            case 'C' :
                if (nextc() != '-') {
                    compile_error(invalidSyntax);
                }
                // fall through: "\C-x" continues exactly like "\cx"
            case 'c' : {
                int ctrl = nextc();
                if (ctrl == '\\') {
                    ctrl = readEscape();
                } else if (ctrl == '?') {
                    return 0x7F;
                } else if (ctrl == EOF) {
                    compile_error(invalidSyntax);
                }
                return (char) (ctrl & 0x9f);
            }

            case EOF :
                compile_error(invalidSyntax);
                return ch;

            default : // includes an escaped backslash
                return ch;
        }
    }

    /**
     * Read up to count hexadecimal digits and store those digits in a token buffer.  If strict
     * is set then exactly count hex digits must be present.  If no digits can be read a
     * compile error is reported.  This also returns the codepoint as a value so codepoint
     * ranges can be checked.
     */
    private int scanHexLiteral(ByteList buffer, int count, boolean strict, String errorMessage)
            throws IOException {
        // Reads at most count hex digits, copying each one into buffer as it is accepted, and
        // returns their combined value. The value is accumulated in an int: a 16-bit char
        // cannot hold six hex digits, so range checks such as "> 0x10ffff" would never fire.
        int digitsRead = 0;
        int hexValue = 0;

        for (; digitsRead < count; digitsRead++) {
            int h1 = nextc();

            if (!isHexChar(h1)) {
                pushback(h1);
                break;
            }

            buffer.append(h1);

            hexValue = (hexValue << 4) | (Character.digit(h1, 16) & 15);
        }

        // No hex digit at all, or fewer than required in strict mode.
        if (digitsRead == 0 || (strict && count != digitsRead)) compile_error(errorMessage);

        return hexValue;
    }

    /**
     * Read up to count hexadecimal digits.  If strict is set then exactly count hex digits
     * must be present.  If no digits can be read a compile error is reported.
     */
    private int scanHex(int count, boolean strict, String errorMessage) throws IOException {
        // Accumulates up to count hex digits into a single value, most significant digit first.
        int value = 0;
        int digitsRead = 0;

        while (digitsRead < count) {
            int ch = nextc();

            if (!isHexChar(ch)) {
                pushback(ch); // not ours; leave it for the caller
                break;
            }

            int nibble = Integer.parseInt(String.valueOf((char) ch), 16) & 15;
            value = (value << 4) | nibble;
            digitsRead++;
        }

        // No hex value after the 'x'.
        boolean nothingRead = digitsRead == 0;
        boolean tooFewForStrict = strict && count != digitsRead;
        if (nothingRead || tooFewForStrict) compile_error(errorMessage);

        return value;
    }
}
// Params:  c – first character of the quote construct
// Returns: a token that specifies the quote type
// Params:  c – The first character of the number.
// Returns: An int constant which represents a token.
//
// org.jruby/ jruby-core/ 9.2.9.0/ org/jruby/ext/ripper/RipperLexer.java