/*
 * Copyright (c) 1999, 2001, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 * COMPONENT_NAME: idl.parser
 *
 * ORIGINS: 27
 *
 * Licensed Materials - Property of IBM
 * 5639-D57 (C) COPYRIGHT International Business Machines Corp. 1997, 1999
 * RMI-IIOP v1.0
 *
 */

package com.sun.tools.corba.se.idl;

// NOTES:
// -F46082.51<daz> Remove -stateful feature.
// -D56351<daz> Update computation of RepositoryIDs to CORBA 2.3 (see spec.).
// -D59166<daz> Add escaped-id. info. to identifiers.
// -F60858.1<daz> Add support for -corba option, levels 2.2 and 2.3: accept 2.3
//   keywords as ids.; accept ids. that match keywords in letter, but not in case.
// -D62023<daz> Add support for -corba option, level 2.4: see keyword checking.

import java.io.EOFException;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;

import java.util.Enumeration;
import java.util.Stack;
import java.util.StringTokenizer;
import java.util.Vector;

/**
 *
 **/
class Scanner
{
  // <f46082.51> -Remove stateful feature.
  //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean scanStateful, boolean emitAllIncludes) throws IOException
  // <f60858.1>
  //Scanner (IncludeEntry file, String[] keywords, boolean vbose, boolean emitAllIncludes) throws IOException
  /**
   * Creates a scanner for one IDL file and reads its first character.
   *
   * @param file            entry describing the file to scan; its name is
   *                        resolved by {@link #readFile(IncludeEntry)}
   * @param keywords        keywords to register; they are classified by
   *                        {@link #sortKeywords(String[])}
   * @param vbose           stored as the scanner's verbose flag
   * @param emitAllIncludes stored as the scanner's emitAll flag
   * @param cLevel          CORBA level, later passed to keyword lookup
   * @param debug           when true, getToken prints every token it returns
   * @throws IOException if the file cannot be read
   **/
  Scanner (IncludeEntry file, String[] keywords, boolean vbose,
      boolean emitAllIncludes, float cLevel, boolean debug) throws IOException
  {
    // The file is loaded before any option is recorded; a failure here
    // aborts construction.
    readFile (file);

    // Scanner options.
    verbose    = vbose;
    emitAll    = emitAllIncludes;

    // Keyword tables.
    sortKeywords (keywords);

    corbaLevel = cLevel;
    this.debug = debug;
  } // ctor

  /**
   * Distributes the given keywords over the three keyword tables:
   * keywords with a wildcard ('*', '+' or '.') at either end go to
   * openEndedKeywords, keywords with a wildcard elsewhere go to
   * wildcardKeywords, and all remaining keywords go to keywords.
   *
   * @param keywords the keywords to classify
   **/
  void sortKeywords (String[] keywords)
  {
    for (int index = 0; index < keywords.length; index++)
    {
      String keyword = keywords[index];

      if (wildcardAtEitherEnd (keyword))
      {
        this.openEndedKeywords.addElement (keyword);
        continue;
      }

      if (wildcardsInside (keyword))
      {
        this.wildcardKeywords.addElement (keyword);
        continue;
      }

      // No wildcard at all: a plain keyword.
      this.keywords.addElement (keyword);
    }
  } // sortKeywords

  /**
   * Tells whether the first or the last character of the given string is
   * one of the wildcard characters '*', '+' or '.'.
   *
   * @param string the keyword to inspect
   * @return true if the string starts or ends with a wildcard character;
   *         false otherwise, including for the empty string
   **/
  private boolean wildcardAtEitherEnd (String string)
  {
    final String wildcards = "*+.";
    int length = string.length ();

    // An empty string neither starts nor ends with anything.
    if (length == 0)
      return false;

    char first = string.charAt (0);
    char last  = string.charAt (length - 1);
    return wildcards.indexOf (first) >= 0 || wildcards.indexOf (last) >= 0;
  } // wildcardAtEitherEnd

  /**
   * Tells whether one of the wildcard characters '*', '+' or '.' occurs
   * in the given string with its first occurrence anywhere other than
   * index 0.
   *
   * @param string the keyword to inspect
   * @return true if some wildcard character first occurs after index 0
   **/
  private boolean wildcardsInside (String string)
  {
    final char[] wildcards = { '*', '+', '.' };

    for (int i = 0; i < wildcards.length; i++)
    {
      // indexOf yields the FIRST occurrence; it must not be at index 0.
      if (string.indexOf (wildcards[i]) > 0)
        return true;
    }
    return false;
  } // wildcardsInside

  /**
   * Reads the file described by the given entry.  The file name is taken
   * from file.name () with its first and last characters removed
   * (presumably the enclosing quotes or angle brackets -- confirm against
   * IncludeEntry).
   *
   * @param file the entry naming the file to read
   * @throws IOException if the file cannot be read
   **/
  void readFile (IncludeEntry file) throws IOException
  {
    String delimitedName = file.name ();
    int lastIndex = delimitedName.length () - 1;
    readFile (file, delimitedName.substring (1, lastIndex));
  } // readFile

  /**
   * Loads the whole content of the named file into data.fileBytes,
   * appends the platform line separator, and reads the first character.
   *
   * The reader is drained in a loop because a single read call may
   * return fewer characters than requested, and it is closed even when
   * reading fails.  The buffer is sized from the number of characters
   * actually decoded, which can be smaller than the file's byte length.
   *
   * @param file     the entry recorded as data.fileEntry
   * @param filename the path of the file to load; recorded as data.filename
   * @throws IOException if the file cannot be opened or read
   **/
  void readFile (IncludeEntry file, String filename) throws IOException
  {
    data.fileEntry = file;
    data.filename = filename;

    File idlFile = new File (data.filename);
    // The byte length is an upper bound on the number of characters.
    int len = (int)idlFile.length ();
    final String EOL = System.getProperty ("line.separator");

    char[] contents = new char [len];
    int charsRead = 0;
    FileReader fileReader = new FileReader (idlFile);
    try
    {
      while (charsRead < len)
      {
        int count = fileReader.read (contents, charsRead, len - charsRead);
        if (count < 0)
          break; // end of stream reached before len characters
        charsRead += count;
      }
    }
    finally
    {
      fileReader.close ();
    }

    // <d41679> The content is always terminated by a line separator.
    data.fileBytes = new char [charsRead + EOL.length ()];
    System.arraycopy (contents, 0, data.fileBytes, 0, charsRead);
    EOL.getChars (0, EOL.length (), data.fileBytes, charsRead);

    readChar ();
  } // readFile

  /**
   * Scans and returns the next token of the input.
   *
   * White space, comments and line-continuation backslashes produce no
   * token; the loop keeps scanning until a token is built.  The text of
   * the most recently scanned comment is attached to the returned token
   * as token.comment.  When an EOFException is raised while scanning, a
   * token of type Token.EOF is returned.  If the debug flag is set, the
   * token is printed to System.out before it is returned.
   *
   * @return the next token; never null
   * @throws IOException if a character that cannot start a token is met
   *         (an InvalidCharacter is thrown), or if a helper fails
   **/
  Token getToken () throws IOException
  {
    //System.out.println ("Scanner.getToken char = |" + data.ch + "| (ASCII " + (int)data.ch + ").");

    // The token could be null if the next set of characters
    // is not a token:  white space, comments, ignored preprocessor
    // commands.
    Token token = null;
    String commentText = new String ("");

    while (token == null)
      try
      {
        // Remember where this scan attempt started.
        data.oldIndex = data.fileIndex;
        data.oldLine  = data.line;
        if (data.ch <= ' ') {
          skipWhiteSpace ();
          continue;
        }

        // Special case for wchar and wstring literals.
        // The values are preceded by an L.
        //
        // Bug fix 4382578:  Can't compile a wchar literal.
        //
        // REVISIT.  This maps wchar/wstring literals to
        // our char/string literal types.  Eventually, we
        // need to write stronger checking to be spec
        // compliant in negative cases such as leaving the
        // L off of a wide string or putting it on a string.
        if (data.ch == 'L') {
            // Check to see if the next character is an
            // apostrophe.
            readChar();
            // Note:  This doesn't allow for space between
            // the L and the apostrophe or quote.
            if (data.ch == '\'') {
                // It was a wchar literal.  Get the value
                // and return the token.
                token = getCharacterToken(true);
                readChar();
                continue;
            } else
            if (data.ch == '"') {
                // It was a wstring literal.
                //
                // getUntil assumes we've already passed the
                // first quote.
                readChar ();
                token = new Token (Token.StringLiteral, getUntil ('"'), true);
                readChar ();
                continue;
            } else {
                // It must not have been a wchar literal.
                // Push the input back into the buffer, and
                // fall to the next if case.
                unread(data.ch);
                unread('L');
                readChar();
            }
        }

        if ((data.ch >= 'a' && data.ch <= 'z') ||
            (data.ch >= 'A' && data.ch <= 'Z') ||
            // <f46082.40> Escaped identifier; see data member comments.
            //(data.ch == '_' && underscoreOK)   || <daz>
            (data.ch == '_')   ||
            Character.isLetter (data.ch)) {
            token = getString ();
        } else
        if ((data.ch >= '0' && data.ch <= '9') || data.ch == '.') {
            token = getNumber ();
        } else {
          // Punctuation and operators.  Two-character operators read one
          // character ahead and unread it when it does not belong to the
          // operator.
          switch (data.ch)
          {
            case ';':
              token = new Token (Token.Semicolon);
              break;
            case '{':
              token = new Token (Token.LeftBrace);
              break;
            case '}':
              token = new Token (Token.RightBrace);
              break;
            case ':':
              readChar ();
              if (data.ch == ':')
                token = new Token (Token.DoubleColon);
              else
              {
                unread (data.ch);
                token = new Token (Token.Colon);
              }
              break;
            case ',':
              token = new Token (Token.Comma);
              break;
            case '=':
              readChar ();
              if (data.ch == '=')
                token = new Token (Token.DoubleEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.Equal);
              }
              break;
            case '+':
              token = new Token (Token.Plus);
              break;
            case '-':
              token = new Token (Token.Minus);
              break;
            case '(':
              token = new Token (Token.LeftParen);
              break;
            case ')':
              token = new Token (Token.RightParen);
              break;
            case '<':
              readChar ();
              if (data.ch == '<')
                token = new Token (Token.ShiftLeft);
              else if (data.ch == '=')
                token = new Token (Token.LessEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.LessThan);
              }
              break;
            case '>':
              readChar ();
              if (data.ch == '>')
                token = new Token (Token.ShiftRight);
              else if (data.ch == '=')
                token = new Token (Token.GreaterEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.GreaterThan);
              }
              break;
            case '[':
              token = new Token (Token.LeftBracket);
              break;
            case ']':
              token = new Token (Token.RightBracket);
              break;
            case '\'':
              token = getCharacterToken(false);
              break;
            case '"':
              readChar ();
              token = new Token (Token.StringLiteral, getUntil ('"', false, false, false));
              break;
            case '\\':
              readChar ();
              // If this is at the end of a line, then it is the
              // line continuation character - treat it as white space
              if (data.ch == '\n' || data.ch == '\r')
                token = null;
              else
                token = new Token (Token.Backslash);
              break;
            case '|':
              readChar ();
              if (data.ch == '|')
                token = new Token (Token.DoubleBar);
              else
              {
                unread (data.ch);
                token = new Token (Token.Bar);
              }
              break;
            case '^':
              token = new Token (Token.Carat);
              break;
            case '&':
              readChar ();
              if (data.ch == '&')
                token = new Token (Token.DoubleAmpersand);
              else
              {
                unread (data.ch);
                token = new Token (Token.Ampersand);
              }
              break;
            case '*':
              token = new Token (Token.Star);
              break;
            case '/':
              readChar ();
              // <21jul1997daz>  Extract comments rather than skipping them.
              // Preserve only the comment immediately preceding the next token.
              // (token stays null for a comment, so the loop scans on.)
              if (data.ch == '/')
                //skipLineComment ();
                commentText = getLineComment();
              else if (data.ch == '*')
                //skipBlockComment ();
                commentText = getBlockComment();
              else
              {
                unread (data.ch);
                token = new Token (Token.Slash);
              }
              break;
            case '%':
              token = new Token (Token.Percent);
              break;
            case '~':
              token = new Token (Token.Tilde);
              break;

            // The period token is recognized in getNumber.
            // The period is only valid in a floating point number.
            //case '.':
            //  token = new Token (Token.Period);
            //  break;

            case '#':
              token = getDirective ();
              break;
            case '!':
              readChar ();
              if (data.ch == '=')
                token = new Token (Token.NotEqual);
              else
              {
                unread (data.ch);
                token = new Token (Token.Exclamation);
              }
              break;
            case '?':
              try
              {
                token = replaceTrigraph ();
                break;
              }
              // Not a trigraph: deliberately fall through to the default
              // case, which reports the invalid character.
              catch (InvalidCharacter e) {}
            default:
              throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch);
          }
          // Step past the last character of the token (or comment) just
          // handled by the switch.
          readChar ();
        }
      }
      catch (EOFException e)
      {
        token = new Token (Token.EOF);
      }

    // Transfer comment to parser via token.  <daz>21jul1997
    token.comment = new Comment( commentText );

    //System.out.println ("Scanner.getToken returning token.type = " + token.type);
    //if (token.type == Token.Identifier || token.type == Token.MacroIdentifier || (token.type >= Token.BooleanLiteral && token.type <= Token.StringLiteral))
    //  System.out.println ("Scanner.getToken returns token.name = " + token.name);

    if (debug)
        System.out.println( "Token: " + token ) ;

    return token;
  } // getToken

  /**
   * Makes the scanner read from the given string.  The current scanner
   * data is pushed on dataStack and replaced by a copy whose buffer holds
   * the characters of the string and which is flagged as macro data; the
   * first character of the string is then read.
   *
   * @param string the text to scan next
   **/
  void scanString (String string)
  {
    // Save the current input so that readChar can return to it.
    dataStack.push (data);
    data = new ScannerData (data);

    // Start at the beginning of the new buffer.
    data.oldIndex  = 0;
    data.fileIndex = 0;

    // <f49747.1> The buffer holds characters, not bytes.
    data.fileBytes = string.toCharArray ();
    data.macrodata = true;

    try
    {
      readChar ();
    }
    catch (IOException e)
    {
      // Ignored, exactly as before.
    }
  } // scanString

  /**
   * Switches the scanner to an included file.  The current scanner data
   * is pushed on dataStack and a fresh ScannerData is loaded with the
   * file's content.  If loading fails, the previous data is restored and
   * the exception is rethrown.
   *
   * @param file            the entry recorded for the included file
   * @param filename        the path of the file to read
   * @param includeIsImport recorded on the new scanner data; together
   *                        with the emitAll flag it decides whether
   *                        SymtabEntry.enteringInclude is called
   * @throws IOException if the file cannot be read
   **/
  void scanIncludedFile (IncludeEntry file, String filename, boolean includeIsImport) throws IOException
  {
    dataStack.push (data);

    data = new ScannerData ();
    // Indent one more blank than the including file.
    data.indent = ((ScannerData)dataStack.peek ()).indent + ' ';
    data.includeIsImport = includeIsImport;

    try
    {
      readFile (file, filename);

      if (includeIsImport && !emitAll)
      {
        SymtabEntry.enteringInclude ();
      }

      // <d56351> As of CORBA 2.3, include files define new scope for Repository
      // ID prefixes. The previous Rep. ID is just below the top of the stack and
      // must be restored when the contents of this include file are parsed (see readChar()).
      Parser.enteringInclude ();

      if (verbose)
      {
        System.out.println (data.indent + Util.getMessage ("Compile.parsing", filename));
      }
    }
    catch (IOException e)
    {
      // Undo the push so that scanning continues in the including file.
      data = (ScannerData)dataStack.pop ();
      throw e;
    }
  } // scanIncludedFile

  /**
   * Steps the read position back by one character.  When the character
   * given is a newline and the current data is not macro data, the line
   * counter is decremented as well.
   *
   * @param ch the character being pushed back; only used to detect a
   *           newline, it is not written into the buffer
   **/
  private void unread (char ch)
  {
    boolean newlineInFile = (ch == '\n') && !data.macrodata;
    if (newlineInFile)
    {
      data.line--;
    }
    data.fileIndex--;
  } // unread

  /**
   * Advances to the next input character.
   *
   * If the current buffer still has characters, the next one is stored
   * in data.ch (masked to its low 8 bits) and the line counter is bumped
   * for a newline outside macro data.  If the buffer is exhausted and
   * dataStack holds saved data, the include/macro level is left and the
   * saved data becomes current again; no new character is read in that
   * case.  If the buffer is exhausted and dataStack is empty, an
   * EOFException is thrown.
   *
   * @throws IOException an EOFException at the end of the outermost input
   **/
  void readChar () throws IOException
  {
    // Common case: characters remain in the current buffer.
    if (data.fileIndex < data.fileBytes.length)
    {
      data.ch = (char)(data.fileBytes[data.fileIndex++] & 0x00ff);
      if (data.ch == '\n' && !data.macrodata)
        ++data.line;
      return;
    }

    // Buffer exhausted with nothing to return to: end of input.
    if (dataStack.empty ())
      throw new EOFException ();

    // IBM.11666 - begin
    // <d56351> <d59469> Leaving an include file (not a macro expansion):
    // indicate end-of-scope to the symbol table and to the parser.
    if (!data.macrodata)
    {
      if (!emitAll && data.includeIsImport)
        SymtabEntry.exitingInclude ();
      Parser.exitingInclude ();
    } // IBM.11666 - end

    if (verbose && !data.macrodata)
      System.out.println (data.indent + Util.getMessage ("Compile.parseDone", data.filename));

    // Resume the enclosing input.
    data = (ScannerData)dataStack.pop ();
  } // readChar

  
Starting at a quote, reads a string with possible unicode or octal values until an end quote. Doesn't handle line feeds or comments.
/** * Starting at a quote, reads a string with possible * unicode or octal values until an end quote. Doesn't * handle line feeds or comments. */
private String getWString() throws IOException { readChar(); StringBuffer result = new StringBuffer(); while (data.ch != '"') { if (data.ch == '\\') { // Could be a \ooo octal or // unicode hex readChar(); if (data.ch == 'u') { // Unicode hex int num = getNDigitHexNumber(4); System.out.println("Got num: " + num); System.out.println("Which is: " + (int)(char)num); result.append((char)num); // result.append((char)getNDigitHexNumber(4)); // getNDigitHexNumber reads the next // character, so loop without reading another continue; } else if (data.ch >= '0' && data.ch <= '7') { // Octal result.append((char)get3DigitOctalNumber()); // get3DigitOctalNumber reads the next // character, so loop without reading another continue; } else { // Wasn't either, so just append the // slash and current character. result.append('\\'); result.append(data.ch); } } else { // Just append the character result.append(data.ch); } // Advance to the next character readChar(); } return result.toString(); } /** * **/ private Token getCharacterToken(boolean isWide) throws IOException { // The token name returned contains a string with two elements: // first the character appears, then the representation of the // character. These are typically the same, but they CAN be // different, for example "O\117" Token token = null; readChar (); if ( data.ch == '\\' ) { readChar (); if ((data.ch == 'x') || (data.ch == 'u')) { char charType = data.ch; int hexNum = getNDigitHexNumber ((charType == 'x') ? 
2 : 4); return new Token (Token.CharacterLiteral, ((char)hexNum) + "\\" + charType + Integer.toString (hexNum, 16), isWide ); } if ((data.ch >= '0') && (data.ch <= '7')) { int octNum = get3DigitOctalNumber (); return new Token (Token.CharacterLiteral, ((char)octNum) + "\\" + Integer.toString (octNum, 8), isWide ); } return singleCharEscapeSequence (isWide); } token = new Token (Token.CharacterLiteral, "" + data.ch + data.ch, isWide ); readChar (); return token; } // getCharacterToken /** * **/ private Token singleCharEscapeSequence (boolean isWide) throws IOException { Token token; if (data.ch == 'n') // newline token = new Token (Token.CharacterLiteral, "\n\\n", isWide); else if (data.ch == 't') // horizontal tab token = new Token (Token.CharacterLiteral, "\t\\t", isWide); else if (data.ch == 'v') // vertical tab token = new Token (Token.CharacterLiteral, "\013\\v", isWide); else if (data.ch == 'b') // backspace token = new Token (Token.CharacterLiteral, "\b\\b", isWide); else if (data.ch == 'r') // carriage return token = new Token (Token.CharacterLiteral, "\r\\r", isWide); else if (data.ch == 'f') // form feed token = new Token (Token.CharacterLiteral, "\f\\f", isWide); else if (data.ch == 'a') // alert token = new Token (Token.CharacterLiteral, "\007\\a", isWide); else if (data.ch == '\\') // backslash token = new Token (Token.CharacterLiteral, "\\\\\\", isWide); else if (data.ch == '?') // question mark token = new Token (Token.CharacterLiteral, "?\\?", isWide); else if (data.ch == '\'') // single quote token = new Token (Token.CharacterLiteral, "'\\'", isWide); else if (data.ch == '"') // double quote token = new Token (Token.CharacterLiteral, "\"\\\"", isWide); else throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); readChar (); return token; } // singleCharEscapeSequence private Token getString () throws IOException { StringBuffer sbuf = new StringBuffer() ; boolean escaped = false; // <d59166> 
boolean[] collidesWithKeyword = { false } ; // <d62023> // <f46082.40> An escaped id. begins with '_', which is followed by a normal // identifier. Disallow prefixes of '_' having length > 1. if (data.ch == '_') { sbuf.append( data.ch ) ; readChar (); if (escaped = escapedOK) if (data.ch == '_') throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); } // Build up the string of valid characters until a non-string // character is encountered. while (Character.isLetterOrDigit( data.ch ) || (data.ch == '_')) { sbuf.append( data.ch ) ; readChar() ; } String string = sbuf.toString() ; // <f46082.40> Escaped identifiers - If identifier has '_' prefix, ignore // keyword check and strip '_'; otherwise, perform keyword check. if (!escaped) { // Escaped id ==> ignore keyword check Token result = Token.makeKeywordToken( string, corbaLevel, escapedOK, collidesWithKeyword ) ; if (result != null) return result ; } // At this point the string is an identifier. If it is a // string which is also a Java keyword, prepend an underscore // so that it doesn't generate a compiler error. 
string = getIdentifier (string); // If a left paren immediately follows, this could be a // macro definition, return a MacroIdentifier if (data.ch == '(') { readChar (); return new Token (Token.MacroIdentifier, string, escaped, collidesWithKeyword[0], false); } else return new Token (Token.Identifier, string, escaped, collidesWithKeyword[0], false); } // Wildcard values static final int Star = 0, Plus = 1, Dot = 2, None = 3; /** * **/ private boolean matchesClosedWildKeyword (String string) { boolean found = true; String tmpString = string; Enumeration e = wildcardKeywords.elements (); while (e.hasMoreElements ()) { int wildcard = None; StringTokenizer tokens = new StringTokenizer ((String)e.nextElement (), "*+.", true); if (tokens.hasMoreTokens ()) { String token = tokens.nextToken (); if (tmpString.startsWith (token)) { tmpString = tmpString.substring (token.length ()); while (tokens.hasMoreTokens () && found) { token = tokens.nextToken (); if (token.equals ("*")) wildcard = Star; else if (token.equals ("+")) wildcard = Plus; else if (token.equals (".")) wildcard = Dot; else if (wildcard == Star) { int index = tmpString.indexOf (token); if (index >= 0) tmpString = tmpString.substring (index + token.length ()); else found = false; } else if (wildcard == Plus) { int index = tmpString.indexOf (token); if (index > 0) tmpString = tmpString.substring (index + token.length ()); else found = false; } else if (wildcard == Dot) { int index = tmpString.indexOf (token); if (index == 1) tmpString = tmpString.substring (1 + token.length ()); else found = false; } } if (found && tmpString.equals ("")) break; } } } return found && tmpString.equals (""); } // matchesClosedWildKeyword /** * **/ private String matchesOpenWildcard (String string) { Enumeration e = openEndedKeywords.elements (); String prepend = ""; while (e.hasMoreElements ()) { int wildcard = None; boolean found = true; String tmpString = string; StringTokenizer tokens = new StringTokenizer ((String)e.nextElement 
(), "*+.", true); while (tokens.hasMoreTokens () && found) { String token = tokens.nextToken (); if (token.equals ("*")) wildcard = Star; else if (token.equals ("+")) wildcard = Plus; else if (token.equals (".")) wildcard = Dot; else if (wildcard == Star) { wildcard = None; int index = tmpString.lastIndexOf (token); if (index >= 0) tmpString = blankOutMatch (tmpString, index, token.length ()); else found = false; } else if (wildcard == Plus) { wildcard = None; int index = tmpString.lastIndexOf (token); if (index > 0) tmpString = blankOutMatch (tmpString, index, token.length ()); else found = false; } else if (wildcard == Dot) { wildcard = None; int index = tmpString.lastIndexOf (token); if (index == 1) tmpString = blankOutMatch (tmpString, 1, token.length ()); else found = false; } else if (wildcard == None) if (tmpString.startsWith (token)) tmpString = blankOutMatch (tmpString, 0, token.length ()); else found = false; } // Make sure that, if the last character of the keyword is a // wildcard, that the string matches what the wildcard // requires. if (found) { if (wildcard == Star) ; else if (wildcard == Plus && tmpString.lastIndexOf (' ') != tmpString.length () - 1) ; else if (wildcard == Dot && tmpString.lastIndexOf (' ') == tmpString.length () - 2) ; else if (wildcard == None && tmpString.lastIndexOf (' ') == tmpString.length () - 1) ; else found = false; } // If found, then prepend an underscore. But also try matching // again after leading and trailing blanks are removed from // tmpString. This isn't quite right, but it solves a problem // which surfaced in the Java mapping. For example: // openEndedKeywords = {"+Helper", "+Holder", "+Package"}; // string = fooHelperPackage. // Given the mechanics of the Java mapping, _fooHelperPackage // COULD have a conflict, so for each occurance of a keyword, // an underscore is added, so this would cause two underscores: // __fooHelperPackage. 
To accomplish this, the first time thru // tmpString is "fooHelper " at this point, strip off the // trailing blanks and try matching "fooHelper". This also // matches, so two underscores are prepended. if (found) { prepend = prepend + "_" + matchesOpenWildcard (tmpString.trim ()); break; } } return prepend; } // matchesOpenWildcard /** * **/ private String blankOutMatch (String string, int start, int length) { char[] blanks = new char [length]; for (int i = 0; i < length; ++i) blanks[i] = ' '; return string.substring (0, start) + new String (blanks) + string.substring (start + length); } // blankOutMatch /** * **/ private String getIdentifier (String string) { if (keywords.contains (string)) // string matches a non-wildcard keyword string = '_' + string; else { // Check to see if string matches any wildcard keywords that // aren't open ended (don't have a wildcard as the first or // last character. String prepend = ""; if (matchesClosedWildKeyword (string)) prepend = "_"; else // string did not match any closed wildcard keywords (that // is, keywords with wildcards anywhere but at the beginning // or end of the word). // Now check for * + or . at the beginning or end. // These require special handling because they could match // more than one keyword. prepend an underscore for each // matched keyword. prepend = matchesOpenWildcard (string); string = prepend + string; } return string; } // getIdentifier /** * **/ private Token getDirective () throws IOException { readChar (); String string = new String (); while ((data.ch >= 'a' && data.ch <= 'z') || (data.ch >= 'A' && data.ch <= 'Z')) { string = string + data.ch; readChar (); } unread (data.ch); for (int i = 0; i < Token.Directives.length; ++i) if (string.equals (Token.Directives[i])) return new Token (Token.FirstDirective + i); // If it got this far, it is an unknown preprocessor directive. 
return new Token (Token.Unknown, string); } // getDirective /** * **/ private Token getNumber () throws IOException { if (data.ch == '.') return getFractionNoInteger (); else if (data.ch == '0') return isItHex (); else // the only other possibliities are 1..9 return getInteger (); } // getNumber /** * **/ private Token getFractionNoInteger () throws IOException { readChar (); if (data.ch >= '0' && data.ch <= '9') return getFraction ("."); else return new Token (Token.Period); } // getFractionNoInteger /** * **/ private Token getFraction (String string) throws IOException { while (data.ch >= '0' && data.ch <= '9') { string = string + data.ch; readChar (); } if (data.ch == 'e' || data.ch == 'E') return getExponent (string + 'E'); else return new Token (Token.FloatingPointLiteral, string); } // getFraction /** * **/ private Token getExponent (String string) throws IOException { readChar (); if (data.ch == '+' || data.ch == '-') { string = string + data.ch; readChar (); } else if (data.ch < '0' || data.ch > '9') throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); while (data.ch >= '0' && data.ch <= '9') { string = string + data.ch; readChar (); } return new Token (Token.FloatingPointLiteral, string); } // getExponent /** * **/ private Token isItHex () throws IOException { readChar (); if (data.ch == '.') { readChar (); return getFraction ("0."); } else if (data.ch == 'x' || data.ch == 'X') return getHexNumber ("0x"); else if (data.ch == '8' || data.ch == '9') throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); else if (data.ch >= '0' && data.ch <= '7') return getOctalNumber (); else if (data.ch == 'e' || data.ch == 'E') return getExponent ("0E"); else return new Token (Token.IntegerLiteral, "0"); } // isItHex /** * **/ private Token getOctalNumber () throws IOException { String string = "0" + data.ch; readChar (); while ((data.ch >= '0' && 
data.ch <= '9')) { if (data.ch == '8' || data.ch == '9') throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); string = string + data.ch; readChar (); } return new Token (Token.IntegerLiteral, string); } // getOctalNumber /** * **/ private Token getHexNumber (String string) throws IOException { readChar (); if ((data.ch < '0' || data.ch > '9') && (data.ch < 'a' || data.ch > 'f') && (data.ch < 'A' || data.ch > 'F')) throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); else while ((data.ch >= '0' && data.ch <= '9') || (data.ch >= 'a' && data.ch <= 'f') || (data.ch >= 'A' && data.ch <= 'F')) { string = string + data.ch; readChar (); } return new Token (Token.IntegerLiteral, string); } // getHexNumber /** * **/ private int getNDigitHexNumber (int n) throws IOException { readChar (); if (!isHexChar (data.ch)) throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); String string = "" + data.ch; readChar (); for (int i = 2; i <= n; i++) { if (!isHexChar( data.ch)) break; string += data.ch; readChar (); } try { return Integer.parseInt (string, 16); } catch (NumberFormatException e) { } return 0; } // getNDigitHexNumber /** * **/ private boolean isHexChar ( char hex ) { return ((data.ch >= '0') && (data.ch <= '9')) || ((data.ch >= 'a') && (data.ch <= 'f')) || ((data.ch >= 'A') && (data.ch <= 'F')); } /** * **/ private int get3DigitOctalNumber () throws IOException { char firstDigit = data.ch; String string = "" + data.ch; readChar (); if (data.ch >= '0' && data.ch <= '7') { string = string + data.ch; readChar (); if (data.ch >= '0' && data.ch <= '7') { string = string + data.ch; if (firstDigit > '3') // This is a 3-digit number bigger than 377 throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), firstDigit); readChar (); } } int ret = 0; try { 
ret = Integer.parseInt (string, 8); } catch (NumberFormatException e) { throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), string.charAt (0)); } return ret; } // get3DigitOctalNumber /** * **/ private Token getInteger () throws IOException { String string = "" + data.ch; readChar (); if (data.ch == '.') { readChar (); return getFraction (string + '.'); } else if (data.ch == 'e' || data.ch == 'E') return getExponent (string + 'E'); else if (data.ch >= '0' && data.ch <= '9') while (data.ch >= '0' && data.ch <= '9') { string = string + data.ch; readChar (); if (data.ch == '.') { readChar (); return getFraction (string + '.'); } } return new Token (Token.IntegerLiteral, string); } // getInteger /** * **/ private Token replaceTrigraph () throws IOException { readChar (); if (data.ch == '?') { readChar (); if (data.ch == '=') data.ch = '#'; else if (data.ch == '/') data.ch = '\\'; else if (data.ch == '\'') data.ch = '^'; else if (data.ch == '(') data.ch = '['; else if (data.ch == ')') data.ch = ']'; else if (data.ch == '!') data.ch = '|'; else if (data.ch == '<') data.ch = '{'; else if (data.ch == '>') data.ch = '}'; else if (data.ch == '-') data.ch = '~'; else { unread (data.ch); unread ('?'); throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); } return getToken (); } else { unread ('?'); throw new InvalidCharacter (data.filename, currentLine (), currentLineNumber (), currentLinePosition (), data.ch); } } // replaceTrigraph /** * **/ void skipWhiteSpace () throws IOException { while (data.ch <= ' ') readChar (); } // skipWhiteSpace /** * **/ private void skipBlockComment () throws IOException { try { boolean done = false; readChar (); while (!done) { while (data.ch != '*') readChar (); readChar (); if (data.ch == '/') done = true; } } catch (EOFException e) { ParseException.unclosedComment (data.filename); throw e; } } // skipBlockComment /** * **/ void 
skipLineComment () throws IOException { while (data.ch != '\n') readChar (); } // skipLineComment // The following two routines added to extract comments rather // than ignore them.
Extract a line comment from the input buffer.
/**
 * Extract a line comment from the input buffer.
 *
 * The returned text starts with "/" followed by every character from
 * the current one up to, but not including, the terminating newline;
 * carriage returns are dropped.  On return the current character is
 * the newline.
 **/
private String getLineComment () throws IOException
{
  StringBuffer comment = new StringBuffer ("/");
  for (; data.ch != '\n'; readChar ())
  {
    if (data.ch != '\r')
    {
      comment.append (data.ch);
    }
  }
  return comment.toString ();
} // getLineComment
// Extract a block comment from the input buffer.
/** * Extract a block comment from the input buffer. **/
private String getBlockComment () throws IOException { StringBuffer sb = new StringBuffer ("/*"); try { boolean done = false; readChar (); sb.append (data.ch); while (!done) { while (data.ch != '*') { readChar (); sb.append (data.ch); } readChar (); sb.append (data.ch); if (data.ch == '/') done = true; } } catch (EOFException e) { ParseException.unclosedComment (data.filename); throw e; } return sb.toString (); } // getBlockComment /** * **/ Token skipUntil (char c) throws IOException { while (data.ch != c) { if (data.ch == '/') { readChar (); if (data.ch == '/') { skipLineComment (); // If this is skipping until the newline, skipLineComment // reads past the newline, so it won't be seen by the // while loop conditional check. if (c == '\n') break; } else if (data.ch == '*') skipBlockComment (); } else readChar (); } return getToken (); } // skipUntil // getUntil is used for macro definitions and to get quoted // strings, so characters within "("...")" and '"'...'"' are // ignored. Ie getUntil ',' on (,,,,),X will return (,,,,) String getUntil (char c) throws IOException { return getUntil (c, true, true, true); } String getUntil (char c, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException { String string = ""; while (data.ch != c) string = appendToString (string, allowQuote, allowCharLit, allowComment); return string; } // getUntil /** * **/ String getUntil (char c1, char c2) throws IOException { String string = ""; while (data.ch != c1 && data.ch != c2) string = appendToString (string, false, false, false); return string; } // getUntil /** * **/ private String appendToString (String string, boolean allowQuote, boolean allowCharLit, boolean allowComment) throws IOException { // Ignore any comments if they are allowed if (allowComment && data.ch == '/') { readChar (); if (data.ch == '/') skipLineComment (); else if (data.ch == '*') skipBlockComment (); else string = string + '/'; } // Handle line continuation character else if (data.ch 
== '\\') { readChar (); if (data.ch == '\n') readChar (); else if (data.ch == '\r') { readChar (); if (data.ch == '\n') readChar (); } else { string = string + '\\' + data.ch; readChar (); } } // characters within "("...")" and '"'...'"' are ignored. // Ie getUntil ',' on (,,,,),X will return (,,,) else { if (allowCharLit && data.ch == '"') { readChar (); string = string + '"'; while (data.ch != '"') string = appendToString (string, true, false, allowComment); } else if (allowQuote && allowCharLit && data.ch == '(') { readChar (); string = string + '('; while (data.ch != ')') string = appendToString (string, false, false, allowComment); } else if (allowQuote && data.ch == '\'') { readChar (); string = string + "'"; while (data.ch != '\'') string = appendToString (string, false, true, allowComment); } string = string + data.ch; readChar (); } return string; } // appendToString /** * **/ String getStringToEOL () throws IOException { String string = new String (); while (data.ch != '\n') { if (data.ch == '\\') { readChar (); if (data.ch == '\n') readChar (); else if (data.ch == '\r') { readChar (); if (data.ch == '\n') readChar (); } else { string = string + data.ch; readChar (); } } else { string = string + data.ch; readChar (); } } return string; } // getStringToEOL /** * **/ String filename () { return data.filename; } // filename /** * **/ IncludeEntry fileEntry () { return data.fileEntry; } // fileEntry /** * **/ int currentLineNumber () { return data.line; } // currentLineNumber /** * **/ int lastTokenLineNumber () { return data.oldLine; } // lastTokenLineNumber private int BOL; // Beginning Of Line /** * **/ String currentLine () { BOL = data.fileIndex - 1; try { // If the current position is at the end of the line, // set BOL to before the end of the line so the whole // line is returned. 
if (data.fileBytes[BOL - 1] == '\r' && data.fileBytes[BOL] == '\n') BOL -= 2; else if (data.fileBytes[BOL] == '\n') --BOL; while (data.fileBytes[BOL] != '\n') --BOL; } catch (ArrayIndexOutOfBoundsException e) { BOL = -1; } ++BOL; // Go to the first character AFTER the newline int EOL = data.fileIndex - 1; try { while (data.fileBytes[EOL] != '\n' && data.fileBytes[EOL] != '\r') ++EOL; } catch (ArrayIndexOutOfBoundsException e) { EOL = data.fileBytes.length; } if (BOL < EOL) return new String (data.fileBytes, BOL, EOL - BOL); else return ""; } // currentLine /** * **/ String lastTokenLine () { int saveFileIndex = data.fileIndex; data.fileIndex = data.oldIndex; String ret = currentLine (); data.fileIndex = saveFileIndex; return ret; } // lastTokenLine /** * **/ int currentLinePosition () { return data.fileIndex - BOL; } // currentLinePosition /** * **/ int lastTokenLinePosition () { return data.oldIndex - BOL; } // lastTokenLinePosition // The scanner data is moved to a separate class so that all of the // data can easily be pushed and popped to a stack. // The data must be stackable for macros and #included files. When // a macro is encountered: the current stack data is reserved on // the stack; the stack is loaded with the macro info; processing // proceeds with this data. The same is true for #included files. // It may seem that the entire Scanner should be put on a stack in // the Parser since all the scanner data is stackable. But that // would mean instantiating a new scanner. The scanner must // continue from where it left off; when certain things cross file // boundaries, they must be handled by the scanner, not the parser, // things like: block comments, quoted strings, tokens. 
private ScannerData data = new ScannerData (); private Stack dataStack = new Stack (); private Vector keywords = new Vector (); private Vector openEndedKeywords = new Vector (); private Vector wildcardKeywords = new Vector (); private boolean verbose; // <f46082.40> Identifiers starting with '_' are considered "Escaped", // except when scanned during preprocessing. Class Preprocessor is // responsible to modify the escapedOK flag accordingly. Since preceding // underscores are now legal when scanning identifiers as well as // macro identifier, underscoreOK is obsolete. // // boolean underscoreOK = false; boolean escapedOK = true; // <f46082.51> Remove -stateful feature. // boolean stateful; private boolean emitAll; private float corbaLevel; private boolean debug ; } // class Scanner // This is a dumb class, really just a struct. It contains all of the // scanner class's data in one place so that that data can be easily // pushed and popped to a stack. /** * **/ class ScannerData { /** * **/ public ScannerData () { } // ctor /** * **/ public ScannerData (ScannerData that) { indent = that.indent; fileEntry = that.fileEntry; filename = that.filename; fileBytes = that.fileBytes; fileIndex = that.fileIndex; oldIndex = that.oldIndex; ch = that.ch; line = that.line; oldLine = that.oldLine; macrodata = that.macrodata; includeIsImport = that.includeIsImport; } // copy ctor String indent = ""; IncludeEntry fileEntry = null; String filename = ""; // fileBytes is a byte array rather than a char array. This is // safe because OMG IDL is specified to be ISO Latin-1 whose high- // order byte is always 0x0. <f49747.1> Converted from byte[] to char[] // to employ Reader classes, which have Character encoding features. <ajb> //byte[] fileBytes = null; char[] fileBytes = null; int fileIndex = 0; int oldIndex = 0; char ch; int line = 1; int oldLine = 1; boolean macrodata = false; boolean includeIsImport = false; } // class ScannerData