java/9 : jdk.xml.bind/com/sun/xml/internal/dtdparser/InputEntity.java

InputEntity
https://openjdk.java.net/
GPLv2 + Classpath Exception
/*
 * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package com.sun.xml.internal.dtdparser;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import java.io.CharConversionException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.Arrays;
import java.util.Locale;

This is how the parser talks to its input entities, of all kinds.
The entities are in a stack.

 For internal entities, the character arrays are referenced here,
and read from as needed (they're read-only).  External entities have
mutable buffers, that are read into as needed.

 Note: This maps CRLF (and CR) to LF without regard for
whether it's in an external (parsed) entity or not.  The XML 1.0 spec
is inconsistent in explaining EOL handling; this is the sensible way.
Author: David Brownell, Janet Koenig
Version: 1.4 00/08/05
/**
 * This is how the parser talks to its input entities, of all kinds.
 * The entities are in a stack.
 * <p>
 * <P> For internal entities, the character arrays are referenced here,
 * and read from as needed (they're read-only).  External entities have
 * mutable buffers, that are read into as needed.
 * <p>
 * <P> <em>Note:</em> This maps CRLF (and CR) to LF without regard for
 * whether it's in an external (parsed) entity or not.  The XML 1.0 spec
 * is inconsistent in explaining EOL handling; this is the sensible way.
 *
 * @author David Brownell
 * @author Janet Koenig
 * @version 1.4 00/08/05
 */
public class InputEntity {
    private int start, finish;
    private char buf [];
    private int lineNumber = 1;
    private boolean returnedFirstHalf = false;
    private boolean maybeInCRLF = false;

    // name of entity (never main document or unnamed DTD PE)
    private String name;

    private InputEntity next;

    // for system and public IDs in diagnostics
    private InputSource input;

    // this is a buffer; some buffers can be replenished.
    private Reader reader;
    private boolean isClosed;

    private DTDEventListener errHandler;
    private Locale locale;

    private StringBuffer rememberedText;
    private int startRemember;

    // record if this is a PE, so endParsedEntity won't be called
    private boolean isPE;

    // InputStreamReader throws an internal per-read exception, so
    // we minimize reads.  We also add a byte to compensate for the
    // "ungetc" byte we keep, so that our downstream reads are as
    // nicely sized as we can make them.
    final private static int BUFSIZ = 8 * 1024 + 1;

    final private static char newline [] = {'\n'};

    /**
     * Creates a new, not yet initialized entity that reports errors to the
     * given listener and builds its messages for the given locale.
     *
     * @param h listener stored as this entity's error handler
     * @param l locale stored for message lookup
     * @return the new entity; one of the {@code init} methods must still
     *         be called on it to attach input
     */
    public static InputEntity getInputEntity(DTDEventListener h, Locale l) {
        final InputEntity entity = new InputEntity();
        entity.locale = l;
        entity.errHandler = h;
        return entity;
    }

    // Private and empty: getInputEntity() is the way callers obtain
    // instances, and it fills in the error handler and locale itself.
    private InputEntity() {
    }

    /**
     * Tells whether this is an internal entity reader, which may safely be
     * "popped" as needed.  External entities have syntax to uphold, while
     * internal parameter entities have at most validity constraints to
     * monitor.  Only external entities get decent location diagnostics.
     *
     * @return true iff this entity has no reader, i.e. it reads from a
     *         fixed character array
     */
    public boolean isInternal() {
        return null == reader;
    }

    /**
     * Tells whether this is the toplevel document.
     *
     * @return true iff no other entity lies below this one on the stack
     */
    public boolean isDocument() {
        return null == next;
    }

    /**
     * Tells whether this is a parameter entity expansion, in which case
     * the listener's endParsedEntity callback is not supposed to be called.
     *
     * @return the PE flag that was handed to {@code init}
     */
    public boolean isParameterEntity() {
        return this.isPE;
    }

    /**
     * Returns the name of the current entity.
     *
     * @return the name that was handed to {@code init}
     */
    public String getName() {
        return this.name;
    }

    /**
     * Sets this entity up to read an external parsed entity.
     *
     * <p>The character stream of the input source is preferred.  Failing
     * that, its byte stream is wrapped in an {@code XmlReader}, passing the
     * declared encoding along when there is one.  When neither stream is
     * present, the system ID is opened as a URL, but only if the system
     * property {@code enableExternalEntityProcessing} is "true"; otherwise
     * fatal error P-082 is raised.
     *
     * @param in    where the entity text comes from
     * @param name  the entity's name
     * @param stack the entity that becomes this one's parent on the stack
     * @param isPE  whether this is a parameter entity
     * @throws IOException  if the input cannot be opened
     * @throws SAXException on fatal error P-082, or P-069 from the
     *                      recursion check
     */
    public void init(InputSource in, String name, InputEntity stack,
                     boolean isPE)
            throws IOException, SAXException {

        this.input = in;
        this.isPE = isPE;
        this.reader = in.getCharacterStream();

        if (this.reader == null) {
            final InputStream bytes = in.getByteStream();

            if (bytes != null) {
                if (in.getEncoding() != null) {
                    this.reader = XmlReader.createReader(in.getByteStream(), in.getEncoding());
                } else {
                    this.reader = XmlReader.createReader(in.getByteStream());
                }
            } else if (Boolean.parseBoolean(System.getProperty("enableExternalEntityProcessing"))) {
                this.reader = XmlReader.createReader(new URL(in.getSystemId()).openStream());
            } else {
                fatal("P-082", new Object[] {in.getSystemId()});
            }
        }

        this.next = stack;
        this.buf = new char[BUFSIZ];
        this.name = name;
        checkRecursion(stack);
    }

    /**
     * Sets this entity up to read an internal parsed entity.  The text is
     * copied, so the caller's array is never modified through this entity.
     *
     * @param b     the entity's replacement text
     * @param name  the entity's name
     * @param stack the entity that becomes this one's parent on the stack
     * @param isPE  whether this is a parameter entity
     * @throws SAXException on fatal error P-069 from the recursion check
     */
    public void init(char b [], String name, InputEntity stack, boolean isPE)
            throws SAXException {

        this.next = stack;
        this.buf = b.clone();
        this.finish = b.length;
        this.name = name;
        this.isPE = isPE;
        checkRecursion(stack);
    }

    // Raises fatal error P-069 when an entity further down the stack has
    // the same name as this one.  The scan starts at the entry below the
    // given one; the given entry itself is not compared.
    private void checkRecursion(InputEntity stack)
            throws SAXException {

        if (stack == null) {
            return;
        }

        InputEntity ancestor = stack.next;
        while (ancestor != null) {
            if (ancestor.name != null && ancestor.name.equals(name)) {
                fatal("P-069", new Object[]{name});
            }
            ancestor = ancestor.next;
        }
    }

    /**
     * Closes this entity and hands back the one below it on the stack.
     * The caller is expected to have made sure nothing is left to read.
     *
     * @return the parent entity, or null if this was the bottom one
     */
    public InputEntity pop() throws IOException {

        final InputEntity parent = next;
        close();
        return parent;
    }

    returns true iff there's no more data to consume ...
/**
     * Tells whether all data has been consumed, trying to refill the
     * buffer first when it is empty.  Used to check well-formedness of
     * included entities and to pop input entities at the right time; EOF
     * is not always legal.
     *
     * @return true iff there's no more data to consume
     */
    public boolean isEOF() throws IOException, SAXException {

        if (start < finish) {
            return false;
        }
        fillbuf();
        return finish <= start;
    }

    Returns the name of the encoding in use, else null; the name
returned is in as standard a form as we can get.
/**
     * Returns the name of the encoding in use, in as standard a form as
     * can be obtained.
     *
     * @return the encoding reported by the underlying reader, or null when
     *         there is no reader or it is of a kind that cannot report one
     */
    public String getEncoding() {

        // a null reader matches neither test and falls through to null
        if (reader instanceof XmlReader) {
            return ((XmlReader) reader).getEncoding();
        }

        // XXX prefer a java2std() call to normalize names...
        if (reader instanceof InputStreamReader) {
            return ((InputStreamReader) reader).getEncoding();
        }
        return null;
    }


    returns the next name char, or NUL ... faster than getc(),
and the common "name or nmtoken must be next" case won't
need ungetc().
/**
     * Consumes and returns the next character if it is a name character.
     * Faster than getc(), and the common "name or nmtoken must be next"
     * case needs no ungetc().
     *
     * @return the name character, or NUL (0) if the next character is not
     *         one or no input is left; in that case nothing is consumed
     */
    public char getNameChar() throws IOException, SAXException {

        if (finish <= start) {
            fillbuf();
        }
        if (finish <= start) {
            return 0;
        }

        final char candidate = buf[start];
        if (!XmlChars.isNameChar(candidate)) {
            return 0;
        }
        start++;
        return candidate;
    }

    gets the next Java character -- might be part of an XML
text character represented by a surrogate pair, or be
the end of the entity.
/**
     * Gets the next Java character -- might be part of an XML
     * text character represented by a surrogate pair, or be
     * the end of the entity.
     *
     * <p>Ordinary XML characters are returned unchanged.  In an external
     * entity a CR, or a CR immediately followed by LF, is returned as a
     * single LF and counted as one line.  A high surrogate is returned and
     * remembered; the following call must then see a low surrogate.
     *
     * @return the next character
     * @throws EndOfInputException if no character is available even after
     *         trying to refill the buffer
     * @throws SAXException for fatal errors P-070 (high surrogate not
     *         followed by a low one) and P-071 (character not allowed)
     */
    public char getc() throws IOException, SAXException {

        if (finish <= start)
            fillbuf();
        if (finish > start) {
            char c = buf[start++];

            // [2] Char ::= #x0009 | #x000A | #x000D
            //            | [#x0020-#xD7FF]
            //            | [#xE000-#xFFFD]
            // plus surrogate _pairs_ representing [#x10000-#x10ffff]

            // the previous call returned a high surrogate, so only a
            // low surrogate is acceptable here
            if (returnedFirstHalf) {
                if (c >= 0xdc00 && c <= 0xdfff) {
                    returnedFirstHalf = false;
                    return c;
                } else
                    fatal("P-070", new Object[]{Integer.toHexString(c)});
            }
            // fast path: everything legal except line ends and surrogates
            if ((c >= 0x0020 && c <= 0xD7FF)
                    || c == 0x0009
                    // no surrogates!
                    || (c >= 0xE000 && c <= 0xFFFD))
                return c;

            //
            // CRLF and CR are both line ends; map both to LF, and
            // keep line count correct.
            //
            else if (c == '\r' && !isInternal()) {
                // maybeInCRLF stops the nested call from counting the LF
                // of a CRLF as a second line
                // NOTE(review): if the nested getc() throws (e.g. the CR
                // is the last character), maybeInCRLF is left set -- confirm
                // whether callers can observe that
                maybeInCRLF = true;
                c = getc();
                if (c != '\n')
                    ungetc();
                maybeInCRLF = false;

                lineNumber++;
                return '\n';

            } else if (c == '\n' || c == '\r') { // LF, or 2nd char in CRLF
                // internal entities never touch the line count
                if (!isInternal() && !maybeInCRLF)
                    lineNumber++;
                return c;
            }

            // surrogates... a high surrogate starts a pair
            if (c >= 0xd800 && c < 0xdc00) {
                returnedFirstHalf = true;
                return c;
            }

            // anything left is not a legal XML character
            fatal("P-071", new Object[]{Integer.toHexString(c)});
        }
        throw new EndOfInputException();
    }


    lookahead one character
/**
     * Looks ahead one character and consumes it only if it is the
     * expected one.
     *
     * @param c the character to look for
     * @return true iff the next character was {@code c} (it has then been
     *         consumed); false if it differs or no input is left
     */
    public boolean peekc(char c) throws IOException, SAXException {

        if (finish <= start) {
            fillbuf();
        }
        if (finish > start && buf[start] == c) {
            start++;
            return true;
        }
        return false;
    }


    two character pushback is guaranteed
/**
     * Pushes the most recently read character back; two character
     * pushback is guaranteed.  Pushing back a line end of an external
     * entity takes the line count back by one, and pushing back any other
     * character forgets a pending first half of a surrogate pair.
     *
     * @throws InternalError if the buffer is already at its beginning
     */
    public void ungetc() {

        if (start == 0) {
            throw new InternalError("ungetc");
        }

        final char pushed = buf[--start];
        final boolean lineEnd = pushed == '\n' || pushed == '\r';

        if (lineEnd) {
            if (!isInternal()) {
                lineNumber--;
            }
        } else {
            returnedFirstHalf = false;
        }
    }


    optional grammatical whitespace (discarded)
/**
     * Skips optional grammatical whitespace (the text is discarded).
     *
     * <p>For external entities the line count is advanced once per line
     * end: a CR, an LF, or a CR immediately followed by an LF each count
     * as exactly one line.
     *
     * @return true iff at least one whitespace character was skipped
     */
    public boolean maybeWhitespace()
            throws IOException, SAXException {

        boolean isSpace = false;
        // true only while the character just consumed was a CR, so that
        // the LF of a CRLF is not counted as a second line
        boolean sawCR = false;

        // [3] S ::= #20 | #09 | #0D | #0A
        for (; ;) {
            if (finish <= start)
                fillbuf();
            if (finish <= start)
                return isSpace;

            char c = buf[start];
            if (c != 0x20 && c != 0x09 && c != '\n' && c != '\r')
                return isSpace;        // not whitespace: leave it unread

            start++;
            isSpace = true;

            //
            // CR, LF are line endings ... CRLF is one, not two!
            //
            if ((c == '\n' || c == '\r') && !isInternal()) {
                if (!(c == '\n' && sawCR))
                    lineNumber++;
            }

            // Re-evaluated for every character: previously the flag stayed
            // set after the LF of a CRLF (and across spaces/tabs), so later
            // LFs in the same run were silently not counted.
            sawCR = (c == '\r');
        }
    }


    normal content; whitespace in markup may be handled
specially if the parser uses the content model.

 content terminates with markup delimiter characters,
namely ampersand (&amp;) and left angle bracket (&lt;).

 the document handler's characters() method is called
on all the content found
/**
     * Delivers normal character content straight out of the buffer;
     * whitespace in markup may be handled specially if the parser uses
     * the content model.
     *
     * <p>Content terminates with markup delimiter characters, namely
     * ampersand and left angle bracket; the delimiter itself is left
     * unread.  The document handler's characters() method is called on
     * all the content found.  In external entities CR and CRLF are
     * reported as a single LF.
     *
     * @param docHandler receives the character data
     * @return true iff any character data was reported to the handler
     * @throws SAXException for fatal errors: P-071 (illegal character),
     *         P-072 ("]]&gt;" in content), P-081 (entity ends on an unpaired
     *         surrogate), or P-074 from the surrogate pair check
     */
    public boolean parsedContent(DTDEventListener docHandler
                                 /*ElementValidator validator*/)
            throws IOException, SAXException {

        // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)

        int first;        // first char to return
        int last;        // last char to return
        boolean sawContent;    // sent any chars?
        char c;

        // deliver right out of the buffer, until delimiter, EOF,
        // or error, refilling as we go
        for (first = last = start, sawContent = false; ; last++) {

            // buffer empty?
            if (last >= finish) {
                // flush what was scanned so far before the buffer is
                // compacted by the refill
                if (last > first) {
//            validator.text ();
                    docHandler.characters(buf, first, last - first);
                    sawContent = true;
                    start = last;
                }
                if (isEOF())    // calls fillbuf
                    return sawContent;
                first = start;
                last = first - 1;    // incremented in loop
                continue;
            }

            c = buf[last];

            //
            // pass most chars through ASAP; this inlines the code of
            // [2] !XmlChars.isChar(c) leaving only characters needing
            // special treatment ... line ends, surrogates, and:
            //    0x0026 == '&'
            //    0x003C == '<'
            //    0x005D == ']'
            // Comparisons ordered for speed on 'typical' text
            //
            if ((c > 0x005D && c <= 0xD7FF)    // a-z and more
                    || (c < 0x0026 && c >= 0x0020)    // space & punct
                    || (c > 0x003C && c < 0x005D)    // A-Z & punct
                    || (c > 0x0026 && c < 0x003C)    // 0-9 & punct
                    || c == 0x0009
                    || (c >= 0xE000 && c <= 0xFFFD)
            )
                continue;

            // terminate on markup delimiters
            if (c == '<' || c == '&')
                break;

            // count lines
            if (c == '\n') {
                if (!isInternal())
                    lineNumber++;
                continue;
            }

            // External entities get CR, CRLF --> LF mapping
            // Internal ones got it already, and we can't repeat
            // else we break char ref handling!!
            if (c == '\r') {
                if (isInternal())
                    continue;

                // report the text before the CR, then one LF in its place
                docHandler.characters(buf, first, last - first);
                docHandler.characters(newline, 0, 1);
                sawContent = true;
                lineNumber++;
                if (finish > (last + 1)) {
                    // swallow the LF of a CRLF pair
                    if (buf[last + 1] == '\n')
                        last++;
                } else {    // CR at end of buffer
// XXX case not yet handled:  CRLF here will look like two lines
                }
                first = start = last + 1;
                continue;
            }

            // ']]>' is a WF error -- must fail if we see it
            if (c == ']') {
                // finish - last is the number of buffered chars from
                // the ']' onwards
                switch (finish - last) {
                // for suspicious end-of-buffer cases, get more data
                // into the buffer to rule out this sequence.
                case 2:
                    if (buf[last + 1] != ']')
                        continue;
                    // FALLTHROUGH

                case 1:
                    // nothing more can arrive, so it cannot be "]]>"
                    if (reader == null || isClosed)
                        continue;
                    // NOTE(review): a ']' as the very first scanned
                    // char at the end of the buffer ends up here --
                    // confirm callers cannot trigger this
                    if (last == first)
                        throw new InternalError("fillbuf");
                    last--;
                    if (last > first) {
//            validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last;
                    }
                    fillbuf();
                    first = last = start;
                    continue;

                    // otherwise any "]]>" would be buffered, and we can
                    // see right away if that's what we have
                default:
                    if (buf[last + 1] == ']' && buf[last + 2] == '>')
                        fatal("P-072", null);
                    continue;
                }
            }

            // correctly paired surrogates are OK
            if (c >= 0xd800 && c <= 0xdfff) {
                // the pair's second half is not buffered yet
                if ((last + 1) >= finish) {
                    if (last > first) {
//            validator.text ();
                        docHandler.characters(buf, first, last - first);
                        sawContent = true;
                        start = last + 1;
                    }
                    if (isEOF()) {    // calls fillbuf
                        fatal("P-081",
                                new Object[]{Integer.toHexString(c)});
                    }
                    // NOTE(review): scanning resumes past the char at
                    // 'start' without a pair check -- verify this
                    // buffer-boundary path against a split pair
                    first = start;
                    last = first;
                    continue;
                }
                if (checkSurrogatePair(last))
                    last++;
                else {
                    last--;
                    // also terminate on surrogate pair oddities
                    break;
                }
                continue;
            }

            fatal("P-071", new Object[]{Integer.toHexString(c)});
        }
        // stopped at a delimiter (or surrogate oddity): report the rest
        if (last == first)
            return sawContent;
//    validator.text ();
        docHandler.characters(buf, first, last - first);
        start = last;
        return true;
    }


    CDATA -- character data, terminated by "]]>" and optionally including unescaped markup delimiters (ampersand and left angle bracket). This should otherwise be exactly like character data, modulo differences in error report details. 
 The document handler's characters() or ignorableWhitespace()
methods are invoked on all the character data found
Params: docHandler –               gets callbacks for character data
ignorableWhitespace –      if true, whitespace characters will
                                be reported using docHandler.ignorableWhitespace(); implicitly,
                                non-whitespace characters will cause validation errors
whitespaceInvalidMessage – if true, ignorable whitespace
                                causes a validity error report as well as a callback
    /**
     * CDATA -- character data, terminated by {@code "]]>"} and optionally
     * including unescaped markup delimiters (ampersand and left angle
     * bracket).  This should otherwise be exactly like character data,
     * modulo differences in error report details.
     * <p>
     * <P> The document handler's characters() or ignorableWhitespace()
     * methods are invoked on all the character data found
     *
     * @param docHandler               gets callbacks for character data
     * @param ignorableWhitespace      if true, whitespace characters will
     *                                 be reported using docHandler.ignorableWhitespace(); implicitly,
     *                                 non-whitespace characters will cause validation errors
     * @param whitespaceInvalidMessage if non-null, the ID of the validity error
     *                                 message that is reported to the error handler
     *                                 whenever ignorable whitespace is delivered
     */
    // Returns false, consuming nothing, when the input does not continue
    // with "![CDATA[".  Otherwise brackets the section with startCDATA()
    // and endCDATA() on docHandler, delivers its text in buffer-sized
    // pieces, and returns true.  Fatal errors: P-071 (illegal character),
    // P-073 (input ends before "]]>"), P-074 (bad surrogate pair).
    public boolean unparsedContent(DTDEventListener docHandler,
                                   /*ElementValidator validator,*/
                                   boolean ignorableWhitespace,
                                   String whitespaceInvalidMessage)
            throws IOException, SAXException {

        // [18] CDSect ::= CDStart CData CDEnd
        // [19] CDStart ::= '<![CDATA['
        // [20] CData ::= (Char* - (Char* ']]>' Char*))
        // [21] CDEnd ::= ']]>'

        // caller peeked the leading '<' ...
        if (!peek("![CDATA[", null))
            return false;
        docHandler.startCDATA();

        // only a literal ']]>' stops this ...
        int last;

        // each pass scans what is buffered, delivers it, then refills
        for (; ;) {        // until ']]>' seen
            boolean done = false;
            char c;

            // don't report ignorable whitespace as "text" for
            // validation purposes.
            // 'white' stays true only while the current piece consists
            // of whitespace alone
            boolean white = ignorableWhitespace;

            for (last = start; last < finish; last++) {
                c = buf[last];

                //
                // Reject illegal characters.
                //
                if (!XmlChars.isChar(c)) {
                    white = false;
                    if (c >= 0xd800 && c <= 0xdfff) {
                        if (checkSurrogatePair(last)) {
                            last++;
                            continue;
                        } else {
                            // second half not buffered yet: back up and
                            // deliver what precedes it
                            last--;
                            break;
                        }
                    }
                    fatal("P-071", new Object[]
                    {Integer.toHexString(buf[last])});
                }
                if (c == '\n') {
                    if (!isInternal())
                        lineNumber++;
                    continue;
                }
                if (c == '\r') {
                    // As above, we can't repeat CR/CRLF --> LF mapping
                    if (isInternal())
                        continue;

                    // deliver the text before the CR, then one LF in
                    // its place, through the callback matching 'white'
                    if (white) {
                        if (whitespaceInvalidMessage != null && errHandler != null)
                            errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                                    whitespaceInvalidMessage), null));
                        docHandler.ignorableWhitespace(buf, start,
                                last - start);
                        docHandler.ignorableWhitespace(newline, 0, 1);
                    } else {
//            validator.text ();
                        docHandler.characters(buf, start, last - start);
                        docHandler.characters(newline, 0, 1);
                    }
                    lineNumber++;
                    if (finish > (last + 1)) {
                        // swallow the LF of a CRLF pair
                        if (buf[last + 1] == '\n')
                            last++;
                    } else {    // CR at end of buffer
// XXX case not yet handled ... as above
                    }
                    start = last + 1;
                    continue;
                }
                if (c != ']') {
                    if (c != ' ' && c != '\t')
                        white = false;
                    continue;
                }
                // a ']': it ends the section only as part of "]]>"
                if ((last + 2) < finish) {
                    if (buf[last + 1] == ']' && buf[last + 2] == '>') {
                        done = true;
                        break;
                    }
                    white = false;
                    continue;
                } else {
                    // too close to the end of the buffer to decide;
                    // deliver what precedes it and refill below
                    //last--;
                    break;
                }
            }
            // deliver the piece scanned in this pass
            if (white) {
                if (whitespaceInvalidMessage != null && errHandler != null)
                    errHandler.error(new SAXParseException(DTDParser.messages.getMessage(locale,
                            whitespaceInvalidMessage), null));
                docHandler.ignorableWhitespace(buf, start, last - start);
            } else {
//        validator.text ();
                docHandler.characters(buf, start, last - start);
            }
            if (done) {
                // skip over the "]]>" terminator
                start = last + 3;
                break;
            }
            start = last;
            if (isEOF())
                fatal("P-073", null);
        }
        docHandler.endCDATA();
        return true;
    }

    // Verifies that the buffer holds a high surrogate at 'offset' directly
    // followed by a low surrogate.  Returns false, so the caller can
    // backstep, when the second character is not in the buffer yet; a
    // malformed pair is fatal error P-074.
    private boolean checkSurrogatePair(int offset)
            throws SAXException {

        final int following = offset + 1;
        if (following >= finish) {
            return false;
        }

        final char high = buf[offset];
        final char low = buf[following];
        final boolean highOk = high >= 0xd800 && high < 0xdc00;
        final boolean lowOk = low >= 0xdc00 && low <= 0xdfff;

        if (highOk && lowOk) {
            return true;
        }

        final Object[] pair = {
            Integer.toHexString(high & 0x0ffff),
            Integer.toHexString(low & 0x0ffff)
        };
        fatal("P-074", pair);
        return false;
    }


    whitespace in markup (flagged to app, discardable)

 the document handler's ignorableWhitespace() method
is called on all the whitespace found
/**
     * Consumes whitespace in markup, which is flagged to the application
     * and may be discarded by it.  The handler's ignorableWhitespace()
     * method is called on all the whitespace found; each CR, and each CR
     * LF pair that is buffered together, is reported as a single LF.
     *
     * @param handler receives the whitespace
     * @return true iff at least one whitespace character was consumed
     */
    public boolean ignorableWhitespace(DTDEventListener handler)
            throws IOException, SAXException {

        boolean sawSpace = false;
        int first = start;

        // [3] S ::= #20 | #09 | #0D | #0A
        while (true) {
            if (finish <= start) {
                // hand over what is pending before the buffer is refilled
                if (sawSpace) {
                    handler.ignorableWhitespace(buf, first, start - first);
                }
                fillbuf();
                first = start;
            }
            if (finish <= start) {
                return sawSpace;
            }

            final char c = buf[start++];

            if (c == '\r') {
                sawSpace = true;
                if (!isInternal()) {
                    lineNumber++;
                }
                // everything before the CR, then one LF in its place
                handler.ignorableWhitespace(buf, first,
                        (start - 1) - first);
                handler.ignorableWhitespace(newline, 0, 1);
                if (start < finish && buf[start] == '\n') {
                    ++start;
                }
                first = start;
            } else if (c == '\n' || c == 0x09 || c == 0x20) {
// XXX handles Macintosh line endings wrong
                if (c == '\n' && !isInternal()) {
                    lineNumber++;
                }
                sawSpace = true;
            } else {
                // not whitespace: put it back and report what is pending
                ungetc();
                if (sawSpace) {
                    handler.ignorableWhitespace(buf, first, start - first);
                }
                return sawSpace;
            }
        }
    }

    returns false iff 'next' string isn't as provided,
else skips that text and returns true.

 NOTE:  two alternative string representations are
both passed in, since one is faster.
/**
     * Checks whether the input continues with the given text and, if so,
     * skips over it.
     *
     * <p>NOTE: two alternative representations of the text are passed in,
     * since one is faster; the character array is used when it is given.
     *
     * @param next  the expected text as a string
     * @param chars the expected text as characters, or null to use
     *              {@code next}
     * @return true iff the text was found (and has been consumed)
     * @throws SAXException on fatal error P-077, when the text is longer
     *                      than the buffer
     */
    public boolean peek(String next, char chars [])
            throws IOException, SAXException {

        final boolean useArray = chars != null;
        final int len = useArray ? chars.length : next.length();

        // buffer should hold the whole thing ... give it a
        // chance for the end-of-buffer case and cope with EOF
        // by letting fillbuf compact and fill
        if (finish <= start || (finish - start) < len) {
            fillbuf();
        }

        // can't peek past EOF
        if (finish <= start) {
            return false;
        }

        // compare as far as the buffer reaches; consume iff it all matches
        int matched = 0;
        while (matched < len && (start + matched) < finish) {
            final char expected = useArray ? chars[matched] : next.charAt(matched);
            if (buf[start + matched] != expected) {
                return false;
            }
            matched++;
        }

        if (matched == len) {
            start += len;
            return true;
        }

        // the first fillbuf didn't get enough data; a fixed or closed
        // buffer cannot supply any more
        if (reader == null || isClosed) {
            return false;
        }

        //
        // This diagnostic "knows" that the only way big strings would
        // fail to be peeked is where it's a symbol ... e.g. for an
        // </EndTag> construct.  That knowledge could also be applied
        // to get rid of the symbol length constraint, since having
        // the wrong symbol is a fatal error anyway ...
        //
        if (len > buf.length) {
            fatal("P-077", new Object[]{Integer.valueOf(buf.length)});
        }

        // give fillbuf another chance to read, then start over
        fillbuf();
        return peek(next, chars);
    }


    /**
     * Starts collecting the text that is consumed from here on; the
     * collected text is fetched with {@link #rememberText()}.
     *
     * <p>This supports reporting the internal DTD subset, so that
     * &lt;!DOCTYPE...&gt; declarations can be recreated.  It is collected as
     * a single string; such subsets are normally small, and many
     * applications don't even care about this.
     *
     * @throws InternalError if remembering is already in progress
     */
    public void startRemembering() {

        if (startRemember != 0) {
            throw new InternalError();
        }
        startRemember = start;
    }

    /**
     * Stops collecting text and returns everything consumed since
     * {@link #startRemembering()} was called.
     *
     * @return the remembered text
     */
    public String rememberText() {

        final int pending = start - startRemember;
        final String text;

        if (rememberedText == null) {
            text = new String(buf, startRemember, pending);
        } else {
            // The text crossed a buffer boundary, so the earlier part was
            // saved in a temporary buffer; add what is still pending.
            rememberedText.append(buf, startRemember, pending);
            text = rememberedText.toString();
        }

        rememberedText = null;
        startRemember = 0;
        return text;
    }

    // Finds the entity whose location should be reported: the nearest one,
    // starting with this entity and walking down the stack, that has an
    // input source (locations within internal entities are not reported).
    // Falls back to this entity when there is none.
    private InputEntity getTopEntity() {

        for (InputEntity candidate = this; candidate != null; candidate = candidate.next) {
            if (candidate.input != null) {
                return candidate;
            }
        }
        return this;
    }

    Returns the public ID of this input source, if known
/**
     * Returns the public ID of the input source that locations are
     * reported against, if known.
     *
     * @return the public ID, or null if it is not known
     */
    public String getPublicId() {

        InputEntity where = getTopEntity();
        if (where != this)
            return where.getPublicId();

        // getTopEntity() also answers with this entity when nothing on
        // the stack has an input source; there is no ID to report then,
        // and dereferencing the missing source would throw.
        return (input == null) ? null : input.getPublicId();
    }

    Returns the system ID of this input source, if known
/**
     * Returns the system ID of the input source that locations are
     * reported against, if known.
     *
     * @return the system ID, or null if it is not known
     */
    public String getSystemId() {

        InputEntity where = getTopEntity();
        if (where != this)
            return where.getSystemId();

        // getTopEntity() also answers with this entity when nothing on
        // the stack has an input source; there is no ID to report then,
        // and dereferencing the missing source would throw.
        return (input == null) ? null : input.getSystemId();
    }

    Returns the current line number in this input source
/**
     * Returns the current line number in the input source that locations
     * are reported against.
     *
     * @return the line count of that entity
     */
    public int getLineNumber() {

        final InputEntity where = getTopEntity();
        return (where == this) ? lineNumber : where.getLineNumber();
    }

    returns -1; maintaining column numbers hurts performance
/**
     * Column numbers are not maintained, since tracking them hurts
     * performance.
     *
     * @return always -1
     */
    public int getColumnNumber() {

        final int notMaintained = -1;    // (speed)
        return notMaintained;
    }


    //
    // Refills the buffer from the reader: unread characters (plus one
    // already-read character kept for pushback) are moved to the front
    // and the remaining space is filled with new data.  Does nothing for
    // fixed buffers and closed streams; closes the stream at EOF.
    //
    // n.b. for non-EOF end-of-buffer cases, reader should return
    // at least a handful of bytes so various lookaheads behave.
    //
    // two character pushback exists except at first; characters
    // represented by surrogate pairs can't be pushed back (they'd
    // only be in character data anyway).
    //
    // DTD exception thrown on char conversion problems (P-075 for an
    // unsupported encoding, P-076 for a conversion failure); line number
    // will be low, as a rule.
    //
    private void fillbuf() throws IOException, SAXException {

        // don't touch fixed buffers, that'll usually
        // change entity values (and isn't needed anyway)
        // likewise, ignore closed streams
        if (reader == null || isClosed)
            return;

        // if remembering DTD text, copy!
        // (the characters consumed so far are about to be overwritten)
        if (startRemember != 0) {
            if (rememberedText == null)
                rememberedText = new StringBuffer(buf.length);
            rememberedText.append(buf, startRemember,
                    start - startRemember);
        }

        // keep one consumed character in front of the unread data, unless
        // the buffer has never been filled or nothing was consumed
        boolean extra = (finish > 0) && (start > 0);
        int len;

        if (extra)        // extra pushback
            start--;
        len = finish - start;

        // slide the retained characters to the front of the buffer
        System.arraycopy(buf, start, buf, 0, len);
        start = 0;
        finish = len;

        try {
            // len becomes the free space, then the count actually read
            len = buf.length - len;
            len = reader.read(buf, finish, len);
        } catch (UnsupportedEncodingException e) {
            fatal("P-075", new Object[]{e.getMessage()});
        } catch (CharConversionException e) {
            fatal("P-076", new Object[]{e.getMessage()});
        }
        // a negative count signals end of stream
        if (len >= 0)
            finish += len;
        else
            close();
        if (extra)        // extra pushback
            start++;

        // remembering continues right after the retained pushback
        // character, i.e. at the new value of 'start'
        if (startRemember != 0)
        // assert extra == true
            startRemember = 1;
    }

    /**
     * Closes the underlying reader, if there is one and it is still open,
     * and marks this entity as closed.  A failure to close the reader is
     * deliberately ignored (best effort), but the entity is marked closed
     * in that case too, so that no further reads are attempted on a
     * reader whose close failed.
     */
    public void close() {

        try {
            if (reader != null && !isClosed)
                reader.close();
        } catch (IOException ignored) {
            /* NOTHING: nothing useful can be done about a failed close */
        } finally {
            // previously skipped when close() threw, which left the
            // entity looking open
            isClosed = true;
        }
    }


    // Reports a fatal (not continuable, e.g. well-formedness) error: builds
    // the localized message for the given ID and parameters, closes this
    // entity, notifies the error handler if there is one, and then always
    // throws the exception.
    private void fatal(String messageId, Object params [])
            throws SAXException {

        final String text = DTDParser.messages.getMessage(locale, messageId, params);
        final SAXParseException failure = new SAXParseException(text, null);

        close();
        if (errHandler != null) {
            errHandler.fatalError(failure);
        }
        throw failure;
    }
}
/

java/ 9/ jdk.xml.bind/com/sun/xml/internal/dtdparser/InputEntity.java