/*
 * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.internal.util.xml.impl;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import jdk.internal.org.xml.sax.InputSource;
import jdk.internal.org.xml.sax.SAXException;

XML non-validating parser engine.
/**
 * XML non-validating parser engine.
 */
public abstract class Parser {

    /** Message argument handed to the panic method at every fault site in this class. */
    public static final String FAULT = "";

    /** Size passed to the Input created for the DTD external subset. */
    protected static final int BUFFSIZE_READER = 512;

    /** Initial length of the parser buffer (mBuff) allocated by the constructor. */
    protected static final int BUFFSIZE_PARSER = 128;
The end of stream character.
/**
 * The end of stream character.
 */
public static final char EOS = 0xffff;

private Pair mNoNS;  // there is no namespace
private Pair mXml;   // the xml namespace
private Map<String, Input> mEnt;   // the entities look up table
private Map<String, Input> mPEnt;  // the parameter entities look up table
protected boolean mIsSAlone;     // xml decl standalone flag
protected boolean mIsSAloneSet;  // standalone is explicitly set
protected boolean mIsNSAware;    // if true - namespace aware mode

protected int mPh;  // current phase of document processing
protected static final int PH_BEFORE_DOC = -1;  // before parsing
protected static final int PH_DOC_START = 0;    // document start
protected static final int PH_MISC_DTD = 1;     // misc before DTD
protected static final int PH_DTD = 2;          // DTD
protected static final int PH_DTD_MISC = 3;     // misc after DTD
protected static final int PH_DOCELM = 4;       // document's element
protected static final int PH_DOCELM_MISC = 5;  // misc after element
protected static final int PH_AFTER_DOC = 6;    // after parsing

protected int mEvt;  // current event type
protected static final int EV_NULL = 0;   // unknown
protected static final int EV_ELM = 1;    // empty element
protected static final int EV_ELMS = 2;   // start element
protected static final int EV_ELME = 3;   // end element
protected static final int EV_TEXT = 4;   // textual content
protected static final int EV_WSPC = 5;   // white space content
protected static final int EV_PI = 6;     // processing instruction
protected static final int EV_CDAT = 7;   // character data
protected static final int EV_COMM = 8;   // comment
protected static final int EV_DTD = 9;    // document type definition
protected static final int EV_ENT = 10;   // skipped entity

private char mESt;  // built-in entity recognizer state
// mESt values:
//   0x100   : the initial state
//   > 0x100 : unrecognized name
//   < 0x100 : replacement character

protected char[] mBuff;  // parser buffer
protected int mBuffIdx;  // index of the last char

protected Pair mPref;  // stack of prefixes
protected Pair mElm;   // stack of elements
// mAttL.chars - element qname
// mAttL.next  - next element
// mAttL.list  - list of attributes defined on this element
// mAttL.list.chars - attribute qname
// mAttL.list.id    - a char representing attribute's type see below
// mAttL.list.next  - next attribute defined on the element
// mAttL.list.list  - default value structure or null
// mAttL.list.list.chars - "name='value' " chars array for Input
//
// Attribute type character values:
// 'i' - "ID"
// 'r' - "IDREF"
// 'R' - "IDREFS"
// 'n' - "ENTITY"
// 'N' - "ENTITIES"
// 't' - "NMTOKEN"
// 'T' - "NMTOKENS"
// 'u' - enumeration type
// 'o' - "NOTATION"
// 'c' - "CDATA"
// see also: bkeyword() and atype()
//
protected Pair mAttL;  // list of defined attrs by element name
protected Input mDoc;  // document entity
protected Input mInp;  // stack of entities

private char[] mChars;  // reading buffer
private int mChLen;     // current capacity
private int mChIdx;     // index to the next char

protected Attrs mAttrs;   // attributes of the curr. element
private String[] mItems;  // attributes array of the curr. element
private char mAttrIdx;    // attributes counter/index
private String mUnent;    // unresolved entity name

private Pair mDltd;  // deleted objects for reuse
Default prefixes
/** * Default prefixes */
private static final char NONS[]; private static final char XML[]; private static final char XMLNS[]; static { NONS = new char[1]; NONS[0] = (char) 0; XML = new char[4]; XML[0] = (char) 4; XML[1] = 'x'; XML[2] = 'm'; XML[3] = 'l'; XMLNS = new char[6]; XMLNS[0] = (char) 6; XMLNS[1] = 'x'; XMLNS[2] = 'm'; XMLNS[3] = 'l'; XMLNS[4] = 'n'; XMLNS[5] = 's'; }
ASCII character type array. This array maps an ASCII (7 bit) character to the character type.
Possible character type values are:
- ' ' for any kind of white space character;
- 'a' for any lower case alphabetical character value;
- 'A' for any upper case alphabetical character value;
- 'd' for any decimal digit character value;
- 'z' for any character less than ' ' except '\t', '\n', '\r';
An ASCII (7 bit) character which does not fall in any category listed above is mapped to itself.
/**
 * ASCII character type array.
 *
 * This array maps an ASCII (7 bit) character to the character type.<br>
 * Possible character type values are:<br>
 * - ' ' for any kind of white space character;<br>
 * - 'a' for any lower case alphabetical character value;<br>
 * - 'A' for any upper case alphabetical character value;<br>
 * - 'd' for any decimal digit character value;<br>
 * - 'z' for any character less than ' ' except '\t', '\n', '\r';<br>
 * An ASCII (7 bit) character which does not fall in any category listed
 * above is mapped to itself.
 */
private static final byte asctyp[];
NMTOKEN character type array. This array maps an ASCII (7 bit) character to the character type.
Possible character type values are:
- 0 for underscore ('_') or any lower and upper case alphabetical character value;
- 1 for colon (':') character;
- 2 for dash ('-') and dot ('.') or any decimal digit character value;
- 3 for any kind of white space character
An ASCII (7 bit) character which does not fall in any category listed above is mapped to 0xff.
/**
 * NMTOKEN character type array.
 *
 * This array maps an ASCII (7 bit) character to the character type.<br>
 * Possible character type values are:<br>
 * - 0 for underscore ('_') or any lower and upper case alphabetical
 *   character value;<br>
 * - 1 for colon (':') character;<br>
 * - 2 for dash ('-') and dot ('.') or any decimal digit character value;<br>
 * - 3 for any kind of white space character<br>
 * An ASCII (7 bit) character which does not fall in any category listed
 * above is mapped to 0xff.
 */
private static final byte nmttyp[];
Static constructor. Sets up the ASCII character type array which is used by asctyp method and NMTOKEN character type array.
/** * Static constructor. * * Sets up the ASCII character type array which is used by * {@link #asctyp asctyp} method and NMTOKEN character type array. */
static { short i = 0; asctyp = new byte[0x80]; while (i < ' ') { asctyp[i++] = (byte) 'z'; } asctyp['\t'] = (byte) ' '; asctyp['\r'] = (byte) ' '; asctyp['\n'] = (byte) ' '; while (i < '0') { asctyp[i] = (byte) i++; } while (i <= '9') { asctyp[i++] = (byte) 'd'; } while (i < 'A') { asctyp[i] = (byte) i++; } while (i <= 'Z') { asctyp[i++] = (byte) 'A'; } while (i < 'a') { asctyp[i] = (byte) i++; } while (i <= 'z') { asctyp[i++] = (byte) 'a'; } while (i < 0x80) { asctyp[i] = (byte) i++; } nmttyp = new byte[0x80]; for (i = 0; i < '0'; i++) { nmttyp[i] = (byte) 0xff; } while (i <= '9') { nmttyp[i++] = (byte) 2; // digits } while (i < 'A') { nmttyp[i++] = (byte) 0xff; } // skiped upper case alphabetical character are already 0 for (i = '['; i < 'a'; i++) { nmttyp[i] = (byte) 0xff; } // skiped lower case alphabetical character are already 0 for (i = '{'; i < 0x80; i++) { nmttyp[i] = (byte) 0xff; } nmttyp['_'] = 0; nmttyp[':'] = 1; nmttyp['.'] = 2; nmttyp['-'] = 2; nmttyp[' '] = 3; nmttyp['\t'] = 3; nmttyp['\r'] = 3; nmttyp['\n'] = 3; }
Constructor.
/** * Constructor. */
protected Parser() { mPh = PH_BEFORE_DOC; // before parsing // Initialize the parser mBuff = new char[BUFFSIZE_PARSER]; mAttrs = new Attrs(); // Default namespace mPref = pair(mPref); mPref.name = ""; mPref.value = ""; mPref.chars = NONS; mNoNS = mPref; // no namespace // XML namespace mPref = pair(mPref); mPref.name = "xml"; mPref.value = "http://www.w3.org/XML/1998/namespace"; mPref.chars = XML; mXml = mPref; // XML namespace }
Initializes parser's internals. Note, current input has to be set before this method is called.
/** * Initializes parser's internals. Note, current input has to be set before * this method is called. */
protected void init() { mUnent = null; mElm = null; mPref = mXml; mAttL = null; mPEnt = new HashMap<>(); mEnt = new HashMap<>(); mDoc = mInp; // current input is document entity mChars = mInp.chars; // use document entity buffer mPh = PH_DOC_START; // the begining of the document }
Cleans up parser internal resources.
/** * Cleans up parser internal resources. */
protected void cleanup() { // Default attributes while (mAttL != null) { while (mAttL.list != null) { if (mAttL.list.list != null) { del(mAttL.list.list); } mAttL.list = del(mAttL.list); } mAttL = del(mAttL); } // Element stack while (mElm != null) { mElm = del(mElm); } // Namespace prefixes while (mPref != mXml) { mPref = del(mPref); } // Inputs while (mInp != null) { pop(); } // Document reader if ((mDoc != null) && (mDoc.src != null)) { try { mDoc.src.close(); } catch (IOException ioe) { } } mPEnt = null; mEnt = null; mDoc = null; mPh = PH_AFTER_DOC; // before documnet processing }
Processes a portion of the document. This method returns one of the EV_* constants as an identifier of the portion of the document that has been read.
Throws:
Returns:Identifier of processed document portion.
/**
 * Processes a portion of document. This method returns one of EV_*
 * constants as an identifier of the portion of document that has been
 * read.
 *
 * The method loops until an event has been recognized. It is driven by a
 * local state: 0 dispatches on markup, 1 collects leading white space and
 * 2 collects the text content of an element.
 *
 * @return Identifier of processed document portion.
 * @exception Exception is parser specific exception from panic method.
 *   The original documentation also lists IOException here.
 */
@SuppressWarnings("fallthrough")
protected int step() throws Exception {
    mEvt = EV_NULL;
    int st = 0;
    while (mEvt == EV_NULL) {
        // Fast path: take the next char straight from the reading buffer,
        // otherwise let getch() supply it.
        char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
        switch (st) {
            case 0:  // all sorts of markup (dispatcher)
                if (ch != '<') {
                    bkch();
                    mBuffIdx = -1;  // clean parser buffer
                    st = 1;
                    break;
                }
                switch (getch()) {
                    case '/':  // the end of the element content
                        mEvt = EV_ELME;
                        if (mElm == null) {
                            panic(FAULT);
                        }
                        // Check element's open/close tags balance
                        mBuffIdx = -1;  // clean parser buffer
                        bname(mIsNSAware);
                        char[] chars = mElm.chars;
                        if (chars.length == (mBuffIdx + 1)) {
                            // Compare from index 1 on; index 0 is not compared
                            for (char i = 1; i <= mBuffIdx; i += 1) {
                                if (chars[i] != mBuff[i]) {
                                    panic(FAULT);
                                }
                            }
                        } else {
                            panic(FAULT);
                        }
                        // Skip white spaces before '>'
                        if (wsskip() != '>') {
                            panic(FAULT);
                        }
                        getch();  // read '>'
                        break;

                    case '!':  // a comment or a CDATA
                        // Peek at the char after '<!' and push it back
                        ch = getch();
                        bkch();
                        switch (ch) {
                            case '-':  // must be a comment
                                mEvt = EV_COMM;
                                comm();
                                break;

                            case '[':  // must be a CDATA section
                                mEvt = EV_CDAT;
                                cdat();
                                break;

                            default:  // must be 'DOCTYPE'
                                mEvt = EV_DTD;
                                dtd();
                                break;
                        }
                        break;

                    case '?':  // processing instruction
                        mEvt = EV_PI;
                        pi();
                        break;

                    default:  // must be the first char of an xml name
                        bkch();
                        // Read an element name and put it on top of the
                        // element stack
                        mElm = pair(mElm);  // add new element to the stack
                        mElm.chars = qname(mIsNSAware);
                        mElm.name = mElm.local();
                        mElm.id = (mElm.next != null) ? mElm.next.id : 0;  // flags
                        mElm.num = 0;  // namespace counter
                        // Find the list of defined attributes of the current
                        // element
                        Pair elm = find(mAttL, mElm.chars);
                        mElm.list = (elm != null) ? elm.list : null;
                        // Read attributes till the end of the element tag
                        mAttrIdx = 0;
                        Pair att = pair(null);
                        att.num = 0;  // clear attribute's flags
                        attr(att);    // get all attributes inc. defaults
                        del(att);
                        mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null;
                        // Skip white spaces before '>'
                        switch (wsskip()) {
                            case '>':
                                getch();  // read '>'
                                mEvt = EV_ELMS;
                                break;

                            case '/':
                                getch();  // read '/'
                                if (getch() != '>') {  // read '>'
                                    panic(FAULT);
                                }
                                mEvt = EV_ELM;
                                break;

                            default:
                                panic(FAULT);
                        }
                        break;
                }
                break;

            case 1:  // read white space
                switch (ch) {
                    case ' ':
                    case '\t':
                    case '\n':
                        bappend(ch);
                        break;

                    case '\r':  // EOL processing [#2.11]
                        // "\r\n" and a lone "\r" are both appended as '\n'
                        if (getch() != '\n') {
                            bkch();
                        }
                        bappend('\n');
                        break;

                    case '<':
                        mEvt = EV_WSPC;
                        bkch();
                        bflash_ws();
                        break;

                    default:
                        // Not white space only: continue as text content
                        bkch();
                        st = 2;
                        break;
                }
                break;

            case 2:  // read the text content of the element
                switch (ch) {
                    case '&':
                        if (mUnent == null) {
                            // There was no unresolved entity on previous step.
                            if ((mUnent = ent('x')) != null) {
                                mEvt = EV_TEXT;
                                bkch();      // move back to ';' after entity name
                                setch('&');  // parser must be back on next step
                                bflash();
                            }
                        } else {
                            // There was unresolved entity on previous step.
                            mEvt = EV_ENT;
                            skippedEnt(mUnent);
                            mUnent = null;
                        }
                        break;

                    case '<':
                        mEvt = EV_TEXT;
                        bkch();
                        bflash();
                        break;

                    case '\r':  // EOL processing [#2.11]
                        if (getch() != '\n') {
                            bkch();
                        }
                        bappend('\n');
                        break;

                    case EOS:
                        // No break after panic: if it returned, control
                        // would fall through to the default branch.
                        panic(FAULT);

                    default:
                        bappend(ch);
                        break;
                }
                break;

            default:
                panic(FAULT);
        }
    }

    return mEvt;
}
Parses the document type declaration.
Throws:
  • Exception – is parser specific exception from the panic method.
  • IOException
/** * Parses the document type declaration. * * @exception Exception is parser specific exception form panic method. * @exception IOException */
private void dtd() throws Exception { char ch; String str = null; String name = null; Pair psid = null; // read 'DOCTYPE' if ("DOCTYPE".equals(name(false)) != true) { panic(FAULT); } mPh = PH_DTD; // DTD for (short st = 0; st >= 0;) { ch = getch(); switch (st) { case 0: // read the document type name if (chtyp(ch) != ' ') { bkch(); name = name(mIsNSAware); wsskip(); st = 1; // read 'PUPLIC' or 'SYSTEM' } break; case 1: // read 'PUPLIC' or 'SYSTEM' switch (chtyp(ch)) { case 'A': bkch(); psid = pubsys(' '); st = 2; // skip spaces before internal subset docType(name, psid.name, psid.value); break; case '[': bkch(); st = 2; // skip spaces before internal subset docType(name, null, null); break; case '>': bkch(); st = 3; // skip spaces after internal subset docType(name, null, null); break; default: panic(FAULT); } break; case 2: // skip spaces before internal subset switch (chtyp(ch)) { case '[': // Process internal subset dtdsub(); st = 3; // skip spaces after internal subset break; case '>': // There is no internal subset bkch(); st = 3; // skip spaces after internal subset break; case ' ': // skip white spaces break; default: panic(FAULT); } break; case 3: // skip spaces after internal subset switch (chtyp(ch)) { case '>': if (psid != null) { // Report the DTD external subset InputSource is = resolveEnt(name, psid.name, psid.value); if (is != null) { if (mIsSAlone == false) { // Set the end of DTD external subset char bkch(); setch(']'); // Set the DTD external subset InputSource push(new Input(BUFFSIZE_READER)); setinp(is); mInp.pubid = psid.name; mInp.sysid = psid.value; // Parse the DTD external subset dtdsub(); } else { // Unresolved DTD external subset skippedEnt("[dtd]"); // Release reader and stream if (is.getCharacterStream() != null) { try { is.getCharacterStream().close(); } catch (IOException ioe) { } } if (is.getByteStream() != null) { try { is.getByteStream().close(); } catch (IOException ioe) { } } } } else { // Unresolved DTD external subset 
skippedEnt("[dtd]"); } del(psid); } st = -1; // end of DTD break; case ' ': // skip white spaces break; default: panic(FAULT); } break; default: panic(FAULT); } } }
Parses the document type declaration subset.
Throws:
  • Exception – is parser specific exception from the panic method.
  • IOException
/**
 * Parses the document type declaration subset.
 *
 * State 0 skips white space and dispatches on the next construct
 * (processing instruction, comment, markup declaration, parameter entity
 * reference, end of subset); state 1 reads up to the '&gt;' that closes a
 * markup declaration.
 *
 * @exception Exception is parser specific exception from panic method.
 *   The original documentation also lists IOException here.
 */
private void dtdsub() throws Exception {
    startInternalSub();  // reports the event before parsing the subset
    char ch;
    for (short st = 0; st >= 0;) {
        ch = getch();
        switch (st) {
            case 0:  // skip white spaces before a declaration
                switch (chtyp(ch)) {
                    case '<':
                        ch = getch();
                        switch (ch) {
                            case '?':
                                pi();
                                break;

                            case '!':
                                // Peek at the char after '<!' and push it back
                                ch = getch();
                                bkch();
                                if (ch == '-') {
                                    comm();
                                    break;
                                }
                                // A markup or an entity declaration
                                bntok();
                                switch (bkeyword()) {
                                    case 'n':
                                        dtdent();
                                        break;

                                    case 'a':
                                        dtdattl();  // parse attributes declaration
                                        break;

                                    case 'e':
                                        dtdelm();  // parse element declaration
                                        break;

                                    case 'o':
                                        dtdnot();  // parse notation declaration
                                        break;

                                    default:
                                        panic(FAULT);  // unsupported markup declaration
                                        break;
                                }
                                st = 1;  // read the end of declaration
                                break;

                            default:
                                panic(FAULT);
                                break;
                        }
                        break;

                    case '%':
                        // A parameter entity reference
                        pent(' ');
                        break;

                    case ']':
                        // End of DTD subset
                        st = -1;
                        break;

                    case ' ':
                        // Skip white spaces
                        break;

                    case 'Z':
                        // End of stream: the next char has to be ']'
                        if (getch() != ']') {
                            panic(FAULT);
                        }
                        st = -1;
                        break;

                    default:
                        panic(FAULT);
                }
                break;

            case 1:  // read the end of declaration
                switch (ch) {
                    case '>':  // there is no notation
                        st = 0;  // skip white spaces before a declaration
                        break;

                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        // Skip white spaces
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            default:
                panic(FAULT);
        }
    }
}
Parses an entity declaration. This method fills the general (mEnt) and parameter (mPEnt) entity look up tables.
Throws:
  • Exception – is parser specific exception from the panic method.
  • IOException
/**
 * Parses an entity declaration. This method fills the general
 * (<code>mEnt</code>) and parameter (<code>mPEnt</code>) entity look up
 * table.
 *
 * An entity name that is already present in the corresponding table is
 * not overwritten; the first declaration is kept.
 *
 * @exception Exception is parser specific exception from panic method.
 *   The original documentation also lists IOException here.
 */
@SuppressWarnings("fallthrough")
private void dtdent() throws Exception {
    String str = null;
    char[] val = null;
    Input inp = null;
    Pair ids = null;
    char ch;
    for (short st = 0; st >= 0;) {
        ch = getch();
        switch (st) {
            case 0:  // skip white spaces before entity name
                switch (chtyp(ch)) {
                    case ' ':
                        // Skip white spaces
                        break;

                    case '%':
                        // Parameter entity or parameter entity declaration.
                        // Peek at the char after '%' and push it back.
                        ch = getch();
                        bkch();
                        if (chtyp(ch) == ' ') {
                            // Parameter entity declaration.
                            wsskip();
                            str = name(false);
                            switch (chtyp(wsskip())) {
                                case 'A':
                                    // Read the external identifier
                                    ids = pubsys(' ');
                                    if (wsskip() == '>') {
                                        // External parsed entity
                                        if (mPEnt.containsKey(str) == false) {  // [#4.2]
                                            inp = new Input();
                                            inp.pubid = ids.name;
                                            inp.sysid = ids.value;
                                            mPEnt.put(str, inp);
                                        }
                                    } else {
                                        panic(FAULT);
                                    }
                                    del(ids);
                                    st = -1;  // the end of declaration
                                    break;

                                case '\"':
                                case '\'':
                                    // Read the parameter entity value
                                    bqstr('d');
                                    // Create the parameter entity value
                                    val = new char[mBuffIdx + 1];
                                    System.arraycopy(mBuff, 1, val, 1, val.length - 1);
                                    // Add surrounding spaces [#4.4.8]
                                    // NOTE(review): only the leading slot val[0]
                                    // is set to a space here; the array has no
                                    // extra trailing slot - confirm intent.
                                    val[0] = ' ';
                                    // Add the entity to the entity look up table
                                    if (mPEnt.containsKey(str) == false) {  // [#4.2]
                                        inp = new Input(val);
                                        inp.pubid = mInp.pubid;
                                        inp.sysid = mInp.sysid;
                                        inp.xmlenc = mInp.xmlenc;
                                        inp.xmlver = mInp.xmlver;
                                        mPEnt.put(str, inp);
                                    }
                                    st = -1;  // the end of declaration
                                    break;

                                default:
                                    panic(FAULT);
                                    break;
                            }
                        } else {
                            // Parameter entity reference.
                            pent(' ');
                        }
                        break;

                    default:
                        bkch();
                        str = name(false);
                        st = 1;  // read entity declaration value
                        break;
                }
                break;

            case 1:  // read entity declaration value
                switch (chtyp(ch)) {
                    case '\"':  // internal entity
                    case '\'':
                        bkch();
                        bqstr('d');  // read a string into the buffer
                        if (mEnt.get(str) == null) {
                            // Create general entity value
                            val = new char[mBuffIdx];
                            System.arraycopy(mBuff, 1, val, 0, val.length);
                            // Add the entity to the entity look up table
                            if (mEnt.containsKey(str) == false) {  // [#4.2]
                                inp = new Input(val);
                                inp.pubid = mInp.pubid;
                                inp.sysid = mInp.sysid;
                                inp.xmlenc = mInp.xmlenc;
                                inp.xmlver = mInp.xmlver;
                                mEnt.put(str, inp);
                            }
                        }
                        st = -1;  // the end of declaration
                        break;

                    case 'A':  // external entity
                        bkch();
                        ids = pubsys(' ');
                        switch (wsskip()) {
                            case '>':  // external parsed entity
                                if (mEnt.containsKey(str) == false) {  // [#4.2]
                                    inp = new Input();
                                    inp.pubid = ids.name;
                                    inp.sysid = ids.value;
                                    mEnt.put(str, inp);
                                }
                                break;

                            case 'N':  // external general unparsed entity
                                if ("NDATA".equals(name(false)) == true) {
                                    wsskip();
                                    unparsedEntDecl(str, ids.name, ids.value, name(false));
                                    break;
                                }
                                // Anything but 'NDATA' falls through to the
                                // fault branch below.

                            default:
                                panic(FAULT);
                                break;
                        }
                        del(ids);
                        st = -1;  // the end of declaration
                        break;

                    case ' ':
                        // Skip white spaces
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            default:
                panic(FAULT);
        }
    }
}
Parses an element declaration. This method parses the declaration up to the closing angle bracket.
Throws:
  • Exception – is parser specific exception from the panic method.
  • IOException
/** * Parses an element declaration. * * This method parses the declaration up to the closing angle bracket. * * @exception Exception is parser specific exception form panic method. * @exception IOException */
@SuppressWarnings("fallthrough") private void dtdelm() throws Exception { // This is stub implementation which skips an element // declaration. wsskip(); name(mIsNSAware); char ch; while (true) { ch = getch(); switch (ch) { case '>': bkch(); return; case EOS: panic(FAULT); default: break; } } }
Parses an attribute list declaration. This method parses the declaration up to the closing angle bracket.
Throws:
  • Exception – is parser specific exception from the panic method.
  • IOException
/**
 * Parses an attribute list declaration.
 *
 * This method parses the declaration up to the closing angle bracket.
 * State 0 reads the element name and finds or creates its entry in
 * <code>mAttL</code>; state 1 reads attribute declarations one by one and
 * returns once the next non white space char is '&gt;'.
 *
 * @exception Exception is parser specific exception from panic method.
 *   The original documentation also lists IOException here.
 */
private void dtdattl() throws Exception {
    char elmqn[] = null;
    Pair elm = null;
    char ch;
    for (short st = 0; st >= 0;) {
        ch = getch();
        switch (st) {
            case 0:  // read the element name
                switch (chtyp(ch)) {
                    case 'a':
                    case 'A':
                    case '_':
                    case 'X':
                    case ':':
                        bkch();
                        // Get the element from the list or add a new one.
                        elmqn = qname(mIsNSAware);
                        elm = find(mAttL, elmqn);
                        if (elm == null) {
                            elm = pair(mAttL);
                            elm.chars = elmqn;
                            mAttL = elm;
                        }
                        st = 1;  // read an attribute declaration
                        break;

                    case ' ':
                        break;

                    case '%':
                        pent(' ');
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            case 1:  // read an attribute declaration
                switch (chtyp(ch)) {
                    case 'a':
                    case 'A':
                    case '_':
                    case 'X':
                    case ':':
                        bkch();
                        dtdatt(elm);
                        // '>' is only looked at here, not consumed by getch()
                        if (wsskip() == '>') {
                            return;
                        }
                        break;

                    case ' ':
                        break;

                    case '%':
                        pent(' ');
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            default:
                panic(FAULT);
                break;
        }
    }
}
Parses an attribute declaration. The attribute uses the following fields of Pair object: chars - characters of qualified name id - the type identifier of the attribute list - a pair which holds the default value (chars field)
Params:
  • elm – An object which represents all defined attributes on an element.
Throws:
/**
 * Parses an attribute declaration.
 *
 * The attribute uses the following fields of Pair object:
 * chars - characters of qualified name;
 * id - the type identifier of the attribute;
 * list - a pair which holds the default value (chars field).
 *
 * States: 0 reads the attribute name, 1 the attribute type, 2 and 3 the
 * items of an enumeration or NOTATION list, 4 the default declaration and
 * 5 the default value.
 *
 * @param elm An object which represents all defined attributes on an
 *   element.
 * @exception Exception is parser specific exception from panic method.
 *   The original documentation also lists IOException here.
 */
@SuppressWarnings("fallthrough")
private void dtdatt(Pair elm) throws Exception {
    char attqn[] = null;
    Pair att = null;
    char ch;
    for (short st = 0; st >= 0;) {
        ch = getch();
        switch (st) {
            case 0:  // the attribute name
                switch (chtyp(ch)) {
                    case 'a':
                    case 'A':
                    case '_':
                    case 'X':
                    case ':':
                        bkch();
                        // Get the attribute from the list or add a new one.
                        attqn = qname(mIsNSAware);
                        att = find(elm.list, attqn);
                        if (att == null) {
                            // New attribute declaration
                            att = pair(elm.list);
                            att.chars = attqn;
                            elm.list = att;
                        } else {
                            // Do not override the attribute declaration [#3.3]
                            // The duplicate is parsed into a scratch pair that
                            // is not linked into elm.list.
                            att = pair(null);
                            att.chars = attqn;
                            att.id = 'c';
                        }
                        wsskip();
                        st = 1;
                        break;

                    case '%':
                        pent(' ');
                        break;

                    case ' ':
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            case 1:  // the attribute type
                switch (chtyp(ch)) {
                    case '(':
                        att.id = 'u';  // enumeration type
                        st = 2;        // read the first element of the list
                        break;

                    case '%':
                        pent(' ');
                        break;

                    case ' ':
                        break;

                    default:
                        bkch();
                        bntok();  // read type id
                        att.id = bkeyword();
                        switch (att.id) {
                            case 'o':  // NOTATION
                                if (wsskip() != '(') {
                                    panic(FAULT);
                                }
                                ch = getch();
                                st = 2;  // read the first element of the list
                                break;

                            case 'i':  // ID
                            case 'r':  // IDREF
                            case 'R':  // IDREFS
                            case 'n':  // ENTITY
                            case 'N':  // ENTITIES
                            case 't':  // NMTOKEN
                            case 'T':  // NMTOKENS
                            case 'c':  // CDATA
                                wsskip();
                                st = 4;  // read default declaration
                                break;

                            default:
                                panic(FAULT);
                                break;
                        }
                        break;
                }
                break;

            case 2:  // read the first element of the list
                switch (chtyp(ch)) {
                    case 'a':
                    case 'A':
                    case 'd':
                    case '.':
                    case ':':
                    case '-':
                    case '_':
                    case 'X':
                        bkch();
                        switch (att.id) {
                            case 'u':  // enumeration type
                                bntok();
                                break;

                            case 'o':  // NOTATION
                                mBuffIdx = -1;
                                bname(false);
                                break;

                            default:
                                panic(FAULT);
                                break;
                        }
                        wsskip();
                        st = 3;  // read next element of the list
                        break;

                    case '%':
                        pent(' ');
                        break;

                    case ' ':
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            case 3:  // read next element of the list
                switch (ch) {
                    case ')':
                        wsskip();
                        st = 4;  // read default declaration
                        break;

                    case '|':
                        wsskip();
                        switch (att.id) {
                            case 'u':  // enumeration type
                                bntok();
                                break;

                            case 'o':  // NOTATION
                                mBuffIdx = -1;
                                bname(false);
                                break;

                            default:
                                panic(FAULT);
                                break;
                        }
                        wsskip();
                        break;

                    case '%':
                        pent(' ');
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            case 4:  // read default declaration
                switch (ch) {
                    case '#':
                        bntok();
                        switch (bkeyword()) {
                            case 'F':  // FIXED
                                switch (wsskip()) {
                                    case '\"':
                                    case '\'':
                                        st = 5;  // read the default value
                                        break;

                                    case EOS:
                                        // No break after panic: if it returned,
                                        // control would fall through.
                                        panic(FAULT);

                                    default:
                                        st = -1;
                                        break;
                                }
                                break;

                            case 'Q':  // REQUIRED
                            case 'I':  // IMPLIED
                                st = -1;
                                break;

                            default:
                                panic(FAULT);
                                break;
                        }
                        break;

                    case '\"':
                    case '\'':
                        bkch();
                        st = 5;  // read the default value
                        break;

                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        break;

                    case '%':
                        pent(' ');
                        break;

                    default:
                        bkch();
                        st = -1;
                        break;
                }
                break;

            case 5:  // read the default value
                switch (ch) {
                    case '\"':
                    case '\'':
                        bkch();
                        bqstr('d');  // the value in the mBuff now
                        att.list = pair(null);
                        // Create a string like "attqname='value' "
                        // Layout: the qname without its first char, '=', the
                        // quote char, mBuffIdx value chars, the quote char
                        // again and a trailing space.
                        att.list.chars = new char[att.chars.length + mBuffIdx + 3];
                        System.arraycopy(
                                att.chars, 1, att.list.chars, 0, att.chars.length - 1);
                        att.list.chars[att.chars.length - 1] = '=';
                        att.list.chars[att.chars.length] = ch;
                        System.arraycopy(
                                mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
                        att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
                        att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
                        st = -1;
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            default:
                panic(FAULT);
                break;
        }
    }
}
Parses a notation declaration. This method parses the declaration up to the closing angle bracket.
Throws:
  • Exception – is parser specific exception from the panic method.
  • IOException
/**
 * Parses a notation declaration.
 *
 * Reads the notation name and its external identifier, reports them via
 * {@code notDecl} and releases the identifier pair. Per the original
 * documentation the declaration is parsed up to the closing angle
 * bracket.
 *
 * @exception Exception is parser specific exception from panic method.
 *   The original documentation also lists IOException here.
 */
private void dtdnot() throws Exception {
    wsskip();
    final String notationName = name(false);

    wsskip();
    final Pair externalId = pubsys('N');

    notDecl(notationName, externalId.name, externalId.value);
    del(externalId);
}
Parses an attribute. This recursive method is responsible for prefix addition (mPref) on the way down. The end of the element's start tag triggers the return process. The method then, on its way back, resolves prefixes and accumulates attributes.

att.num carries attribute flags where: 0x1 - attribute is declared in DTD (attribute declaration had been read); 0x2 - attribute's default value is used.

Params:
  • att – An object which represents current attribute.
Throws:
/**
 * Parses an attribute.
 *
 * This recursive method is responsible for prefix addition
 * (<code>mPref</code>) on the way down. The element's start tag end
 * triggers the return process. The method then on its way back resolves
 * prefixes and accumulates attributes.
 *
 * <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is
 * declared in DTD (attribute declaration had been read); 0x2 - attribute's
 * default value is used.</p>
 *
 * <p>Each reported attribute occupies a group of slots in
 * <code>mItems</code> starting at <code>mAttrIdx &lt;&lt; 3</code>: +0 the
 * resolved namespace (or ""), +1 qname, +2 local name, +3 value, +4 type
 * name, +5 "d", "D" or null depending on the flags.</p>
 *
 * @param att An object which represents current attribute.
 * @exception Exception is parser specific exception from panic method.
 *   The original documentation also lists IOException here.
 */
@SuppressWarnings("fallthrough")
private void attr(Pair att) throws Exception {
    switch (wsskip()) {
        case '/':
        case '>':
            if ((att.num & 0x2) == 0) {  // all attributes have been read
                att.num |= 0x2;  // set default attribute flag
                Input inp = mInp;
                // Go through all attributes defined on current element.
                for (Pair def = mElm.list; def != null; def = def.next) {
                    if (def.list == null) {  // no default value
                        continue;
                    }
                    // Go through all attributes defined on current
                    // element and add defaults.
                    Pair act = find(att.next, def.chars);
                    if (act == null) {
                        // Not given in the tag: push the stored
                        // "name='value' " chars as a new input.
                        push(new Input(def.list.chars));
                    }
                }
                if (mInp != inp) {  // defaults have been added
                    attr(att);
                    return;
                }
            }
            // Ensure the attribute string array capacity
            mAttrs.setLength(mAttrIdx);
            mItems = mAttrs.mItems;
            return;

        case EOS:
            // No break after panic: if it returned, control would fall
            // through to the default branch.
            panic(FAULT);

        default:
            // Read the attribute name and value
            att.chars = qname(mIsNSAware);
            att.name = att.local();
            String type = atype(att);  // sets attribute's type on att.id
            wsskip();
            if (getch() != '=') {
                panic(FAULT);
            }
            bqstr((char) att.id);  // read the value with normalization.
            String val = new String(mBuff, 1, mBuffIdx);
            Pair next = pair(att);
            next.num = (att.num & ~0x1);  // inherit attribute flags
            // Put a namespace declaration on top of the prefix stack
            if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
                // An ordinary attribute
                mAttrIdx++;
                attr(next);  // recursive call to parse the next attribute
                mAttrIdx--;
                // Add the attribute to the attributes string array
                char idx = (char) (mAttrIdx << 3);
                mItems[idx + 1] = att.qname();  // attr qname
                mItems[idx + 2] = (mIsNSAware) ? att.name : "";  // attr local name
                mItems[idx + 3] = val;   // attr value
                mItems[idx + 4] = type;  // attr type
                switch (att.num & 0x3) {
                    case 0x0:
                        mItems[idx + 5] = null;
                        break;

                    case 0x1:  // declared attribute
                        mItems[idx + 5] = "d";
                        break;

                    default:  // 0x2, 0x3 - default attribute always declared
                        mItems[idx + 5] = "D";
                        break;
                }
                // Resolve the prefix if any and report the attribute
                // NOTE: The attribute does not accept the default namespace.
                mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : "";
            } else {
                // A namespace declaration. mPref.name contains prefix and
                // mPref.value contains namespace URI set by isdecl method.
                // Report a start of the new mapping
                newPrefix();
                // Recursive call to parse the next attribute
                attr(next);
                // NOTE: The namespace declaration is not reported.
            }
            del(next);
            break;
    }
}
Retrieves attribute type. This method sets the type of normalization in the attribute id field and returns the name of attribute type.
Params:
  • att – An object which represents current attribute.
Throws:
  • Exception – is parser specific exception from the panic method.
Returns:The name of the attribute type.
/** * Retrieves attribute type. * * This method sets the type of normalization in the attribute * <code>id</code> field and returns the name of attribute type. * * @param att An object which represents current attribute. * @return The name of the attribute type. * @exception Exception is parser specific exception form panic method. */
private String atype(Pair att) throws Exception { Pair attr; // CDATA-type normalization by default [#3.3.3] att.id = 'c'; if (mElm.list == null || (attr = find(mElm.list, att.chars)) == null) { return "CDATA"; } att.num |= 0x1; // attribute is declared // Non-CDATA normalization except when the attribute type is CDATA. att.id = 'i'; switch (attr.id) { case 'i': return "ID"; case 'r': return "IDREF"; case 'R': return "IDREFS"; case 'n': return "ENTITY"; case 'N': return "ENTITIES"; case 't': return "NMTOKEN"; case 'T': return "NMTOKENS"; case 'u': return "NMTOKEN"; case 'o': return "NOTATION"; case 'c': att.id = 'c'; return "CDATA"; default: panic(FAULT); } return null; }
Parses a comment. The '<!' part is read in dispatcher so the method starts with first '-' after '<!'.
Throws:
  • Exception – is parser specific exception form panic method.
/** * Parses a comment. * * The &apos;&lt;!&apos; part is read in dispatcher so the method starts * with first &apos;-&apos; after &apos;&lt;!&apos;. * * @exception Exception is parser specific exception form panic method. */
@SuppressWarnings("fallthrough") private void comm() throws Exception { if (mPh == PH_DOC_START) { mPh = PH_MISC_DTD; // misc before DTD } // '<!' has been already read by dispetcher. char ch; mBuffIdx = -1; for (short st = 0; st >= 0;) { ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch(); if (ch == EOS) { panic(FAULT); } switch (st) { case 0: // first '-' of the comment open if (ch == '-') { st = 1; } else { panic(FAULT); } break; case 1: // secind '-' of the comment open if (ch == '-') { st = 2; } else { panic(FAULT); } break; case 2: // skip the comment body switch (ch) { case '-': st = 3; break; default: bappend(ch); break; } break; case 3: // second '-' of the comment close switch (ch) { case '-': st = 4; break; default: bappend('-'); bappend(ch); st = 2; break; } break; case 4: // '>' of the comment close if (ch == '>') { comm(mBuff, mBuffIdx + 1); st = -1; break; } // else - panic [#2.5 compatibility note] default: panic(FAULT); } } }
Parses a processing instruction. The '<?' is read in dispatcher so the method starts with first character of PI target name after '<?'.
Throws:
  • Exception – is parser specific exception form panic method.
  • IOException
/** * Parses a processing instruction. * * The &apos;&lt;?&apos; is read in dispatcher so the method starts with * first character of PI target name after &apos;&lt;?&apos;. * * @exception Exception is parser specific exception form panic method. * @exception IOException */
private void pi() throws Exception { // '<?' has been already read by dispetcher. char ch; String str = null; mBuffIdx = -1; for (short st = 0; st >= 0;) { ch = getch(); if (ch == EOS) { panic(FAULT); } switch (st) { case 0: // read the PI target name switch (chtyp(ch)) { case 'a': case 'A': case '_': case ':': case 'X': bkch(); str = name(false); // PI target name may not be empty string [#2.6] // PI target name 'XML' is reserved [#2.6] if ((str.isEmpty()) || (mXml.name.equals(str.toLowerCase()) == true)) { panic(FAULT); } // This is processing instruction if (mPh == PH_DOC_START) // the begining of the document { mPh = PH_MISC_DTD; // misc before DTD } wsskip(); // skip spaces after the PI target name st = 1; // accumulate the PI body mBuffIdx = -1; break; default: panic(FAULT); } break; case 1: // accumulate the PI body switch (ch) { case '?': st = 2; // end of the PI body break; default: bappend(ch); break; } break; case 2: // end of the PI body switch (ch) { case '>': // PI has been read. pi(str, new String(mBuff, 0, mBuffIdx + 1)); st = -1; break; case '?': bappend('?'); break; default: bappend('?'); bappend(ch); st = 1; // accumulate the PI body break; } break; default: panic(FAULT); } } }
Parses a character data. The '<!' part is read in dispatcher so the method starts with first '[' after '<!'.
Throws:
  • Exception – is parser specific exception form panic method.
  • IOException
/** * Parses a character data. * * The &apos;&lt;!&apos; part is read in dispatcher so the method starts * with first &apos;[&apos; after &apos;&lt;!&apos;. * * @exception Exception is parser specific exception form panic method. * @exception IOException */
private void cdat() throws Exception { // '<!' has been already read by dispetcher. char ch; mBuffIdx = -1; for (short st = 0; st >= 0;) { ch = getch(); switch (st) { case 0: // the first '[' of the CDATA open if (ch == '[') { st = 1; } else { panic(FAULT); } break; case 1: // read "CDATA" if (chtyp(ch) == 'A') { bappend(ch); } else { if ("CDATA".equals( new String(mBuff, 0, mBuffIdx + 1)) != true) { panic(FAULT); } bkch(); st = 2; } break; case 2: // the second '[' of the CDATA open if (ch != '[') { panic(FAULT); } mBuffIdx = -1; st = 3; break; case 3: // read data before the first ']' if (ch != ']') { bappend(ch); } else { st = 4; } break; case 4: // read the second ']' or continue to read the data if (ch != ']') { bappend(']'); bappend(ch); st = 3; } else { st = 5; } break; case 5: // read '>' or continue to read the data switch (ch) { case ']': bappend(']'); break; case '>': bflash(); st = -1; break; default: bappend(']'); bappend(']'); bappend(ch); st = 3; break; } break; default: panic(FAULT); } } }
Reads a xml name. The xml name must conform "Namespaces in XML" specification. Therefore the ':' character is not allowed in the name. This method should be used for PI and entity names which may not have a namespace according to the specification mentioned above.
Params:
  • ns – The true value turns namespace conformance on.
Throws:
Returns:The name has been read.
/**
 * Reads an XML name and returns it as a string.
 *
 * The parser buffer is reset and the name is read by <code>bname</code>,
 * which reserves the first buffer slot for the colon offset; the returned
 * string therefore starts at buffer index 1. This method is used for names
 * such as PI targets, which are read with <code>ns == false</code>.
 *
 * @param ns The true value turns namespace conformance on.
 * @return The name that has been read.
 * @throws Exception When an incorrect character appears in the name.
 */
protected String name(boolean ns) throws Exception {
    mBuffIdx = -1;  // start from an empty buffer
    bname(ns);
    // Slot 0 holds the colon offset; the name occupies 1..mBuffIdx.
    final int nameLength = mBuffIdx;
    return new String(mBuff, 1, nameLength);
}
Reads a qualified xml name. The characters of a qualified name is an array of characters. The first (chars[0]) character is the index of the colon character which separates the prefix from the local name. If the index is zero, the name does not contain separator or the parser works in the namespace unaware mode. The length of qualified name is the length of the array minus one.
Params:
  • ns – The true value turns namespace conformance on.
Throws:
Returns:The characters of a qualified name.
/**
 * Reads a qualified XML name.
 *
 * The result is an array of characters whose first element (chars[0]) is
 * the index of the colon character which separates the prefix from the
 * local name. If that index is zero, the name contains no separator or the
 * parser works in the namespace unaware mode. The length of the qualified
 * name is the length of the array minus one.
 *
 * @param ns The true value turns namespace conformance on.
 * @return The characters of a qualified name.
 * @throws Exception When an incorrect character appears in the name.
 */
protected char[] qname(boolean ns) throws Exception {
    mBuffIdx = -1;  // start from an empty buffer
    bname(ns);
    // Copy the colon offset slot together with the name characters.
    final int total = mBuffIdx + 1;
    final char[] result = new char[total];
    System.arraycopy(mBuff, 0, result, 0, total);
    return result;
}
Reads the public or/and system identifiers.
Params:
  • inp – The input object.
Throws:
/**
 * Reads the public and/or system identifiers into an input object.
 *
 * @param inp The input object that receives <code>pubid</code> and
 *        <code>sysid</code>.
 * @throws Exception the parser-specific exception raised by the panic
 *         method.
 */
private void pubsys(Input inp) throws Exception {
    // ' ' (anything but 'N') requires a system id after a public id.
    final Pair ids = pubsys(' ');
    inp.pubid = ids.name;
    inp.sysid = ids.value;
    del(ids);  // give the temporary pair back
}
Reads the public or/and system identifiers.
Params:
  • flag – The 'N' allows public id be without system id.
Throws:
Returns:The public or/and system identifiers pair.
/** * Reads the public or/and system identifiers. * * @param flag The 'N' allows public id be without system id. * @return The public or/and system identifiers pair. * @exception Exception is parser specific exception form panic method. * @exception IOException */
@SuppressWarnings("fallthrough") private Pair pubsys(char flag) throws Exception { Pair ids = pair(null); String str = name(false); if ("PUBLIC".equals(str) == true) { bqstr('i'); // non-CDATA normalization [#4.2.2] ids.name = new String(mBuff, 1, mBuffIdx); switch (wsskip()) { case '\"': case '\'': bqstr(' '); ids.value = new String(mBuff, 1, mBuffIdx); break; case EOS: panic(FAULT); default: if (flag != 'N') // [#4.7] { panic(FAULT); } ids.value = null; break; } return ids; } else if ("SYSTEM".equals(str) == true) { ids.name = null; bqstr(' '); ids.value = new String(mBuff, 1, mBuffIdx); return ids; } panic(FAULT); return null; }
Reads an attribute value. The grammar this method can read is:

eqstr := S "=" qstr
qstr  := S ("'" string "'") | ('"' string '"')
This method resolves entities inside a string unless the parser parses DTD.
Params:
  • flag – The '=' character forces the method to accept the '=' character before quoted string and read the following string as not an attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' - not an attribute value; 'd' - in DTD context.
Throws:
Returns:The content of the quoted string as a string.
/**
 * Reads an attribute value.
 *
 * The grammar this method can read is:
 * <pre>{@code
 * eqstr := S "=" qstr
 * qstr  := S ("'" string "'") | ('"' string '"')
 * }</pre>
 * This method resolves entities inside a string unless the parser parses
 * DTD.
 *
 * @param flag The '=' character forces the method to accept the '='
 *        character before the quoted string and to read the following
 *        string as not an attribute ('-'); 'c' - CDATA, 'i' - non CDATA,
 *        ' ' - no normalization; '-' - not an attribute value; 'd' - in
 *        DTD context.
 * @return The content of the quoted string as a string.
 * @throws Exception the parser-specific exception raised by the panic
 *         method.
 */
protected String eqstr(char flag) throws Exception {
    char mode = flag;
    if (flag == '=') {
        // Consume S "=" and read the value as "not an attribute".
        wsskip();
        if (getch() != '=') {
            panic(FAULT);
        }
        mode = '-';
    }
    bqstr(mode);
    // Slot 0 of the buffer is the colon offset placeholder.
    return new String(mBuff, 1, mBuffIdx);
}
Resolves an entity. This method resolves built-in and character entity references. It also reports external entities to the application.
Params:
  • flag – The 'x' character forces the method to report a skipped entity; 'i' character - indicates non-CDATA normalization.
Throws:
Returns:Name of unresolved entity or null if entity had been resolved successfully.
/**
 * Resolves an entity reference.
 *
 * This method resolves built-in and character entity references in place
 * (the replacement character is appended to the parser buffer). A general
 * entity found in the entity table is pushed as a new input; for an
 * external one <code>resolveEnt</code> is consulted first. The leading
 * '&amp;' has already been consumed by the caller.
 *
 * @param flag The 'x' character lets the method return the name of an
 *        unresolved entity instead of raising a fatal error; the flag is
 *        also passed to the normalizing <code>bappend</code> as the
 *        normalization mode (e.g. 'i' - non-CDATA normalization).
 * @return Name of unresolved entity or <code>null</code> if entity had been
 *         resolved successfully.
 * @throws Exception the parser-specific exception raised by the panic
 *         method.
 */
@SuppressWarnings("fallthrough")
private String ent(char flag) throws Exception {
    char ch;
    // Buffer position of the '&' appended below; used to roll the buffer
    // back once the reference has been recognized.
    int idx = mBuffIdx + 1;
    Input inp = null;
    String str = null;
    mESt = 0x100;  // reset the built-in entity recognizer
    bappend('&');
    for (short st = 0; st >= 0;) {
        ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
        switch (st) {
            case 0:     // the first character of the entity name
            case 1:     // read built-in entity name
                switch (chtyp(ch)) {
                    case 'd':
                    case '.':
                    case '-':
                        // Not allowed as the first character of a name.
                        if (st != 1) {
                            panic(FAULT);
                        }
                        // intentional fallthrough
                    case 'a':
                    case 'A':
                    case '_':
                    case 'X':
                        bappend(ch);
                        eappend(ch);
                        st = 1;
                        break;

                    case ':':
                        if (mIsNSAware != false) {
                            panic(FAULT);
                        }
                        bappend(ch);
                        eappend(ch);
                        st = 1;
                        break;

                    case ';':
                        if (mESt < 0x100) {
                            // The entity is a built-in entity
                            mBuffIdx = idx - 1;
                            bappend(mESt);
                            st = -1;
                            break;
                        } else if (mPh == PH_DTD) {
                            // In DTD entity declaration has to resolve character
                            // entities and include "as is" others. [#4.4.7]
                            bappend(';');
                            st = -1;
                            break;
                        }
                        // Convert an entity name to a string
                        str = new String(mBuff, idx + 1, mBuffIdx - idx);
                        inp = mEnt.get(str);
                        // Restore the buffer offset
                        mBuffIdx = idx - 1;
                        if (inp != null) {
                            if (inp.chars == null) {
                                // External entity
                                InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
                                if (is != null) {
                                    push(new Input(BUFFSIZE_READER));
                                    setinp(is);
                                    mInp.pubid = inp.pubid;
                                    mInp.sysid = inp.sysid;
                                    str = null;  // the entity is resolved
                                } else {
                                    // Unresolved external entity
                                    if (flag != 'x') {
                                        panic(FAULT);  // unknown entity within markup
                                    }
                                    // str is name of unresolved entity
                                }
                            } else {
                                // Internal entity
                                push(inp);
                                str = null;  // the entity is resolved
                            }
                        } else {
                            // Unknown or general unparsed entity
                            if (flag != 'x') {
                                panic(FAULT);  // unknown entity within markup
                            }
                            // str is name of unresolved entity
                        }
                        st = -1;
                        break;

                    case '#':
                        // '#' is only valid right after '&'.
                        if (st != 0) {
                            panic(FAULT);
                        }
                        st = 2;
                        break;

                    default:
                        panic(FAULT);
                }
                break;

            case 2:     // read character entity
                switch (chtyp(ch)) {
                    case 'd':
                        bappend(ch);
                        break;

                    case ';':
                        // Convert the character entity to a character
                        try {
                            int i = Integer.parseInt(
                                    new String(mBuff, idx + 1, mBuffIdx - idx), 10);
                            // Values at or above 0xffff (the EOS marker)
                            // are rejected.
                            if (i >= 0xffff) {
                                panic(FAULT);
                            }
                            ch = (char) i;
                        } catch (NumberFormatException nfe) {
                            panic(FAULT);
                        }
                        // Restore the buffer offset
                        mBuffIdx = idx - 1;
                        if (ch == ' ' || mInp.next != null) {
                            bappend(ch, flag);
                        } else {
                            bappend(ch);
                        }
                        st = -1;
                        break;

                    case 'a':
                        // If the entity buffer is empty and ch == 'x'
                        if ((mBuffIdx == idx) && (ch == 'x')) {
                            st = 3;
                            break;
                        }
                        // intentional fallthrough
                    default:
                        panic(FAULT);
                }
                break;

            case 3:     // read hex character entity
                switch (chtyp(ch)) {
                    case 'A':
                    case 'a':
                    case 'd':
                        bappend(ch);
                        break;

                    case ';':
                        // Convert the character entity to a character
                        try {
                            int i = Integer.parseInt(
                                    new String(mBuff, idx + 1, mBuffIdx - idx), 16);
                            if (i >= 0xffff) {
                                panic(FAULT);
                            }
                            ch = (char) i;
                        } catch (NumberFormatException nfe) {
                            panic(FAULT);
                        }
                        // Restore the buffer offset
                        mBuffIdx = idx - 1;
                        if (ch == ' ' || mInp.next != null) {
                            bappend(ch, flag);
                        } else {
                            bappend(ch);
                        }
                        st = -1;
                        break;

                    default:
                        panic(FAULT);
                }
                break;

            default:
                panic(FAULT);
        }
    }

    return str;
}
Resolves a parameter entity. This method resolves parameter entity references. It also reports external entities to the application.
Params:
  • flag – The '-' instruct the method to do not set up surrounding spaces [#4.4.8].
Throws:
/** * Resoves a parameter entity. * * This method resolves a parameter entity references. It is also reports * external entities to the application. * * @param flag The '-' instruct the method to do not set up surrounding * spaces [#4.4.8]. * @exception Exception is parser specific exception form panic method. * @exception IOException */
@SuppressWarnings("fallthrough") private void pent(char flag) throws Exception { char ch; int idx = mBuffIdx + 1; Input inp = null; String str = null; bappend('%'); if (mPh != PH_DTD) // the DTD internal subset { return; // Not Recognized [#4.4.1] } // Read entity name bname(false); str = new String(mBuff, idx + 2, mBuffIdx - idx - 1); if (getch() != ';') { panic(FAULT); } inp = mPEnt.get(str); // Restore the buffer offset mBuffIdx = idx - 1; if (inp != null) { if (inp.chars == null) { // External parameter entity InputSource is = resolveEnt(str, inp.pubid, inp.sysid); if (is != null) { if (flag != '-') { bappend(' '); // tail space } push(new Input(BUFFSIZE_READER)); // BUG: there is no leading space! [#4.4.8] setinp(is); mInp.pubid = inp.pubid; mInp.sysid = inp.sysid; } else { // Unresolved external parameter entity skippedEnt("%" + str); } } else { // Internal parameter entity if (flag == '-') { // No surrounding spaces inp.chIdx = 1; } else { // Insert surrounding spaces bappend(' '); // tail space inp.chIdx = 0; } push(inp); } } else { // Unknown parameter entity skippedEnt("%" + str); } }
Recognizes and handles a namespace declaration. This method identifies a type of namespace declaration if any and puts new mapping on top of prefix stack.
Params:
  • name – The attribute qualified name (name.value is a String object which represents the attribute prefix).
  • value – The attribute value.
Returns:true if a namespace declaration is recognized.
/** * Recognizes and handles a namespace declaration. * * This method identifies a type of namespace declaration if any and puts * new mapping on top of prefix stack. * * @param name The attribute qualified name (<code>name.value</code> is a * <code>String</code> object which represents the attribute prefix). * @param value The attribute value. * @return <code>true</code> if a namespace declaration is recognized. */
private boolean isdecl(Pair name, String value) { if (name.chars[0] == 0) { if ("xmlns".equals(name.name) == true) { // New default namespace declaration mPref = pair(mPref); mPref.list = mElm; // prefix owner element mPref.value = value; mPref.name = ""; mPref.chars = NONS; mElm.num++; // namespace counter return true; } } else { if (name.eqpref(XMLNS) == true) { // New prefix declaration int len = name.name.length(); mPref = pair(mPref); mPref.list = mElm; // prefix owner element mPref.value = value; mPref.name = name.name; mPref.chars = new char[len + 1]; mPref.chars[0] = (char) (len + 1); name.name.getChars(0, len, mPref.chars, 1); mElm.num++; // namespace counter return true; } } return false; }
Resolves a prefix.
Throws:
  • Exception – When mapping for specified prefix is not found.
Returns:The namespace assigned to the prefix.
/** * Resolves a prefix. * * @return The namespace assigned to the prefix. * @exception Exception When mapping for specified prefix is not found. */
private String rslv(char[] qname) throws Exception { for (Pair pref = mPref; pref != null; pref = pref.next) { if (pref.eqpref(qname) == true) { return pref.value; } } if (qname[0] == 1) { // QNames like ':local' for (Pair pref = mPref; pref != null; pref = pref.next) { if (pref.chars[0] == 0) { return pref.value; } } } panic(FAULT); return null; }
Skips xml white space characters. This method skips white space characters (' ', '\t', '\n', '\r') and looks ahead not white space character.
Throws:
Returns:The first not white space look ahead character.
/** * Skips xml white space characters. * * This method skips white space characters (' ', '\t', '\n', '\r') and * looks ahead not white space character. * * @return The first not white space look ahead character. * @exception IOException */
protected char wsskip() throws IOException { char ch; while (true) { // Read next character ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch(); if (ch < 0x80) { if (nmttyp[ch] != 3) // [ \t\n\r] { break; } } else { break; } } mChIdx--; // bkch(); return ch; }
Reports document type.
Params:
  • name – The name of the entity.
  • pubid – The public identifier of the entity or null.
  • sysid – The system identifier of the entity or null.
/**
 * Reports document type.
 *
 * @param name The name of the entity.
 * @param pubid The public identifier of the entity or <code>null</code>.
 * @param sysid The system identifier of the entity or <code>null</code>.
 * @throws SAXException as declared by the signature; presumably raised by
 *         the receiver of the event.
 */
protected abstract void docType(String name, String pubid, String sysid) throws SAXException;
Reports the start of DTD internal subset.
Throws:
  • SAXException – if the receiver throws SAXException
/**
 * Reports the start of the DTD internal subset.
 *
 * @throws SAXException if the receiver throws SAXException
 */
public abstract void startInternalSub () throws SAXException;
Reports a comment.
Params:
  • text – The comment text starting from the first character.
  • length – The number of characters in comment.
/**
 * Reports a comment.
 *
 * The parsing counterpart <code>comm()</code> passes the parser buffer
 * itself as <code>text</code>, so only the first <code>length</code>
 * characters are meaningful.
 *
 * @param text The comment text starting from the first character.
 * @param length The number of characters in the comment.
 */
protected abstract void comm(char[] text, int length);
Reports a processing instruction.
Params:
  • target – The processing instruction target name.
  • body – The processing instruction body text.
/**
 * Reports a processing instruction.
 *
 * @param target The processing instruction target name.
 * @param body The processing instruction body text.
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract void pi(String target, String body) throws Exception;
Reports new namespace prefix. The Namespace prefix ( mPref.name) being declared and the Namespace URI ( mPref.value) the prefix is mapped to. An empty string is used for the default element namespace, which has no prefix.
/**
 * Reports a new namespace prefix.
 *
 * The namespace prefix (<code>mPref.name</code>) being declared and the
 * namespace URI (<code>mPref.value</code>) the prefix is mapped to are
 * taken from the top of the prefix stack. An empty string is used for the
 * default element namespace, which has no prefix.
 *
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract void newPrefix() throws Exception;
Reports skipped entity name.
Params:
  • name – The entity name.
/**
 * Reports a skipped entity name.
 *
 * Parameter entities are reported by <code>pent</code> with a leading
 * '%' in front of the name.
 *
 * @param name The entity name.
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract void skippedEnt(String name) throws Exception;
Returns an InputSource for specified entity or null.
Params:
  • name – The name of the entity.
  • pubid – The public identifier of the entity.
  • sysid – The system identifier of the entity.
/**
 * Returns an <code>InputSource</code> for the specified entity or
 * <code>null</code>.
 *
 * Callers in this class treat a <code>null</code> result as "entity not
 * resolved".
 *
 * @param name The name of the entity.
 * @param pubid The public identifier of the entity.
 * @param sysid The system identifier of the entity.
 * @return The input source to read the entity from, or <code>null</code>.
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract InputSource resolveEnt( String name, String pubid, String sysid) throws Exception;
Reports notation declaration.
Params:
  • name – The notation's name.
  • pubid – The notation's public identifier, or null if none was given.
  • sysid – The notation's system identifier, or null if none was given.
/**
 * Reports a notation declaration.
 *
 * @param name The notation's name.
 * @param pubid The notation's public identifier, or null if none was given.
 * @param sysid The notation's system identifier, or null if none was given.
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract void notDecl(String name, String pubid, String sysid) throws Exception;
Reports unparsed entity name.
Params:
  • name – The unparsed entity's name.
  • pubid – The entity's public identifier, or null if none was given.
  • sysid – The entity's system identifier.
  • notation – The name of the associated notation.
/**
 * Reports an unparsed entity declaration.
 *
 * @param name The unparsed entity's name.
 * @param pubid The entity's public identifier, or null if none was given.
 * @param sysid The entity's system identifier.
 * @param notation The name of the associated notation.
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract void unparsedEntDecl( String name, String pubid, String sysid, String notation) throws Exception;
Notifies the handler about fatal parsing error.
Params:
  • msg – The problem description message.
/**
 * Notifies the handler about a fatal parsing error.
 *
 * NOTE(review): callers in this class are written as if this method never
 * returns normally (e.g. <code>panic(FAULT); return null;</code>) - confirm
 * that every implementation throws.
 *
 * @param msg The problem description message.
 * @throws Exception the parser-specific exception describing the error.
 */
protected abstract void panic(String msg) throws Exception;
Reads a qualified xml name. This is low level routine which leaves a qName in the buffer. The characters of a qualified name is an array of characters. The first (chars[0]) character is the index of the colon character which separates the prefix from the local name. If the index is zero, the name does not contain separator or the parser works in the namespace unaware mode. The length of qualified name is the length of the array minus one.
Params:
  • ns – The true value turns namespace conformance on.
Throws:
/**
 * Reads a qualified XML name into the parser buffer.
 *
 * This is a low level routine which leaves a qName in the buffer. One
 * buffer slot is reserved in front of the name and receives the offset of
 * the colon character which separates the prefix from the local name. If
 * that offset is zero, the name does not contain a separator or the parser
 * works in the namespace unaware mode.
 *
 * Characters are not appended one by one: the routine only tracks where
 * the name would end (<code>bchidx</code>) and copies whole runs from the
 * character buffer with <code>bcopy</code>, either when the character
 * buffer is exhausted or when the name is complete.
 *
 * @param ns The true value turns namespace conformance on.
 * @throws Exception the parser-specific exception raised by the panic
 *         method.
 */
private void bname(boolean ns) throws Exception {
    char ch;
    char type;
    mBuffIdx++;  // allocate a char for colon offset
    int bqname = mBuffIdx;      // buffer index of the colon offset slot
    int bcolon = bqname;        // buffer index of the colon (none yet)
    int bchidx = bqname + 1;    // buffer index for the next name char
    int bstart = bchidx;        // buffer index where the pending run starts
    int cstart = mChIdx;        // mChars index where the pending run starts
    // States 0/1 read the prefix, 2/3 the suffix (local name); without
    // namespace conformance the whole name is read as a suffix.
    short st = (short) ((ns == true) ? 0 : 2);
    while (true) {
        // Read next character
        if (mChIdx >= mChLen) {
            // The character buffer is exhausted: save the pending run,
            // then let getch() refill the buffer and un-read its result.
            bcopy(cstart, bstart);
            getch();
            mChIdx--;  // bkch();
            cstart = mChIdx;
            bstart = bchidx;
        }
        ch = mChars[mChIdx++];
        type = (char) 0;  // [X]
        if (ch < 0x80) {
            type = (char) nmttyp[ch];
        } else if (ch == EOS) {
            panic(FAULT);
        }
        // Parse QName
        switch (st) {
            case 0:     // read the first char of the prefix
            case 2:     // read the first char of the suffix
                switch (type) {
                    case 0:  // [aA_X]
                        bchidx++;  // append char to the buffer
                        st++;      // (st == 0)? 1: 3;
                        break;

                    case 1:  // [:]
                        mChIdx--;  // bkch();
                        st++;      // (st == 0)? 1: 3;
                        break;

                    default:
                        panic(FAULT);
                }
                break;

            case 1:     // read the prefix
            case 3:     // read the suffix
                switch (type) {
                    case 0:  // [aA_X]
                    case 2:  // [.-d]
                        bchidx++;  // append char to the buffer
                        break;

                    case 1:  // [:]
                        bchidx++;  // append char to the buffer
                        if (ns == true) {
                            if (bcolon != bqname) {
                                panic(FAULT);  // it must be only one colon
                            }
                            bcolon = bchidx - 1;
                            if (st == 1) {
                                st = 2;
                            }
                        }
                        break;

                    default:
                        // End of the name: un-read the terminator, flush
                        // the pending run and store the colon offset.
                        mChIdx--;  // bkch();
                        bcopy(cstart, bstart);
                        mBuff[bqname] = (char) (bcolon - bqname);
                        return;
                }
                break;

            default:
                panic(FAULT);
        }
    }
}
Reads a nmtoken. This is low level routine which leaves a nmtoken in the buffer.
Throws:
  • Exception – is parser specific exception form panic method.
  • IOException
/** * Reads a nmtoken. * * This is low level routine which leaves a nmtoken in the buffer. * * @exception Exception is parser specific exception form panic method. * @exception IOException */
@SuppressWarnings("fallthrough") private void bntok() throws Exception { char ch; mBuffIdx = -1; bappend((char) 0); // default offset to the colon char while (true) { ch = getch(); switch (chtyp(ch)) { case 'a': case 'A': case 'd': case '.': case ':': case '-': case '_': case 'X': bappend(ch); break; case 'Z': panic(FAULT); default: bkch(); return; } } }
Recognizes a keyword. This is low level routine which recognizes one of keywords in the buffer. Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN - t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED - Q IMPLIED - I FIXED - F
Throws:
Returns:an id of a keyword or '?'.
/** * Recognizes a keyword. * * This is low level routine which recognizes one of keywords in the buffer. * Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN - * t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED - * Q IMPLIED - I FIXED - F * * @return an id of a keyword or '?'. * @exception Exception is parser specific exception form panic method. * @exception IOException */
private char bkeyword() throws Exception { String str = new String(mBuff, 1, mBuffIdx); switch (str.length()) { case 2: // ID return ("ID".equals(str) == true) ? 'i' : '?'; case 5: // IDREF, CDATA, FIXED switch (mBuff[1]) { case 'I': return ("IDREF".equals(str) == true) ? 'r' : '?'; case 'C': return ("CDATA".equals(str) == true) ? 'c' : '?'; case 'F': return ("FIXED".equals(str) == true) ? 'F' : '?'; default: break; } break; case 6: // IDREFS, ENTITY switch (mBuff[1]) { case 'I': return ("IDREFS".equals(str) == true) ? 'R' : '?'; case 'E': return ("ENTITY".equals(str) == true) ? 'n' : '?'; default: break; } break; case 7: // NMTOKEN, IMPLIED, ATTLIST, ELEMENT switch (mBuff[1]) { case 'I': return ("IMPLIED".equals(str) == true) ? 'I' : '?'; case 'N': return ("NMTOKEN".equals(str) == true) ? 't' : '?'; case 'A': return ("ATTLIST".equals(str) == true) ? 'a' : '?'; case 'E': return ("ELEMENT".equals(str) == true) ? 'e' : '?'; default: break; } break; case 8: // ENTITIES, NMTOKENS, NOTATION, REQUIRED switch (mBuff[2]) { case 'N': return ("ENTITIES".equals(str) == true) ? 'N' : '?'; case 'M': return ("NMTOKENS".equals(str) == true) ? 'T' : '?'; case 'O': return ("NOTATION".equals(str) == true) ? 'o' : '?'; case 'E': return ("REQUIRED".equals(str) == true) ? 'Q' : '?'; default: break; } break; default: break; } return '?'; }
Reads a single or double quoted string into the buffer. This method resolves entities inside a string unless the parser parses DTD.
Params:
  • flag – 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' - not an attribute value; 'd' - in DTD context.
Throws:
/**
 * Reads a single or double quoted string into the buffer.
 *
 * Leading white space before the opening quote is skipped. The string
 * content is stored starting at buffer index 1 (index 0 is the colon
 * offset placeholder). This method resolves entities inside a string
 * unless the parser parses DTD.
 *
 * @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
 *        not an attribute value; 'd' - in DTD context.
 * @throws Exception the parser-specific exception raised by the panic
 *         method.
 */
@SuppressWarnings("fallthrough")
private void bqstr(char flag) throws Exception {
    Input inp = mInp;  // remember the original input
    mBuffIdx = -1;
    bappend((char) 0);  // default offset to the colon char
    char ch;
    for (short st = 0; st >= 0;) {
        ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
        switch (st) {
            case 0:  // read a single or double quote
                switch (ch) {
                    case ' ':
                    case '\n':
                    case '\r':
                    case '\t':
                        break;

                    case '\'':
                        st = 2;  // read a single quoted string
                        break;

                    case '\"':
                        st = 3;  // read a double quoted string
                        break;

                    default:
                        panic(FAULT);
                        break;
                }
                break;

            case 2:  // read a single quoted string
            case 3:  // read a double quoted string
                switch (ch) {
                    case '\'':
                        // A quote closes the string only when it matches
                        // the opening one and is read from the original
                        // input (not from a pushed entity).
                        if ((st == 2) && (mInp == inp)) {
                            st = -1;
                        } else {
                            bappend(ch);
                        }
                        break;

                    case '\"':
                        if ((st == 3) && (mInp == inp)) {
                            st = -1;
                        } else {
                            bappend(ch);
                        }
                        break;

                    case '&':
                        // General entities are resolved outside the DTD.
                        if (flag != 'd') {
                            ent(flag);
                        } else {
                            bappend(ch);
                        }
                        break;

                    case '%':
                        // Parameter entities are resolved in DTD context.
                        if (flag == 'd') {
                            pent('-');
                        } else {
                            bappend(ch);
                        }
                        break;

                    case '<':
                        // '<' is accepted only when the string is not an
                        // attribute value or when in DTD context.
                        if ((flag == '-') || (flag == 'd')) {
                            bappend(ch);
                        } else {
                            panic(FAULT);
                        }
                        break;

                    case EOS:  // EOS before single/double quote
                        panic(FAULT);
                        // intentional fallthrough

                    case '\r':  // EOL processing [#2.11 & #3.3.3]
                        if (flag != ' ' && mInp.next == null) {
                            // "\r\n" and a lone "\r" both become "\n".
                            if (getch() != '\n') {
                                bkch();
                            }
                            ch = '\n';
                        }
                        // intentional fallthrough

                    default:
                        bappend(ch, flag);
                        break;
                }
                break;

            default:
                panic(FAULT);
        }
    }
    // There is maximum one space at the end of the string in
    // i-mode (non CDATA normalization) and it has to be removed.
    if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) {
        mBuffIdx -= 1;
    }
}
Reports characters and empties the parser's buffer. This method is called only if parser is going to return control to the main loop. This means that this method may use parser buffer to report white space without copying characters to temporary buffer.
/**
 * Reports characters and empties the parser's buffer.
 *
 * This method is called only if the parser is going to return control to
 * the main loop. This means that this method may use the parser buffer to
 * report characters without copying them to a temporary buffer.
 *
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract void bflash() throws Exception;
Reports white space characters and empties the parser's buffer. This method is called only if parser is going to return control to the main loop. This means that this method may use parser buffer to report white space without copying characters to temporary buffer.
/**
 * Reports white space characters and empties the parser's buffer.
 *
 * This method is called only if the parser is going to return control to
 * the main loop. This means that this method may use the parser buffer to
 * report white space without copying characters to a temporary buffer.
 *
 * @throws Exception as declared by the signature; the concrete cause is
 *         up to the implementation.
 */
protected abstract void bflash_ws() throws Exception;
Appends a character to parser's buffer with normalization.
Params:
  • ch – The character to append to the buffer.
  • mode – The normalization mode.
/** * Appends a character to parser's buffer with normalization. * * @param ch The character to append to the buffer. * @param mode The normalization mode. */
private void bappend(char ch, char mode) { // This implements attribute value normalization as // described in the XML specification [#3.3.3]. switch (mode) { case 'i': // non CDATA normalization switch (ch) { case ' ': case '\n': case '\r': case '\t': if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) { bappend(' '); } return; default: break; } break; case 'c': // CDATA normalization switch (ch) { case '\n': case '\r': case '\t': ch = ' '; break; default: break; } break; default: // no normalization break; } mBuffIdx++; if (mBuffIdx < mBuff.length) { mBuff[mBuffIdx] = ch; } else { mBuffIdx--; bappend(ch); } }
Appends a character to parser's buffer.
Params:
  • ch – The character to append to the buffer.
/** * Appends a character to parser's buffer. * * @param ch The character to append to the buffer. */
private void bappend(char ch) { try { mBuff[++mBuffIdx] = ch; } catch (Exception exp) { // Double the buffer size char buff[] = new char[mBuff.length << 1]; System.arraycopy(mBuff, 0, buff, 0, mBuff.length); mBuff = buff; mBuff[mBuffIdx] = ch; } }
Appends (mChIdx - cidx) characters from character buffer (mChars) to parser's buffer (mBuff).
Params:
  • cidx – The character buffer (mChars) start index.
  • bidx – The parser buffer (mBuff) start index.
/** * Appends (mChIdx - cidx) characters from character buffer (mChars) to * parser's buffer (mBuff). * * @param cidx The character buffer (mChars) start index. * @param bidx The parser buffer (mBuff) start index. */
private void bcopy(int cidx, int bidx) { int length = mChIdx - cidx; if ((bidx + length + 1) >= mBuff.length) { // Expand the buffer char buff[] = new char[mBuff.length + length]; System.arraycopy(mBuff, 0, buff, 0, mBuff.length); mBuff = buff; } System.arraycopy(mChars, cidx, mBuff, bidx, length); mBuffIdx += length; }
Recognizes the built-in entities lt, gt, amp, apos, quot. The initial state is 0x100. Any state below 0x100 is a built-in entity replacement character.
Params:
  • ch – the next character of an entity name.
/** * Recognizes the built-in entities <i>lt</i>, <i>gt</i>, <i>amp</i>, * <i>apos</i>, <i>quot</i>. The initial state is 0x100. Any state belowe * 0x100 is a built-in entity replacement character. * * @param ch the next character of an entity name. */
@SuppressWarnings("fallthrough") private void eappend(char ch) { switch (mESt) { case 0x100: // "l" or "g" or "a" or "q" switch (ch) { case 'l': mESt = 0x101; break; case 'g': mESt = 0x102; break; case 'a': mESt = 0x103; break; case 'q': mESt = 0x107; break; default: mESt = 0x200; break; } break; case 0x101: // "lt" mESt = (ch == 't') ? '<' : (char) 0x200; break; case 0x102: // "gt" mESt = (ch == 't') ? '>' : (char) 0x200; break; case 0x103: // "am" or "ap" switch (ch) { case 'm': mESt = 0x104; break; case 'p': mESt = 0x105; break; default: mESt = 0x200; break; } break; case 0x104: // "amp" mESt = (ch == 'p') ? '&' : (char) 0x200; break; case 0x105: // "apo" mESt = (ch == 'o') ? (char) 0x106 : (char) 0x200; break; case 0x106: // "apos" mESt = (ch == 's') ? '\'' : (char) 0x200; break; case 0x107: // "qu" mESt = (ch == 'u') ? (char) 0x108 : (char) 0x200; break; case 0x108: // "quo" mESt = (ch == 'o') ? (char) 0x109 : (char) 0x200; break; case 0x109: // "quot" mESt = (ch == 't') ? '\"' : (char) 0x200; break; case '<': // "lt" case '>': // "gt" case '&': // "amp" case '\'': // "apos" case '\"': // "quot" mESt = 0x200; default: break; } }
Sets up a new input source on the top of the input stack. Note, the first byte returned by the entity's byte stream has to be the first byte in the entity. However, the parser does not expect the byte order mark in both cases when encoding is provided by the input source.
Params:
  • is – A new input source to set up.
Throws:
  • IOException – If any IO errors occur.
  • Exception – a parser-specific exception from the panic method.
/** * Sets up a new input source on the top of the input stack. Note, the first * byte returned by the entity's byte stream has to be the first byte in the * entity. However, the parser does not expect the byte order mask in both * cases when encoding is provided by the input source. * * @param is A new input source to set up. * @exception IOException If any IO errors occur. * @exception Exception is parser specific exception form panic method. */
protected void setinp(InputSource is) throws Exception { Reader reader = null; mChIdx = 0; mChLen = 0; mChars = mInp.chars; mInp.src = null; if (mPh < PH_DOC_START) { mIsSAlone = false; // default [#2.9] } mIsSAloneSet = false; if (is.getCharacterStream() != null) { // Ignore encoding in the xml text decl. reader = is.getCharacterStream(); xml(reader); } else if (is.getByteStream() != null) { String expenc; if (is.getEncoding() != null) { // Ignore encoding in the xml text decl. expenc = is.getEncoding().toUpperCase(); if (expenc.equals("UTF-16")) { reader = bom(is.getByteStream(), 'U'); // UTF-16 [#4.3.3] } else { reader = enc(expenc, is.getByteStream()); } xml(reader); } else { // Get encoding from BOM or the xml text decl. reader = bom(is.getByteStream(), ' '); /** * [#4.3.3] requires BOM for UTF-16, however, it's not uncommon * that it may be missing. A mature technique exists in Xerces * to further check for possible UTF-16 encoding */ if (reader == null) { reader = utf16(is.getByteStream()); } if (reader == null) { // Encoding is defined by the xml text decl. reader = enc("UTF-8", is.getByteStream()); expenc = xml(reader); if (!expenc.equals("UTF-8")) { if (expenc.startsWith("UTF-16")) { panic(FAULT); // UTF-16 must have BOM [#4.3.3] } reader = enc(expenc, is.getByteStream()); } } else { // Encoding is defined by the BOM. xml(reader); } } } else { // There is no support for public/system identifiers. panic(FAULT); } mInp.src = reader; mInp.pubid = is.getPublicId(); mInp.sysid = is.getSystemId(); }
Determines the entity encoding. This method gets encoding from Byte Order Mark [#4.3.3] if any. Note, the first byte returned by the entity's byte stream has to be the first byte in the entity. Also, there is no support for UCS-4.
Params:
  • is – A byte stream of the entity.
  • hint – An encoding hint, character U means UTF-16.
Throws:
Returns:a reader constructed from the BOM or UTF-8 by default.
/** * Determines the entity encoding. * * This method gets encoding from Byte Order Mask [#4.3.3] if any. Note, the * first byte returned by the entity's byte stream has to be the first byte * in the entity. Also, there is no support for UCS-4. * * @param is A byte stream of the entity. * @param hint An encoding hint, character U means UTF-16. * @return a reader constructed from the BOM or UTF-8 by default. * @exception Exception is parser specific exception form panic method. * @exception IOException */
private Reader bom(InputStream is, char hint) throws Exception { int val = is.read(); switch (val) { case 0xef: // UTF-8 if (hint == 'U') // must be UTF-16 { panic(FAULT); } if (is.read() != 0xbb) { panic(FAULT); } if (is.read() != 0xbf) { panic(FAULT); } return new ReaderUTF8(is); case 0xfe: // UTF-16, big-endian if (is.read() != 0xff) { panic(FAULT); } return new ReaderUTF16(is, 'b'); case 0xff: // UTF-16, little-endian if (is.read() != 0xfe) { panic(FAULT); } return new ReaderUTF16(is, 'l'); case -1: mChars[mChIdx++] = EOS; return new ReaderUTF8(is); default: if (hint == 'U') // must be UTF-16 { panic(FAULT); } // Read the rest of UTF-8 character switch (val & 0xf0) { case 0xc0: case 0xd0: mChars[mChIdx++] = (char) (((val & 0x1f) << 6) | (is.read() & 0x3f)); break; case 0xe0: mChars[mChIdx++] = (char) (((val & 0x0f) << 12) | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f)); break; case 0xf0: // UCS-4 character throw new UnsupportedEncodingException(); default: mChars[mChIdx++] = (char) val; break; } return null; } }
Using a mature technique from Xerces, this method checks further after the bom method above to see if the encoding is UTF-16
Params:
  • is – A byte stream of the entity.
Throws:
Returns:a reader, may be null
/** * Using a mature technique from Xerces, this method checks further after * the bom method above to see if the encoding is UTF-16 * * @param is A byte stream of the entity. * @return a reader, may be null * @exception Exception is parser specific exception form panic method. * @exception IOException */
private Reader utf16(InputStream is) throws Exception { if (mChIdx != 0) { // The bom method has read ONE byte into the buffer. byte b0 = (byte)mChars[0]; if (b0 == 0x00 || b0 == 0x3C) { int b1 = is.read(); int b2 = is.read(); int b3 = is.read(); if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM mChars[0] = (char)(b1); mChars[mChIdx++] = (char)(b3); return new ReaderUTF16(is, 'b'); } else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM mChars[0] = (char)(b0); mChars[mChIdx++] = (char)(b2); return new ReaderUTF16(is, 'l'); } else { /* not every InputStream supports reset, so we have to remember * the state for further parsing */ mChars[0] = (char)(b0); mChars[mChIdx++] = (char)(b1); mChars[mChIdx++] = (char)(b2); mChars[mChIdx++] = (char)(b3); } } } return null; }
Parses the xml text declaration. This method gets encoding from the xml text declaration [#4.3.1] if any. The method assumes the buffer (mChars) is big enough to accommodate whole xml text declaration.
Params:
  • reader – is entity reader.
Throws:
Returns:The xml text declaration encoding or default UTF-8 encoding.
/** * Parses the xml text declaration. * * This method gets encoding from the xml text declaration [#4.3.1] if any. * The method assumes the buffer (mChars) is big enough to accommodate whole * xml text declaration. * * @param reader is entity reader. * @return The xml text declaration encoding or default UTF-8 encoding. * @exception Exception is parser specific exception form panic method. * @exception IOException */
private String xml(Reader reader) throws Exception { String str = null; String enc = "UTF-8"; char ch; int val; short st = 0; int byteRead = mChIdx; //number of bytes read prior to entering this method while (st >= 0 && mChIdx < mChars.length) { if (st < byteRead) { ch = mChars[st]; } else { ch = ((val = reader.read()) >= 0) ? (char) val : EOS; mChars[mChIdx++] = ch; } switch (st) { case 0: // read '<' of xml declaration switch (ch) { case '<': st = 1; break; case 0xfeff: // the byte order mask ch = ((val = reader.read()) >= 0) ? (char) val : EOS; mChars[mChIdx - 1] = ch; st = (short) ((ch == '<') ? 1 : -1); break; default: st = -1; break; } break; case 1: // read '?' of xml declaration [#4.3.1] st = (short) ((ch == '?') ? 2 : -1); break; case 2: // read 'x' of xml declaration [#4.3.1] st = (short) ((ch == 'x') ? 3 : -1); break; case 3: // read 'm' of xml declaration [#4.3.1] st = (short) ((ch == 'm') ? 4 : -1); break; case 4: // read 'l' of xml declaration [#4.3.1] st = (short) ((ch == 'l') ? 5 : -1); break; case 5: // read white space after 'xml' switch (ch) { case ' ': case '\t': case '\r': case '\n': st = 6; break; default: st = -1; break; } break; case 6: // read content of xml declaration switch (ch) { case '?': st = 7; break; case EOS: st = -2; break; default: break; } break; case 7: // read '>' after '?' of xml declaration switch (ch) { case '>': case EOS: st = -2; break; default: st = 6; break; } break; default: panic(FAULT); break; } } mChLen = mChIdx; mChIdx = 0; // If there is no xml text declaration, the encoding is default. 
if (st == -1) { return enc; } mChIdx = 5; // the first white space after "<?xml" // Parse the xml text declaration for (st = 0; st >= 0;) { ch = getch(); switch (st) { case 0: // skip spaces after the xml declaration name if (chtyp(ch) != ' ') { bkch(); st = 1; } break; case 1: // read xml declaration version case 2: // read xml declaration encoding or standalone case 3: // read xml declaration standalone switch (chtyp(ch)) { case 'a': case 'A': case '_': bkch(); str = name(false).toLowerCase(); if ("version".equals(str) == true) { if (st != 1) { panic(FAULT); } if ("1.0".equals(eqstr('=')) != true) { panic(FAULT); } mInp.xmlver = 0x0100; st = 2; } else if ("encoding".equals(str) == true) { if (st != 2) { panic(FAULT); } mInp.xmlenc = eqstr('=').toUpperCase(); enc = mInp.xmlenc; st = 3; } else if ("standalone".equals(str) == true) { if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1] { panic(FAULT); } str = eqstr('=').toLowerCase(); // Check the 'standalone' value and use it [#5.1] if (str.equals("yes") == true) { mIsSAlone = true; } else if (str.equals("no") == true) { mIsSAlone = false; } else { panic(FAULT); } mIsSAloneSet = true; st = 4; } else { panic(FAULT); } break; case ' ': break; case '?': if (st == 1) { panic(FAULT); } bkch(); st = 4; break; default: panic(FAULT); } break; case 4: // end of xml declaration switch (chtyp(ch)) { case '?': if (getch() != '>') { panic(FAULT); } if (mPh <= PH_DOC_START) { mPh = PH_MISC_DTD; // misc before DTD } st = -1; break; case ' ': break; default: panic(FAULT); } break; default: panic(FAULT); } } return enc; }
Sets up the document reader.
Params:
  • name – an encoding name.
  • is – the document byte input stream.
Throws:
Returns:a reader constructed from encoding name and input stream.
/** * Sets up the document reader. * * @param name an encoding name. * @param is the document byte input stream. * @return a reader constructed from encoding name and input stream. * @exception UnsupportedEncodingException */
private Reader enc(String name, InputStream is) throws UnsupportedEncodingException { // DO NOT CLOSE current reader if any! if (name.equals("UTF-8")) { return new ReaderUTF8(is); } else if (name.equals("UTF-16LE")) { return new ReaderUTF16(is, 'l'); } else if (name.equals("UTF-16BE")) { return new ReaderUTF16(is, 'b'); } else { return new InputStreamReader(is, name); } }
Sets up current input on the top of the input stack.
Params:
  • inp – A new input to set up.
/**
 * Makes the given input the current one, on the top of the input stack.
 *
 * The read position of the input being covered is saved in that input,
 * then the character buffer and the read position of the new input are
 * loaded.
 *
 * @param inp A new input to set up.
 */
protected void push(Input inp) {
    // Save the read position of the input which is about to be covered.
    final Input covered = mInp;
    covered.chLen = mChLen;
    covered.chIdx = mChIdx;

    // Link the new input in and switch over to its buffer.
    inp.next = covered;
    mInp = inp;
    mChars = inp.chars;
    mChLen = inp.chLen;
    mChIdx = inp.chIdx;
}
Restores previous input on the top of the input stack.
/**
 * Removes the current input from the top of the input stack and restores
 * the previous one.
 *
 * The source of the removed input is closed if there is one. When the
 * stack becomes empty, the character buffer is cleared.
 */
protected void pop() {
    if (mInp.src != null) {
        try {
            mInp.src.close();
        } catch (IOException ignored) {
            // A failure to close the source is deliberately ignored.
        }
        mInp.src = null;
    }

    mInp = mInp.next;
    if (mInp == null) {
        // There is no input left.
        mChars = null;
        mChLen = 0;
        mChIdx = 0;
        return;
    }
    mChars = mInp.chars;
    mChLen = mInp.chLen;
    mChIdx = mInp.chIdx;
}
Maps a character to its type. Possible character type values are:
  • ' ' - for any kind of whitespace character;
  • 'a' - for any lower case alphabetical character value;
  • 'A' - for any upper case alphabetical character value;
  • 'd' - for any decimal digit character value;
  • 'z' - for any character less than ' ' except '\t', '\n', '\r';
  • 'X' - for any not ASCII character;
  • 'Z' - for EOS character.
An ASCII (7 bit) character which does not fall in any category listed above is mapped to itself.
Params:
  • ch – The character to map.
Returns:The type of character.
/**
 * Maps a character to its type.
 *
 * Possible character type values are:
 * <ul>
 * <li>' ' - for any kind of whitespace character;</li>
 * <li>'a' - for any lower case alphabetical character value;</li>
 * <li>'A' - for any upper case alphabetical character value;</li>
 * <li>'d' - for any decimal digit character value;</li>
 * <li>'z' - for any character less than ' ' except '\t', '\n', '\r';</li>
 * <li>'X' - for any not ASCII character;</li>
 * <li>'Z' - for EOS character.</li>
 * </ul>
 * An ASCII (7 bit) character which does not fall in any category
 * listed above is mapped to itself.
 *
 * @param ch The character to map.
 * @return The type of character.
 */
protected char chtyp(char ch) {
    if (ch >= 0x80) {
        // Not an ASCII character: either the end of stream or anything else.
        return (ch == EOS) ? 'Z' : 'X';
    }
    // ASCII characters are looked up in the type table.
    return (char) asctyp[ch];
}
Retrieves the next character in the document.
Returns:The next character in the document.
/** * Retrives the next character in the document. * * @return The next character in the document. */
protected char getch() throws IOException { if (mChIdx >= mChLen) { if (mInp.src == null) { pop(); // remove internal entity return getch(); } // Read new portion of the document characters int Num = mInp.src.read(mChars, 0, mChars.length); if (Num < 0) { if (mInp != mDoc) { pop(); // restore the previous input return getch(); } else { mChars[0] = EOS; mChLen = 1; } } else { mChLen = Num; } mChIdx = 0; } return mChars[mChIdx++]; }
Puts back the last read character. This method MUST NOT be called more than once after each call of getch method.
/**
 * Puts back the last read character by moving the read position of the
 * character buffer one step back.
 *
 * This method <strong>MUST NOT</strong> be called more than once after each
 * call of {@link #getch getch} method.
 *
 * @exception Exception is parser specific exception from panic method,
 *      raised when the read position is at the start of the buffer.
 */
protected void bkch() throws Exception {
    if (mChIdx < 1) {
        // There is nothing to put back.
        panic(FAULT);
    }
    mChIdx -= 1;
}
Sets the current character.
Params:
  • ch – The character to set.
/**
 * Sets the current character, i.e. overwrites the character at the read
 * position (mChIdx) of the character buffer (mChars). The read position is
 * not moved.
 *
 * @param ch The character to set.
 */
protected void setch(char ch) {
    final char[] chars = mChars;
    chars[mChIdx] = ch;
}
Finds a pair in the pair chain by a qualified name.
Params:
  • chain – The first element of the chain of pairs.
  • qname – The qualified name.
Returns:A pair with the specified qualified name or null.
/**
 * Searches a chain of pairs for the first pair with the given qualified
 * name.
 *
 * @param chain The first element of the chain of pairs.
 * @param qname The qualified name.
 * @return A pair with the specified qualified name or null.
 */
protected Pair find(Pair chain, char[] qname) {
    Pair cursor = chain;
    // Walk the chain until a match is found or the chain ends.
    while ((cursor != null) && !cursor.eqname(qname)) {
        cursor = cursor.next;
    }
    return cursor;
}
Provides an instance of a pair.
Params:
  • next – The reference to a next pair.
Returns:An instance of a pair.
/**
 * Provides an instance of a pair.
 *
 * A pair is taken from the list of deleted pairs (mDltd) when that list is
 * not empty; otherwise a new pair is created.
 *
 * @param next The reference to a next pair.
 * @return An instance of a pair.
 */
protected Pair pair(Pair next) {
    final Pair result;
    if (mDltd == null) {
        result = new Pair();
    } else {
        // Reuse the first pair of the deleted pairs list.
        result = mDltd;
        mDltd = result.next;
    }
    result.next = next;
    return result;
}
Deletes an instance of a pair.
Params:
  • pair – The pair to delete.
Returns:A reference to the next pair in a chain.
/**
 * Deletes an instance of a pair.
 *
 * The references held by the pair are cleared and the pair is put at the
 * head of the list of deleted pairs (mDltd).
 *
 * @param pair The pair to delete.
 * @return A reference to the next pair in a chain.
 */
protected Pair del(Pair pair) {
    // Remember the successor before the link is reused.
    final Pair successor = pair.next;

    pair.name = null;
    pair.value = null;
    pair.chars = null;
    pair.list = null;

    // Put the pair at the head of the deleted pairs list.
    pair.next = mDltd;
    mDltd = pair;
    return successor;
}
}