/*
* Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.internal.util.xml.impl;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import jdk.internal.org.xml.sax.InputSource;
import jdk.internal.org.xml.sax.SAXException;
XML non-validating parser engine.
/**
* XML non-validating parser engine.
*/
public abstract class Parser {
// Message handed to panic() at every error site in this class (an empty string).
public static final String FAULT = "";
// Buffer sizes: BUFFSIZE_READER is the size passed to new Input(...) when the
// external DTD subset is pushed; BUFFSIZE_PARSER is the length mBuff is
// allocated with in the constructor.
protected static final int BUFFSIZE_READER = 512;
protected static final int BUFFSIZE_PARSER = 128;
The end of stream character.
/**
* The end of stream character.
*/
public static final char EOS = 0xffff;
private Pair mNoNS; // there is no namespace
private Pair mXml; // the xml namespace
private Map<String, Input> mEnt; // the general entities look up table
private Map<String, Input> mPEnt; // the parameter entities look up table
protected boolean mIsSAlone; // xml decl standalone flag
protected boolean mIsSAloneSet; // standalone is explicitly set
protected boolean mIsNSAware; // if true - namespace aware mode
protected int mPh; // current phase of document processing
// mPh values:
protected static final int PH_BEFORE_DOC = -1; // before parsing
protected static final int PH_DOC_START = 0; // document start
protected static final int PH_MISC_DTD = 1; // misc before DTD
protected static final int PH_DTD = 2; // DTD
protected static final int PH_DTD_MISC = 3; // misc after DTD
protected static final int PH_DOCELM = 4; // document's element
protected static final int PH_DOCELM_MISC = 5; // misc after element
protected static final int PH_AFTER_DOC = 6; // after parsing
protected int mEvt; // current event type
// mEvt values (returned by step()):
protected static final int EV_NULL = 0; // unknown
protected static final int EV_ELM = 1; // empty element
protected static final int EV_ELMS = 2; // start element
protected static final int EV_ELME = 3; // end element
protected static final int EV_TEXT = 4; // textual content
protected static final int EV_WSPC = 5; // white space content
protected static final int EV_PI = 6; // processing instruction
protected static final int EV_CDAT = 7; // character data
protected static final int EV_COMM = 8; // comment
protected static final int EV_DTD = 9; // document type definition
protected static final int EV_ENT = 10; // skipped entity
private char mESt; // built-in entity recognizer state
// mESt values:
// 0x100 : the initial state
// > 0x100 : unrecognized name
// < 0x100 : replacement character
protected char[] mBuff; // parser buffer
protected int mBuffIdx; // index of the last char
protected Pair mPref; // stack of prefixes
protected Pair mElm; // stack of elements
// Layout of the mAttL list (declared below):
// mAttL.chars - element qname
// mAttL.next - next element
// mAttL.list - list of attributes defined on this element
// mAttL.list.chars - attribute qname
// mAttL.list.id - a char representing attribute's type see below
// mAttL.list.next - next attribute defined on the element
// mAttL.list.list - default value structure or null
// mAttL.list.list.chars - "name='value' " chars array for Input
//
// Attribute type character values:
// 'i' - "ID"
// 'r' - "IDREF"
// 'R' - "IDREFS"
// 'n' - "ENTITY"
// 'N' - "ENTITIES"
// 't' - "NMTOKEN"
// 'T' - "NMTOKENS"
// 'u' - enumeration type
// 'o' - "NOTATION"
// 'c' - "CDATA"
// see also: bkeyword() and atype()
//
protected Pair mAttL; // list of defined attrs by element name
protected Input mDoc; // document entity
protected Input mInp; // stack of entities
private char[] mChars; // reading buffer
private int mChLen; // current capacity
private int mChIdx; // index to the next char
protected Attrs mAttrs; // attributes of the current element
private String[] mItems; // attributes array of the current element
private char mAttrIdx; // attributes counter/index
private String mUnent; // unresolved entity name
private Pair mDltd; // deleted objects for reuse
Default prefixes
/**
* Default prefixes
*/
private static final char NONS[];
private static final char XML[];
private static final char XMLNS[];
static {
NONS = new char[1];
NONS[0] = (char) 0;
XML = new char[4];
XML[0] = (char) 4;
XML[1] = 'x';
XML[2] = 'm';
XML[3] = 'l';
XMLNS = new char[6];
XMLNS[0] = (char) 6;
XMLNS[1] = 'x';
XMLNS[2] = 'm';
XMLNS[3] = 'l';
XMLNS[4] = 'n';
XMLNS[5] = 's';
}
ASCII character type array.
This array maps an ASCII (7 bit) character to the character type.
Possible character type values are:
- ' ' for any kind of white
space character;
- 'a' for any lower case alphabetical character
value;
- 'A' for any upper case alphabetical character value;
- 'd' for any decimal digit character value;
- 'z' for any
character less than ' ' except '\t', '\n', '\r';
An ASCII (7 bit)
character which does not fall in any category listed above is mapped to
it self.
/**
* ASCII character type array.
*
* This array maps an ASCII (7 bit) character to its character type.
* Possible character type values are:
* <ul>
* <li>' ' for the white space characters '\t', '\n' and '\r';</li>
* <li>'a' for any lower case alphabetical character;</li>
* <li>'A' for any upper case alphabetical character;</li>
* <li>'d' for any decimal digit character;</li>
* <li>'z' for any character less than ' ' except '\t', '\n', '\r'.</li>
* </ul>
* An ASCII (7 bit) character which does not fall in any category listed
* above (the space character included) is mapped to itself.
*/
private static final byte asctyp[];
NMTOKEN character type array.
This array maps an ASCII (7 bit) character to the character type.
Possible character type values are:
- 0 for underscore ('_') or any
lower and upper case alphabetical character value;
- 1 for colon
(':') character;
- 2 for dash ('-') and dot ('.') or any decimal
digit character value;
- 3 for any kind of white space character
An ASCII (7 bit) character which does not fall in any category listed
above is mapped to 0xff.
/**
* NMTOKEN character type array.
*
* This array maps an ASCII (7 bit) character to its character type.
* Possible character type values are:
* <ul>
* <li>0 for underscore ('_') or any lower and upper case alphabetical
* character;</li>
* <li>1 for the colon (':') character;</li>
* <li>2 for dash ('-'), dot ('.') or any decimal digit character;</li>
* <li>3 for the white space characters ' ', '\t', '\r' and '\n'.</li>
* </ul>
* An ASCII (7 bit) character which does not fall in any category listed
* above is mapped to 0xff.
*/
private static final byte nmttyp[];
// Static constructor. Sets up the ASCII character type array which is used by asctyp
// method and NMTOKEN character type array.
/**
* Static constructor.
*
* Sets up the ASCII character type array which is used by
* {@link #asctyp asctyp} method and NMTOKEN character type array.
*/
static {
    // ASCII character classes: every 7 bit code is classified in one pass.
    asctyp = new byte[0x80];
    for (int code = 0; code < 0x80; code++) {
        byte type;
        if (code == '\t' || code == '\r' || code == '\n') {
            type = (byte) ' ';          // white space controls
        } else if (code < ' ') {
            type = (byte) 'z';          // remaining control characters
        } else if (code >= '0' && code <= '9') {
            type = (byte) 'd';          // decimal digits
        } else if (code >= 'A' && code <= 'Z') {
            type = (byte) 'A';          // upper case letters
        } else if (code >= 'a' && code <= 'z') {
            type = (byte) 'a';          // lower case letters
        } else {
            type = (byte) code;         // everything else maps to itself
        }
        asctyp[code] = type;
    }

    // NMTOKEN character classes.
    nmttyp = new byte[0x80];
    for (int code = 0; code < 0x80; code++) {
        byte type;
        if (code == ' ' || code == '\t' || code == '\r' || code == '\n') {
            type = 3;                   // white space
        } else if (code == ':') {
            type = 1;                   // colon
        } else if (code == '.' || code == '-'
                || (code >= '0' && code <= '9')) {
            type = 2;                   // dot, dash and digits
        } else if (code == '_'
                || (code >= 'A' && code <= 'Z')
                || (code >= 'a' && code <= 'z')) {
            type = 0;                   // underscore and letters
        } else {
            type = (byte) 0xff;         // not a name token character
        }
        nmttyp[code] = type;
    }
}
Constructor.
/**
* Constructor.
*/
protected Parser() {
mPh = PH_BEFORE_DOC; // before parsing
// Initialize the parser
mBuff = new char[BUFFSIZE_PARSER];
mAttrs = new Attrs();
// Default namespace
mPref = pair(mPref);
mPref.name = "";
mPref.value = "";
mPref.chars = NONS;
mNoNS = mPref; // no namespace
// XML namespace
mPref = pair(mPref);
mPref.name = "xml";
mPref.value = "http://www.w3.org/XML/1998/namespace";
mPref.chars = XML;
mXml = mPref; // XML namespace
}
Initializes parser's internals. Note, current input has to be set before
this method is called.
/**
 * Resets the per-document state of the parser.
 *
 * The current input ({@code mInp}) has to be set before this method is
 * called: it becomes the document entity and its buffer becomes the
 * reading buffer.
 */
protected void init() {
    // Forget everything left over from a previous document.
    mAttL = null;
    mElm = null;
    mUnent = null;
    mPref = mXml;               // prefix stack back to the "xml" entry

    // Fresh entity look up tables.
    mEnt = new HashMap<>();
    mPEnt = new HashMap<>();

    // The current input is the document entity; read from its buffer.
    mDoc = mInp;
    mChars = mInp.chars;

    mPh = PH_DOC_START;         // the beginning of the document
}
Cleans up parser internal resources.
/**
 * Releases the resources held for the current document: the attribute
 * declarations, the element and prefix stacks, the stack of inputs and the
 * document reader. The parser is left in the "after document" phase.
 */
protected void cleanup() {
    // Attribute declarations: for every element drop each attribute and,
    // if present, its default value structure.
    for (; mAttL != null; mAttL = del(mAttL)) {
        for (; mAttL.list != null; mAttL.list = del(mAttL.list)) {
            if (mAttL.list.list != null) {
                del(mAttL.list.list);
            }
        }
    }

    // Element stack.
    for (; mElm != null; mElm = del(mElm)) {
        // nothing else to release per element
    }

    // Namespace prefixes, down to (but not including) the "xml" entry.
    for (; mPref != mXml; mPref = del(mPref)) {
        // nothing else to release per prefix
    }

    // Stack of inputs.
    while (mInp != null) {
        pop();
    }

    // Document reader: closing is best effort, a failure is ignored.
    if (mDoc != null) {
        if (mDoc.src != null) {
            try {
                mDoc.src.close();
            } catch (IOException ignored) {
            }
        }
    }

    mDoc = null;
    mEnt = null;
    mPEnt = null;
    mPh = PH_AFTER_DOC;         // document processing is over
}
Processes a portion of document. This method returns one of EV_*
constants as an identifier of the portion of document have been read.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: Identifier of processed document portion.
/**
* Processes a portion of the document and returns one of the EV_* constants
* identifying the portion that has been read.
*
* The method runs a small state machine until <code>mEvt</code> is set to
* something other than <code>EV_NULL</code>: state 0 dispatches on markup,
* state 1 collects white space and state 2 collects text content.
*
* @return Identifier of processed document portion.
* @exception Exception is parser specific exception from panic method.
* @exception IOException listed by the original documentation; the method
* itself only declares <code>Exception</code>.
*/
@SuppressWarnings("fallthrough")
protected int step() throws Exception {
mEvt = EV_NULL;
int st = 0;
while (mEvt == EV_NULL) {
// Take the next char straight from the reading buffer when one is
// available, otherwise go through getch().
char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
switch (st) {
case 0: // all sorts of markup (dispatcher)
if (ch != '<') {
// Not markup: push the char back and start collecting content.
bkch();
mBuffIdx = -1; // clean parser buffer
st = 1;
break;
}
switch (getch()) {
case '/': // the end of the element content
mEvt = EV_ELME;
if (mElm == null) {
panic(FAULT);
}
// Check element's open/close tags balance
mBuffIdx = -1; // clean parser buffer
bname(mIsNSAware);
char[] chars = mElm.chars;
// The name just read must have the same length and the same chars
// (from index 1 on) as the name on top of the element stack.
if (chars.length == (mBuffIdx + 1)) {
for (char i = 1; i <= mBuffIdx; i += 1) {
if (chars[i] != mBuff[i]) {
panic(FAULT);
}
}
} else {
panic(FAULT);
}
// Skip white spaces before '>'
if (wsskip() != '>') {
panic(FAULT);
}
getch(); // read '>'
break;
case '!': // a comment or a CDATA
// Peek at the char after '<!' to pick the handler.
ch = getch();
bkch();
switch (ch) {
case '-': // must be a comment
mEvt = EV_COMM;
comm();
break;
case '[': // must be a CDATA section
mEvt = EV_CDAT;
cdat();
break;
default: // must be 'DOCTYPE'
mEvt = EV_DTD;
dtd();
break;
}
break;
case '?': // processing instruction
mEvt = EV_PI;
pi();
break;
default: // must be the first char of an xml name
bkch();
// Read an element name and put it on top of the
// element stack
mElm = pair(mElm); // add new element to the stack
mElm.chars = qname(mIsNSAware);
mElm.name = mElm.local();
mElm.id = (mElm.next != null) ? mElm.next.id : 0; // flags
mElm.num = 0; // namespace counter
// Find the list of defined attributes of the current
// element
Pair elm = find(mAttL, mElm.chars);
mElm.list = (elm != null) ? elm.list : null;
// Read attributes till the end of the element tag
mAttrIdx = 0;
Pair att = pair(null);
att.num = 0; // clear attribute's flags
attr(att); // get all attributes inc. defaults
del(att);
mElm.value = (mIsNSAware) ? rslv(mElm.chars) : null;
// Skip white spaces before '>'
switch (wsskip()) {
case '>':
getch(); // read '>'
mEvt = EV_ELMS;
break;
case '/':
getch(); // read '/'
if (getch() != '>') // read '>'
{
panic(FAULT);
}
mEvt = EV_ELM;
break;
default:
panic(FAULT);
}
break;
}
break;
case 1: // read white space
switch (ch) {
case ' ':
case '\t':
case '\n':
bappend(ch);
break;
case '\r': // EOL processing [#2.11]
// "\r\n" and a lone '\r' are both stored as a single '\n'.
if (getch() != '\n') {
bkch();
}
bappend('\n');
break;
case '<':
// Only white space was seen before the markup.
mEvt = EV_WSPC;
bkch();
bflash_ws();
break;
default:
// A non white space char: continue as text content.
bkch();
st = 2;
break;
}
break;
case 2: // read the text content of the element
switch (ch) {
case '&':
if (mUnent == null) {
// There was no unresolved entity on previous step.
if ((mUnent = ent('x')) != null) {
// ent() handed back a name: report the text read so far
// now and the entity on the next call.
mEvt = EV_TEXT;
bkch(); // move back to ';' after entity name
setch('&'); // parser must be back on next step
bflash();
}
} else {
// There was unresolved entity on previous step.
mEvt = EV_ENT;
skippedEnt(mUnent);
mUnent = null;
}
break;
case '<':
mEvt = EV_TEXT;
bkch();
bflash();
break;
case '\r': // EOL processing [#2.11]
if (getch() != '\n') {
bkch();
}
bappend('\n');
break;
case EOS:
panic(FAULT);
// no break here: control reaches default only if panic returns
default:
bappend(ch);
break;
}
break;
default:
panic(FAULT);
}
}
return mEvt;
}
Parses the document type declaration.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Parses the document type declaration.
*
* The 'DOCTYPE' keyword is read first, then a state machine reads the
* document type name (state 0), the optional external identifier (state 1),
* the optional internal subset (state 2) and the closing '&gt;' (state 3).
* The declaration is reported through docType(); at the closing '&gt;' an
* external subset, if one was declared, is either parsed with dtdsub() or
* reported through skippedEnt("[dtd]").
*
* @exception Exception is parser specific exception from panic method.
* @exception IOException listed by the original documentation; the method
* itself only declares <code>Exception</code>.
*/
private void dtd() throws Exception {
char ch;
String str = null; // NOTE(review): never referenced again in this method
String name = null;
Pair psid = null;
// read 'DOCTYPE'
if ("DOCTYPE".equals(name(false)) != true) {
panic(FAULT);
}
mPh = PH_DTD; // DTD
for (short st = 0; st >= 0;) {
ch = getch();
switch (st) {
case 0: // read the document type name
if (chtyp(ch) != ' ') {
bkch();
name = name(mIsNSAware);
wsskip();
st = 1; // read 'PUBLIC' or 'SYSTEM'
}
break;
case 1: // read 'PUBLIC' or 'SYSTEM'
switch (chtyp(ch)) {
case 'A':
// An upper case letter: the external identifier follows.
bkch();
psid = pubsys(' ');
st = 2; // skip spaces before internal subset
docType(name, psid.name, psid.value);
break;
case '[':
bkch();
st = 2; // skip spaces before internal subset
docType(name, null, null);
break;
case '>':
bkch();
st = 3; // skip spaces after internal subset
docType(name, null, null);
break;
default:
panic(FAULT);
}
break;
case 2: // skip spaces before internal subset
switch (chtyp(ch)) {
case '[':
// Process internal subset
dtdsub();
st = 3; // skip spaces after internal subset
break;
case '>':
// There is no internal subset
bkch();
st = 3; // skip spaces after internal subset
break;
case ' ':
// skip white spaces
break;
default:
panic(FAULT);
}
break;
case 3: // skip spaces after internal subset
switch (chtyp(ch)) {
case '>':
// psid is non-null only when an external identifier was read.
if (psid != null) {
// Report the DTD external subset
InputSource is = resolveEnt(name, psid.name, psid.value);
if (is != null) {
if (mIsSAlone == false) {
// Set the end of DTD external subset char
bkch();
setch(']');
// Set the DTD external subset InputSource
push(new Input(BUFFSIZE_READER));
setinp(is);
mInp.pubid = psid.name;
mInp.sysid = psid.value;
// Parse the DTD external subset
dtdsub();
} else {
// Standalone document: the resolved external subset is not
// parsed, it is reported as skipped.
skippedEnt("[dtd]");
// Release reader and stream; close failures are ignored
if (is.getCharacterStream() != null) {
try {
is.getCharacterStream().close();
} catch (IOException ioe) {
}
}
if (is.getByteStream() != null) {
try {
is.getByteStream().close();
} catch (IOException ioe) {
}
}
}
} else {
// Unresolved DTD external subset
skippedEnt("[dtd]");
}
del(psid);
}
st = -1; // end of DTD
break;
case ' ':
// skip white spaces
break;
default:
panic(FAULT);
}
break;
default:
panic(FAULT);
}
}
}
Parses the document type declaration subset.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Parses the document type declaration subset.
*
* State 0 skips white space and dispatches on what starts next: a
* processing instruction, a comment, a markup declaration (selected by the
* keyword returned from bkeyword()), a parameter entity reference or the
* ']' which ends the subset. State 1 consumes the '&gt;' which closes a
* markup declaration.
*
* @exception Exception is parser specific exception from panic method.
* @exception IOException listed by the original documentation; the method
* itself only declares <code>Exception</code>.
*/
private void dtdsub() throws Exception {
char ch;
for (short st = 0; st >= 0;) {
ch = getch();
switch (st) {
case 0: // skip white spaces before a declaration
switch (chtyp(ch)) {
case '<':
ch = getch();
switch (ch) {
case '?':
pi();
break;
case '!':
// Peek at the char after '<!': a '-' means a comment.
ch = getch();
bkch();
if (ch == '-') {
comm();
break;
}
// A markup or an entity declaration
bntok();
switch (bkeyword()) {
case 'n':
dtdent(); // parse entity declaration
break;
case 'a':
dtdattl(); // parse attributes declaration
break;
case 'e':
dtdelm(); // parse element declaration
break;
case 'o':
dtdnot(); // parse notation declaration
break;
default:
panic(FAULT); // unsupported markup declaration
break;
}
st = 1; // read the end of declaration
break;
default:
panic(FAULT);
break;
}
break;
case '%':
// A parameter entity reference
pent(' ');
break;
case ']':
// End of DTD subset
st = -1;
break;
case ' ':
// Skip white spaces
break;
case 'Z':
// End of stream: the next char has to be the ']' end marker
if (getch() != ']') {
panic(FAULT);
}
st = -1;
break;
default:
panic(FAULT);
}
break;
case 1: // read the end of declaration
switch (ch) {
case '>': // the declaration is closed
st = 0; // skip white spaces before a declaration
break;
case ' ':
case '\n':
case '\r':
case '\t':
// Skip white spaces
break;
default:
panic(FAULT);
break;
}
break;
default:
panic(FAULT);
}
}
}
Parses an entity declaration. This method fills the general (
mEnt
) and parameter
(
mPEnt
) entity look up table.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Parses an entity declaration. This method fills the general
* (<code>mEnt</code>) and parameter (<code>mPEnt</code>) entity look up
* tables.
*
* An entity name which is already present in its table is never replaced;
* only the first declaration is stored.
*
* @exception Exception is parser specific exception from panic method.
* @exception IOException listed by the original documentation; the method
* itself only declares <code>Exception</code>.
*/
@SuppressWarnings("fallthrough")
private void dtdent() throws Exception {
String str = null; // entity name
char[] val = null; // entity value
Input inp = null;
Pair ids = null; // external identifier
char ch;
for (short st = 0; st >= 0;) {
ch = getch();
switch (st) {
case 0: // skip white spaces before entity name
switch (chtyp(ch)) {
case ' ':
// Skip white spaces
break;
case '%':
// Parameter entity or parameter entity declaration.
ch = getch();
bkch();
if (chtyp(ch) == ' ') {
// Parameter entity declaration.
wsskip();
str = name(false);
switch (chtyp(wsskip())) {
case 'A':
// Read the external identifier
ids = pubsys(' ');
if (wsskip() == '>') {
// External parsed entity
if (mPEnt.containsKey(str) == false) { // [#4.2]
inp = new Input();
inp.pubid = ids.name;
inp.sysid = ids.value;
mPEnt.put(str, inp);
}
} else {
panic(FAULT);
}
del(ids);
st = -1; // the end of declaration
break;
case '\"':
case '\'':
// Read the parameter entity value
bqstr('d');
// Create the parameter entity value: the chars read into the
// buffer preceded by a single space
val = new char[mBuffIdx + 1];
System.arraycopy(mBuff, 1, val, 1, val.length - 1);
// Add the leading space [#4.4.8]
val[0] = ' ';
// Add the entity to the entity look up table
if (mPEnt.containsKey(str) == false) { // [#4.2]
inp = new Input(val);
inp.pubid = mInp.pubid;
inp.sysid = mInp.sysid;
inp.xmlenc = mInp.xmlenc;
inp.xmlver = mInp.xmlver;
mPEnt.put(str, inp);
}
st = -1; // the end of declaration
break;
default:
panic(FAULT);
break;
}
} else {
// Parameter entity reference.
pent(' ');
}
break;
default:
// General entity declaration: read its name
bkch();
str = name(false);
st = 1; // read entity declaration value
break;
}
break;
case 1: // read entity declaration value
switch (chtyp(ch)) {
case '\"': // internal entity
case '\'':
bkch();
bqstr('d'); // read a string into the buffer
if (mEnt.get(str) == null) {
// Create general entity value
val = new char[mBuffIdx];
System.arraycopy(mBuff, 1, val, 0, val.length);
// Add the entity to the entity look up table
if (mEnt.containsKey(str) == false) { // [#4.2]
inp = new Input(val);
inp.pubid = mInp.pubid;
inp.sysid = mInp.sysid;
inp.xmlenc = mInp.xmlenc;
inp.xmlver = mInp.xmlver;
mEnt.put(str, inp);
}
}
st = -1; // the end of declaration
break;
case 'A': // external entity
bkch();
ids = pubsys(' ');
switch (wsskip()) {
case '>': // external parsed entity
if (mEnt.containsKey(str) == false) { // [#4.2]
inp = new Input();
inp.pubid = ids.name;
inp.sysid = ids.value;
mEnt.put(str, inp);
}
break;
case 'N': // external general unparsed entity
if ("NDATA".equals(name(false)) == true) {
wsskip();
unparsedEntDecl(str, ids.name, ids.value, name(false));
break;
}
// any other keyword falls through to the panic below
default:
panic(FAULT);
break;
}
del(ids);
st = -1; // the end of declaration
break;
case ' ':
// Skip white spaces
break;
default:
panic(FAULT);
break;
}
break;
default:
panic(FAULT);
}
}
}
Parses an element declaration.
This method parses the declaration up to the closing angle bracket.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
 * Parses an element declaration.
 *
 * This is a stub: after the element name has been read the rest of the
 * declaration is skipped. The closing angle bracket is pushed back, so the
 * declaration is consumed up to, but not including, the '&gt;'.
 *
 * @exception Exception is parser specific exception from panic method.
 */
private void dtdelm() throws Exception {
    wsskip();
    name(mIsNSAware);
    // Discard everything up to the closing '>'; running out of input is an error.
    char next;
    while ((next = getch()) != '>') {
        if (next == EOS) {
            panic(FAULT);
        }
    }
    bkch();
}
Parses an attribute list declaration.
This method parses the declaration up to the closing angle bracket.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
 * Parses an attribute list declaration.
 *
 * The element name is read first and its entry in the list of declared
 * attributes ({@code mAttL}) is looked up or created. The attribute
 * declarations are then read one by one until a closing angle bracket
 * follows a declaration.
 *
 * @exception Exception is parser specific exception from panic method.
 */
private void dtdattl() throws Exception {
    // Phase 1: read the element name.
    Pair owner = null;
    boolean named = false;
    while (!named) {
        switch (chtyp(getch())) {
        case 'a':
        case 'A':
        case '_':
        case 'X':
        case ':':
            bkch();
            // Get the element from the list or add a new one.
            char[] qn = qname(mIsNSAware);
            owner = find(mAttL, qn);
            if (owner == null) {
                owner = pair(mAttL);
                owner.chars = qn;
                mAttL = owner;
            }
            named = true;
            break;
        case ' ':
            break;
        case '%':
            pent(' ');
            break;
        default:
            panic(FAULT);
            break;
        }
    }

    // Phase 2: read attribute declarations until '>' shows up after one.
    while (true) {
        switch (chtyp(getch())) {
        case 'a':
        case 'A':
        case '_':
        case 'X':
        case ':':
            bkch();
            dtdatt(owner);
            if (wsskip() == '>') {
                return;
            }
            break;
        case ' ':
            break;
        case '%':
            pent(' ');
            break;
        default:
            panic(FAULT);
            break;
        }
    }
}
Parses an attribute declaration.
The attribute uses the following fields of Pair object: chars - characters
of qualified name id - the type identifier of the attribute list - a pair
which holds the default value (chars field)
Params: - elm – An object which represents all defined attributes on an
element.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Parses an attribute declaration.
*
* The attribute uses the following fields of Pair object: chars - characters
* of qualified name; id - the type identifier of the attribute; list - a pair
* which holds the default value (chars field).
*
* States: 0 - attribute name, 1 - attribute type, 2 and 3 - the elements of
* an enumeration or NOTATION list, 4 - default declaration, 5 - default
* value.
*
* @param elm An object which represents all defined attributes on an
* element.
* @exception Exception is parser specific exception from panic method.
* @exception IOException listed by the original documentation; the method
* itself only declares <code>Exception</code>.
*/
@SuppressWarnings("fallthrough")
private void dtdatt(Pair elm) throws Exception {
char attqn[] = null;
Pair att = null;
char ch;
for (short st = 0; st >= 0;) {
ch = getch();
switch (st) {
case 0: // the attribute name
switch (chtyp(ch)) {
case 'a':
case 'A':
case '_':
case 'X':
case ':':
bkch();
// Get the attribute from the list or add a new one.
attqn = qname(mIsNSAware);
att = find(elm.list, attqn);
if (att == null) {
// New attribute declaration
att = pair(elm.list);
att.chars = attqn;
elm.list = att;
} else {
// Do not override the attribute declaration [#3.3]:
// the repeated declaration is parsed into a pair which is
// not linked into elm.list
att = pair(null);
att.chars = attqn;
att.id = 'c';
}
wsskip();
st = 1;
break;
case '%':
pent(' ');
break;
case ' ':
break;
default:
panic(FAULT);
break;
}
break;
case 1: // the attribute type
switch (chtyp(ch)) {
case '(':
att.id = 'u'; // enumeration type
st = 2; // read the first element of the list
break;
case '%':
pent(' ');
break;
case ' ':
break;
default:
bkch();
bntok(); // read type id
att.id = bkeyword();
switch (att.id) {
case 'o': // NOTATION
if (wsskip() != '(') {
panic(FAULT);
}
ch = getch(); // read '('
st = 2; // read the first element of the list
break;
case 'i': // ID
case 'r': // IDREF
case 'R': // IDREFS
case 'n': // ENTITY
case 'N': // ENTITIES
case 't': // NMTOKEN
case 'T': // NMTOKENS
case 'c': // CDATA
wsskip();
st = 4; // read default declaration
break;
default:
panic(FAULT);
break;
}
break;
}
break;
case 2: // read the first element of the list
switch (chtyp(ch)) {
case 'a':
case 'A':
case 'd':
case '.':
case ':':
case '-':
case '_':
case 'X':
bkch();
switch (att.id) {
case 'u': // enumeration type
bntok();
break;
case 'o': // NOTATION
mBuffIdx = -1;
bname(false);
break;
default:
panic(FAULT);
break;
}
wsskip();
st = 3; // read next element of the list
break;
case '%':
pent(' ');
break;
case ' ':
break;
default:
panic(FAULT);
break;
}
break;
case 3: // read next element of the list
switch (ch) {
case ')':
// End of the list
wsskip();
st = 4; // read default declaration
break;
case '|':
// One more element follows
wsskip();
switch (att.id) {
case 'u': // enumeration type
bntok();
break;
case 'o': // NOTATION
mBuffIdx = -1;
bname(false);
break;
default:
panic(FAULT);
break;
}
wsskip();
break;
case '%':
pent(' ');
break;
default:
panic(FAULT);
break;
}
break;
case 4: // read default declaration
switch (ch) {
case '#':
bntok();
switch (bkeyword()) {
case 'F': // FIXED
switch (wsskip()) {
case '\"':
case '\'':
st = 5; // read the default value
break;
case EOS:
panic(FAULT);
// no break here: control reaches default only if panic returns
default:
st = -1;
break;
}
break;
case 'Q': // REQUIRED
case 'I': // IMPLIED
st = -1;
break;
default:
panic(FAULT);
break;
}
break;
case '\"':
case '\'':
bkch();
st = 5; // read the default value
break;
case ' ':
case '\n':
case '\r':
case '\t':
break;
case '%':
pent(' ');
break;
default:
// No default declaration: leave the char for the caller
bkch();
st = -1;
break;
}
break;
case 5: // read the default value
switch (ch) {
case '\"':
case '\'':
bkch();
bqstr('d'); // the value in the mBuff now
att.list = pair(null);
// Create a string like "attqname='value' ": the name chars
// (from index 1 on), '=', the quote char, the value, the quote
// char again and a trailing space
att.list.chars = new char[att.chars.length + mBuffIdx + 3];
System.arraycopy(
att.chars, 1, att.list.chars, 0, att.chars.length - 1);
att.list.chars[att.chars.length - 1] = '=';
att.list.chars[att.chars.length] = ch;
System.arraycopy(
mBuff, 1, att.list.chars, att.chars.length + 1, mBuffIdx);
att.list.chars[att.chars.length + mBuffIdx + 1] = ch;
att.list.chars[att.chars.length + mBuffIdx + 2] = ' ';
st = -1;
break;
default:
panic(FAULT);
break;
}
break;
default:
panic(FAULT);
break;
}
}
}
Parses a notation declaration.
This method parses the declaration up to the closing angle bracket.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
 * Parses a notation declaration.
 *
 * The notation name and its identifier are read and handed to
 * {@code notDecl}; the pair holding the identifier is released afterwards.
 *
 * @exception Exception is parser specific exception from panic method.
 */
private void dtdnot() throws Exception {
    // Notation name, possibly preceded by white space.
    wsskip();
    final String notation = name(false);

    // Identifier of the notation, possibly preceded by white space.
    wsskip();
    final Pair extId = pubsys('N');

    notDecl(notation, extId.name, extId.value);
    del(extId);
}
Parses an attribute.
This recursive method is responsible for prefix addition
(
mPref
) on the way down. The element's start tag end triggers
the return process. The method then on it's way back resolves prefixes
and accumulates attributes.
att.num
carries attribute flags where: 0x1 - attribute is
declared in DTD (attribute decalration had been read); 0x2 - attribute's
default value is used.
Params: - att – An object which reprecents current attribute.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Parses an attribute.
*
* This recursive method is responsible for prefix addition
* (<code>mPref</code>) on the way down. The end of the element's start tag
* triggers the return process. The method then on its way back resolves
* prefixes and accumulates attributes.
*
* <p><code>att.num</code> carries attribute flags where: 0x1 - attribute is
* declared in DTD (attribute declaration had been read); 0x2 - attribute's
* default value is used.</p>
*
* @param att An object which represents current attribute.
* @exception Exception is parser specific exception from panic method.
* @exception IOException listed by the original documentation; the method
* itself only declares <code>Exception</code>.
*/
@SuppressWarnings("fallthrough")
private void attr(Pair att) throws Exception {
switch (wsskip()) {
case '/':
case '>':
// End of the start tag.
if ((att.num & 0x2) == 0) { // all attributes have been read
att.num |= 0x2; // set default attribute flag
Input inp = mInp;
// Go through all attributes defined on current element.
for (Pair def = mElm.list; def != null; def = def.next) {
if (def.list == null) // no default value
{
continue;
}
// A declared default whose attribute was not found among the
// ones already read is pushed as an input of its own.
Pair act = find(att.next, def.chars);
if (act == null) {
push(new Input(def.list.chars));
}
}
if (mInp != inp) { // defaults have been added
// Parse the pushed defaults like ordinary attributes.
attr(att);
return;
}
}
// Ensure the attribute string array capacity
mAttrs.setLength(mAttrIdx);
mItems = mAttrs.mItems;
return;
case EOS:
panic(FAULT);
// no break here: control reaches default only if panic returns
default:
// Read the attribute name and value
att.chars = qname(mIsNSAware);
att.name = att.local();
String type = atype(att); // sets attribute's type on att.id
wsskip();
if (getch() != '=') {
panic(FAULT);
}
bqstr((char) att.id); // read the value with normalization.
String val = new String(mBuff, 1, mBuffIdx);
Pair next = pair(att);
next.num = (att.num & ~0x1); // inherit attribute flags
// Put a namespace declaration on top of the prefix stack
if ((mIsNSAware == false) || (isdecl(att, val) == false)) {
// An ordinary attribute
mAttrIdx++;
attr(next); // recursive call to parse the next attribute
mAttrIdx--;
// Add the attribute to the attributes string array;
// each attribute starts at an index which is a multiple of 8
char idx = (char) (mAttrIdx << 3);
mItems[idx + 1] = att.qname(); // attr qname
mItems[idx + 2] = (mIsNSAware) ? att.name : ""; // attr local name
mItems[idx + 3] = val; // attr value
mItems[idx + 4] = type; // attr type
switch (att.num & 0x3) {
case 0x0: // neither declared nor default
mItems[idx + 5] = null;
break;
case 0x1: // declared attribute
mItems[idx + 5] = "d";
break;
default: // 0x2, 0x3 - default attribute always declared
mItems[idx + 5] = "D";
break;
}
// Resolve the prefix if any and report the attribute
// NOTE: The attribute does not accept the default namespace.
mItems[idx + 0] = (att.chars[0] != 0) ? rslv(att.chars) : "";
} else {
// A namespace declaration. mPref.name contains prefix and
// mPref.value contains namespace URI set by isdecl method.
// Report a start of the new mapping
newPrefix();
// Recursive call to parse the next attribute
attr(next);
// NOTE: The namespace declaration is not reported.
}
del(next);
break;
}
}
Retrieves attribute type.
This method sets the type of normalization in the attribute
id
field and returns the name of attribute type.
Params: - att – An object which represents current attribute.
Throws: - Exception – is parser specific exception form panic method.
Returns: The name of the attribute type.
/**
 * Retrieves attribute type.
 *
 * The kind of normalization to apply is stored in the {@code id} field of
 * the attribute: 'c' for an attribute which is not declared or is declared
 * as CDATA, 'i' for every other declared type. An attribute which is
 * declared on the current element also gets the 0x1 flag in its
 * {@code num} field.
 *
 * @param att An object which represents current attribute.
 * @return The name of the attribute type.
 * @exception Exception is parser specific exception from panic method.
 */
private String atype(Pair att)
        throws Exception {
    // CDATA-type normalization by default [#3.3.3]
    att.id = 'c';
    final Pair decl = (mElm.list == null) ? null : find(mElm.list, att.chars);
    if (decl == null) {
        return "CDATA";
    }

    att.num |= 0x1;     // attribute is declared
    att.id = 'i';       // non-CDATA normalization unless the type is CDATA

    String typeName = null;
    switch (decl.id) {
    case 'i':
        typeName = "ID";
        break;
    case 'r':
        typeName = "IDREF";
        break;
    case 'R':
        typeName = "IDREFS";
        break;
    case 'n':
        typeName = "ENTITY";
        break;
    case 'N':
        typeName = "ENTITIES";
        break;
    case 't':
    case 'u':           // an enumeration is reported as NMTOKEN
        typeName = "NMTOKEN";
        break;
    case 'T':
        typeName = "NMTOKENS";
        break;
    case 'o':
        typeName = "NOTATION";
        break;
    case 'c':
        att.id = 'c';
        typeName = "CDATA";
        break;
    default:
        panic(FAULT);
        break;
    }
    return typeName;
}
Parses a comment.
The '<!' part is read in dispatcher so the method starts
with first '-' after '<!'.
Throws: - Exception – is parser specific exception form panic method.
/**
 * Parses a comment.
 *
 * The '<!' part is read in dispatcher so the method starts
 * with first '-' after '<!'. The comment body is accumulated in the
 * parser buffer and reported through {@code comm(char[], int)} once the
 * closing '-->' has been read.
 *
 * @exception Exception is parser specific exception from panic method.
 */
private void comm() throws Exception {
    if (mPh == PH_DOC_START) {
        mPh = PH_MISC_DTD;  // misc before DTD
    }
    // '<!' has already been read by the dispatcher.
    mBuffIdx = -1;
    int state = 0;
    while (state >= 0) {
        final char ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
        if (ch == EOS) {
            panic(FAULT);
        }
        switch (state) {
            case 0:  // first '-' of the comment open
            case 1:  // second '-' of the comment open
                if (ch == '-') {
                    state++;
                } else {
                    panic(FAULT);
                }
                break;

            case 2:  // comment body
                if (ch == '-') {
                    state = 3;
                } else {
                    bappend(ch);
                }
                break;

            case 3:  // possibly the second '-' of the comment close
                if (ch == '-') {
                    state = 4;
                } else {
                    // a lone '-' belongs to the body
                    bappend('-');
                    bappend(ch);
                    state = 2;
                }
                break;

            case 4:  // '>' of the comment close
                if (ch != '>') {
                    // "--" inside a comment body [#2.5 compatibility note]
                    panic(FAULT);
                } else {
                    comm(mBuff, mBuffIdx + 1);
                    state = -1;
                }
                break;

            default:
                panic(FAULT);
        }
    }
}
Parses a processing instruction.
The '<?' is read in dispatcher so the method starts with
first character of PI target name after '<?'.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
 * Parses a processing instruction.
 *
 * The '<?' is read in dispatcher so the method starts with
 * first character of PI target name after '<?'. The target and the
 * accumulated body are reported through {@code pi(String, String)}.
 *
 * @exception Exception is parser specific exception from panic method.
 */
private void pi() throws Exception {
    // '<?' has already been read by the dispatcher.
    String target = null;
    mBuffIdx = -1;
    int state = 0;
    while (state >= 0) {
        final char ch = getch();
        if (ch == EOS) {
            panic(FAULT);
        }
        switch (state) {
            case 0:  // read the PI target name
                switch (chtyp(ch)) {
                    case 'a':
                    case 'A':
                    case '_':
                    case ':':
                    case 'X':
                        bkch();
                        target = name(false);
                        // PI target name may not be empty string [#2.6]
                        // PI target name 'XML' is reserved [#2.6]
                        if (target.length() == 0
                                || mXml.name.equals(target.toLowerCase())) {
                            panic(FAULT);
                        }
                        // This is a processing instruction
                        if (mPh == PH_DOC_START) {  // the beginning of the document
                            mPh = PH_MISC_DTD;      // misc before DTD
                        }
                        wsskip();       // skip spaces after the PI target name
                        state = 1;      // accumulate the PI body
                        mBuffIdx = -1;
                        break;

                    default:
                        panic(FAULT);
                }
                break;

            case 1:  // accumulate the PI body
                if (ch == '?') {
                    state = 2;  // possible end of the PI body
                } else {
                    bappend(ch);
                }
                break;

            case 2:  // a '?' has been seen
                if (ch == '>') {
                    // PI has been read.
                    pi(target, new String(mBuff, 0, mBuffIdx + 1));
                    state = -1;
                } else if (ch == '?') {
                    // the previous '?' is body text; stay in this state
                    bappend('?');
                } else {
                    bappend('?');
                    bappend(ch);
                    state = 1;  // accumulate the PI body
                }
                break;

            default:
                panic(FAULT);
        }
    }
}
Parses a character data.
The '<!' part is read in dispatcher so the method starts
with first '[' after '<!'.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
 * Parses a CDATA section.
 *
 * The '<!' part is read in dispatcher so the method starts
 * with first '[' after '<!'. The section content is accumulated in the
 * parser buffer and reported through {@code bflash()} when the closing
 * ']]>' is read.
 *
 * Like {@code comm()} and {@code pi()}, the method panics as soon as the
 * end of stream character is read, so that an unterminated section is
 * reported as an error instead of being accumulated.
 *
 * @exception Exception is parser specific exception from panic method.
 */
private void cdat()
        throws Exception {
    // '<!' has already been read by the dispatcher.
    char ch;
    mBuffIdx = -1;
    for (short st = 0; st >= 0;) {
        ch = getch();
        if (ch == EOS) {
            // EOS inside a CDATA section: without this check states 3-5
            // would keep appending the EOS character to the buffer.
            panic(FAULT);
        }
        switch (st) {
            case 0:  // the first '[' of the CDATA open
                if (ch == '[') {
                    st = 1;
                } else {
                    panic(FAULT);
                }
                break;

            case 1:  // read "CDATA"
                if (chtyp(ch) == 'A') {
                    bappend(ch);
                } else {
                    if (!"CDATA".equals(
                            new String(mBuff, 0, mBuffIdx + 1))) {
                        panic(FAULT);
                    }
                    bkch();  // re-read the character in state 2
                    st = 2;
                }
                break;

            case 2:  // the second '[' of the CDATA open
                if (ch != '[') {
                    panic(FAULT);
                }
                mBuffIdx = -1;  // drop the keyword, start collecting data
                st = 3;
                break;

            case 3:  // read data before the first ']'
                if (ch != ']') {
                    bappend(ch);
                } else {
                    st = 4;
                }
                break;

            case 4:  // read the second ']' or continue to read the data
                if (ch != ']') {
                    bappend(']');
                    bappend(ch);
                    st = 3;
                } else {
                    st = 5;
                }
                break;

            case 5:  // read '>' or continue to read the data
                switch (ch) {
                    case ']':
                        // one more ']': the oldest pending one is data
                        bappend(']');
                        break;

                    case '>':
                        bflash();
                        st = -1;
                        break;

                    default:
                        bappend(']');
                        bappend(']');
                        bappend(ch);
                        st = 3;
                        break;
                }
                break;

            default:
                panic(FAULT);
        }
    }
}
Reads a xml name.
The xml name must conform "Namespaces in XML" specification. Therefore
the ':' character is not allowed in the name. This method should be used
for PI and entity names which may not have a namespace according to the
specification mentioned above.
Params: - ns – The true value turns namespace conformance on.
Throws: - Exception – When incorrect character appear in the name.
- IOException –
Returns: The name has been read.
/**
 * Reads a xml name and returns it as a string.
 *
 * The parser buffer is reset and filled by {@code bname(ns)}; the
 * returned string is built from the buffer starting at index 1, i.e.
 * after the slot {@code bname} reserves for the colon offset.
 *
 * @param ns The true value turns namespace conformance on.
 * @return The name has been read.
 * @exception Exception When incorrect character appear in the name.
 */
protected String name(boolean ns)
        throws Exception {
    mBuffIdx = -1;  // start with an empty buffer
    bname(ns);
    final int length = mBuffIdx;  // characters following the offset slot
    return new String(mBuff, 1, length);
}
Reads a qualified xml name.
The characters of a qualified name is an array of characters. The first
(chars[0]) character is the index of the colon character which separates
the prefix from the local name. If the index is zero, the name does not
contain separator or the parser works in the namespace unaware mode. The
length of qualified name is the length of the array minus one.
Params: - ns – The true value turns namespace conformance on.
Throws: - Exception – When incorrect character appear in the name.
- IOException –
Returns: The characters of a qualified name.
/**
 * Reads a qualified xml name.
 *
 * The characters of a qualified name is an array of characters. The first
 * (chars[0]) character is the index of the colon character which separates
 * the prefix from the local name. If the index is zero, the name does not
 * contain separator or the parser works in the namespace unaware mode. The
 * length of qualified name is the length of the array minus one.
 *
 * @param ns The true value turns namespace conformance on.
 * @return A fresh copy of the qualified name characters taken from the
 *   parser buffer.
 * @exception Exception When incorrect character appear in the name.
 */
protected char[] qname(boolean ns)
        throws Exception {
    mBuffIdx = -1;  // start with an empty buffer
    bname(ns);
    final int count = mBuffIdx + 1;  // offset slot plus name characters
    final char[] result = new char[count];
    System.arraycopy(mBuff, 0, result, 0, count);
    return result;
}
Reads the public or/and system identifiers.
Params: - inp – The input object.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
 * Reads the public or/and system identifiers and stores them in the
 * given input object.
 *
 * @param inp The input object whose <code>pubid</code> and
 *   <code>sysid</code> fields are set.
 * @exception Exception is parser specific exception from panic method.
 */
private void pubsys(Input inp)
        throws Exception {
    final Pair ids = pubsys(' ');
    inp.pubid = ids.name;
    inp.sysid = ids.value;
    del(ids);  // hand the temporary pair back
}
Reads the public or/and system identifiers.
Params: - flag – The 'N' allows public id be without system id.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: The public or/and system identifiers pair.
/**
 * Reads the public or/and system identifiers.
 *
 * The method expects the keyword PUBLIC or SYSTEM followed by the
 * quoted identifier(s). In the returned pair <code>name</code> holds the
 * public identifier (or <code>null</code>) and <code>value</code> holds
 * the system identifier (or <code>null</code>).
 *
 * @param flag The 'N' allows public id be without system id.
 * @return The public or/and system identifiers pair.
 * @exception Exception is parser specific exception from panic method.
 */
private Pair pubsys(char flag) throws Exception {
    final Pair ids = pair(null);
    final String keyword = name(false);

    if ("SYSTEM".equals(keyword)) {
        ids.name = null;
        bqstr(' ');
        ids.value = new String(mBuff, 1, mBuffIdx);
        return ids;
    }
    if (!"PUBLIC".equals(keyword)) {
        panic(FAULT);
        return null;
    }

    bqstr('i');  // non-CDATA normalization [#4.2.2]
    ids.name = new String(mBuff, 1, mBuffIdx);
    final char next = wsskip();
    if (next == '\"' || next == '\'') {
        // a system literal follows the public id
        bqstr(' ');
        ids.value = new String(mBuff, 1, mBuffIdx);
    } else {
        if (next == EOS) {
            panic(FAULT);
        }
        if (flag != 'N') {  // [#4.7]
            panic(FAULT);
        }
        ids.value = null;
    }
    return ids;
}
Reads an attribute value.
The grammar this method can read is:
eqstr := S "=" qstr
qstr := S ("'" string "'") | ('"' string '"')
This method resolves entities
inside a string unless the parser parses DTD.
Params: - flag – The '=' character forces the method to accept the '='
character before quoted string and read the following string as not an
attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
'-' - not an attribute value; 'd' - in DTD context.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: The content of the quoted string as a string.
/**
 * Reads an attribute value.
 *
 * The grammar this method can read is:
 * <pre>{@code
 * eqstr := S "=" qstr
 * qstr  := S ("'" string "'") | ('"' string '"')
 * }</pre>
 * This method resolves entities
 * inside a string unless the parser parses DTD.
 *
 * @param flag The '=' character forces the method to accept the '='
 *   character before quoted string and read the following string as not an
 *   attribute ('-'), 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization;
 *   '-' - not an attribute value; 'd' - in DTD context.
 * @return The content of the quoted string as a string.
 * @exception Exception is parser specific exception from panic method.
 */
protected String eqstr(char flag) throws Exception {
    char mode = flag;
    if (flag == '=') {
        wsskip();
        if (getch() != '=') {
            panic(FAULT);
        }
        mode = '-';  // the string after '=' is read as a non-attribute value
    }
    bqstr(mode);
    return new String(mBuff, 1, mBuffIdx);
}
Resolves an entity.
This method resolves built-in and character entity references. It is also
reports external entities to the application.
Params: - flag – The 'x' character forces the method to report a skipped
entity; 'i' character - indicates non-CDATA normalization.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: Name of unresolved entity or null
if entity had been
resolved successfully.
/**
* Resolves an entity reference.
*
* This method resolves built-in and character entity references. It also
* reports external entities to the application through
* <code>resolveEnt</code>. The '&amp;' has already been consumed by the
* caller; the method appends a temporary '&amp;' to the parser buffer,
* accumulates the reference after it and then rolls the buffer back to the
* position it had on entry before appending the replacement (if any).
*
* @param flag The 'x' character forces the method to report a skipped
* entity; 'i' character - indicates non-CDATA normalization.
* @return Name of unresolved entity or <code>null</code> if entity had been
* resolved successfully.
* @exception Exception is parser specific exception from panic method.
*/
@SuppressWarnings("fallthrough")
private String ent(char flag) throws Exception {
char ch;
// Buffer index at which the temporary '&' is stored; idx - 1 is the
// buffer offset to restore once the reference has been read.
int idx = mBuffIdx + 1;
Input inp = null;
String str = null;
mESt = 0x100; // reset the built-in entity recognizer
bappend('&');
for (short st = 0; st >= 0;) {
ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
switch (st) {
case 0: // the first character of the entity name
case 1: // read built-in entity name
switch (chtyp(ch)) {
case 'd':
case '.':
case '-':
// These character types may not start a name (st == 0).
if (st != 1) {
panic(FAULT);
}
// intentional fall through: append as a name character
case 'a':
case 'A':
case '_':
case 'X':
bappend(ch);
eappend(ch);
st = 1;
break;
case ':':
// A colon is rejected in an entity name when namespace aware.
if (mIsNSAware != false) {
panic(FAULT);
}
bappend(ch);
eappend(ch);
st = 1;
break;
case ';':
if (mESt < 0x100) {
// The entity is a built-in entity
mBuffIdx = idx - 1;
bappend(mESt);
st = -1;
break;
} else if (mPh == PH_DTD) {
// In DTD entity declaration has to resolve character
// entities and include "as is" others. [#4.4.7]
bappend(';');
st = -1;
break;
}
// Convert an entity name to a string
str = new String(mBuff, idx + 1, mBuffIdx - idx);
inp = mEnt.get(str);
// Restore the buffer offset
mBuffIdx = idx - 1;
if (inp != null) {
if (inp.chars == null) {
// External entity
InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
if (is != null) {
push(new Input(BUFFSIZE_READER));
setinp(is);
mInp.pubid = inp.pubid;
mInp.sysid = inp.sysid;
str = null; // the entity is resolved
} else {
// Unresolved external entity
if (flag != 'x') {
panic(FAULT); // unknown entity within markup
} // str is name of unresolved entity
}
} else {
// Internal entity
push(inp);
str = null; // the entity is resolved
}
} else {
// Unknown or general unparsed entity
if (flag != 'x') {
panic(FAULT); // unknown entity within markup
} // str is name of unresolved entity
}
st = -1;
break;
case '#':
// '#' is only valid directly after '&' (character reference).
if (st != 0) {
panic(FAULT);
}
st = 2;
break;
default:
panic(FAULT);
}
break;
case 2: // read character entity
switch (chtyp(ch)) {
case 'd':
bappend(ch);
break;
case ';':
// Convert the character entity to a character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1, mBuffIdx - idx), 10);
// Values of 0xffff and above are rejected: the result
// is stored in a single char and 0xffff is EOS.
if (i >= 0xffff) {
panic(FAULT);
}
ch = (char) i;
} catch (NumberFormatException nfe) {
// Also reached for an empty reference such as "&#;".
panic(FAULT);
}
// Restore the buffer offset
mBuffIdx = idx - 1;
// Normalizing append is used for a space or when the current
// input has a parent input (mInp.next != null); otherwise
// the character is appended verbatim.
if (ch == ' ' || mInp.next != null) {
bappend(ch, flag);
} else {
bappend(ch);
}
st = -1;
break;
case 'a':
// If the entity buffer is empty and ch == 'x'
if ((mBuffIdx == idx) && (ch == 'x')) {
st = 3;
break;
}
// intentional fall through: any other letter is an error
default:
panic(FAULT);
}
break;
case 3: // read hex character entity
switch (chtyp(ch)) {
case 'A':
case 'a':
case 'd':
// Letters that are not hex digits are rejected later by
// Integer.parseInt (NumberFormatException).
bappend(ch);
break;
case ';':
// Convert the character entity to a character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1, mBuffIdx - idx), 16);
// Same limit as for the decimal form.
if (i >= 0xffff) {
panic(FAULT);
}
ch = (char) i;
} catch (NumberFormatException nfe) {
panic(FAULT);
}
// Restore the buffer offset
mBuffIdx = idx - 1;
if (ch == ' ' || mInp.next != null) {
bappend(ch, flag);
} else {
bappend(ch);
}
st = -1;
break;
default:
panic(FAULT);
}
break;
default:
panic(FAULT);
}
}
return str;
}
Resolves a parameter entity.
This method resolves a parameter entity references. It is also reports
external entities to the application.
Params: - flag – The '-' instruct the method to do not set up surrounding
spaces [#4.4.8].
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Resolves a parameter entity reference.
*
* The '%' has already been consumed by the caller. Outside of the DTD
* phase the reference is not recognized and a literal '%' is left in the
* parser buffer. Otherwise the entity name is read, looked up in the
* parameter entity table and either pushed as a new input or reported
* through <code>skippedEnt</code> when it is unknown or cannot be resolved.
*
* @param flag The '-' instruct the method to do not set up surrounding
* spaces [#4.4.8].
* @exception Exception is parser specific exception from panic method.
*/
@SuppressWarnings("fallthrough")
private void pent(char flag) throws Exception {
char ch;
// Buffer index at which the '%' is stored; idx - 1 is the buffer offset
// to restore once the reference has been read.
int idx = mBuffIdx + 1;
Input inp = null;
String str = null;
bappend('%');
if (mPh != PH_DTD) // the DTD internal subset
{
return; // Not Recognized [#4.4.1]
} // Read entity name
bname(false);
// bname reserves one char (the colon offset) at idx + 1, so the name
// itself starts at idx + 2.
str = new String(mBuff, idx + 2, mBuffIdx - idx - 1);
if (getch() != ';') {
panic(FAULT);
}
inp = mPEnt.get(str);
// Restore the buffer offset
mBuffIdx = idx - 1;
if (inp != null) {
if (inp.chars == null) {
// External parameter entity
InputSource is = resolveEnt(str, inp.pubid, inp.sysid);
if (is != null) {
if (flag != '-') {
bappend(' '); // tail space
}
push(new Input(BUFFSIZE_READER));
// BUG: there is no leading space! [#4.4.8]
setinp(is);
mInp.pubid = inp.pubid;
mInp.sysid = inp.sysid;
} else {
// Unresolved external parameter entity
skippedEnt("%" + str);
}
} else {
// Internal parameter entity
if (flag == '-') {
// No surrounding spaces
// NOTE(review): starting at index 1 presumably skips a space
// stored at index 0 of the entity text - confirm where
// inp.chars is populated.
inp.chIdx = 1;
} else {
// Insert surrounding spaces
bappend(' '); // tail space
inp.chIdx = 0;
}
push(inp);
}
} else {
// Unknown parameter entity
skippedEnt("%" + str);
}
}
Recognizes and handles a namespace declaration.
This method identifies a type of namespace declaration if any and puts
new mapping on top of prefix stack.
Params: - name – The attribute qualified name (
name.value
is a
String
object which represents the attribute prefix). - value – The attribute value.
Returns: true
if a namespace declaration is recognized.
/**
 * Recognizes and handles a namespace declaration.
 *
 * When the attribute is <code>xmlns</code> (no prefix) or has the
 * <code>xmlns</code> prefix, a new mapping is put on top of the prefix
 * stack (<code>mPref</code>), owned by the current element, and the
 * element's namespace counter is incremented.
 *
 * @param name The attribute qualified name (<code>name.name</code> is
 *   used as the declared prefix for a prefixed declaration).
 * @param value The attribute value, i.e. the namespace URI.
 * @return <code>true</code> if a namespace declaration is recognized.
 */
private boolean isdecl(Pair name, String value) {
    final boolean unprefixed = (name.chars[0] == 0);

    if (unprefixed) {
        if (!"xmlns".equals(name.name)) {
            return false;
        }
        // New default namespace declaration
        mPref = pair(mPref);
        mPref.list = mElm;  // prefix owner element
        mPref.value = value;
        mPref.name = "";
        mPref.chars = NONS;
        mElm.num++;  // namespace counter
        return true;
    }

    if (!name.eqpref(XMLNS)) {
        return false;
    }
    // New prefix declaration
    final int len = name.name.length();
    mPref = pair(mPref);
    mPref.list = mElm;  // prefix owner element
    mPref.value = value;
    mPref.name = name.name;
    // chars[0] holds len + 1, followed by the prefix characters
    final char[] prefixChars = new char[len + 1];
    prefixChars[0] = (char) (len + 1);
    name.name.getChars(0, len, prefixChars, 1);
    mPref.chars = prefixChars;
    mElm.num++;  // namespace counter
    return true;
}
Resolves a prefix.
Throws: - Exception – When mapping for specified prefix is not found.
Returns: The namespace assigned to the prefix.
/**
 * Resolves a prefix.
 *
 * The prefix stack is searched from the top for an entry whose prefix
 * matches the prefix of the given qualified name. If none matches and
 * <code>qname[0] == 1</code> (QNames like ':local'), the first entry
 * whose <code>chars[0]</code> is zero is used instead.
 *
 * @param qname The qualified name characters; <code>qname[0]</code> is
 *   the colon offset.
 * @return The namespace assigned to the prefix.
 * @exception Exception When mapping for specified prefix is not found.
 */
private String rslv(char[] qname)
        throws Exception {
    Pair entry = mPref;
    while (entry != null) {
        if (entry.eqpref(qname)) {
            return entry.value;
        }
        entry = entry.next;
    }
    if (qname[0] == 1) {  // QNames like ':local'
        entry = mPref;
        while (entry != null) {
            if (entry.chars[0] == 0) {
                return entry.value;
            }
            entry = entry.next;
        }
    }
    panic(FAULT);
    return null;
}
Skips xml white space characters.
This method skips white space characters (' ', '\t', '\n', '\r') and
looks ahead not white space character.
Throws: Returns: The first not white space look ahead character.
/**
 * Skips xml white space characters.
 *
 * This method skips white space characters (' ', '\t', '\n', '\r') and
 * looks ahead not white space character. The returned character is pushed
 * back, so the next read returns it again.
 *
 * @return The first not white space look ahead character.
 * @exception IOException declared for reads performed through getch.
 */
protected char wsskip()
        throws IOException {
    char ch;
    do {
        // Read next character
        ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
        // keep going while the character is ASCII white space [ \t\n\r]
    } while ((ch < 0x80) && (nmttyp[ch] == 3));
    mChIdx--;  // bkch();
    return ch;
}
Reports document type.
Params: - name – The name of the entity.
- pubid – The public identifier of the entity or
null
. - sysid – The system identifier of the entity or
null
.
/**
* Reports document type.
*
* @param name The name of the entity.
* @param pubid The public identifier of the entity or <code>null</code>.
* @param sysid The system identifier of the entity or <code>null</code>.
* @exception SAXException declared for implementations; the conditions
* under which it is thrown are implementation specific.
*/
protected abstract void docType(String name, String pubid, String sysid)
throws SAXException;
Reports a comment.
Params: - text – The comment text starting from first character.
- length – The number of characters in comment.
/**
* Reports a comment.
*
* The comment parser in this class passes the parser buffer itself as
* <code>text</code>, not a copy.
* NOTE(review): implementations presumably must not keep a reference to
* the array after returning - confirm against the implementations.
*
* @param text The comment text starting from first character.
* @param length The number of characters in comment.
*/
protected abstract void comm(char[] text, int length);
Reports a processing instruction.
Params: - target – The processing instruction target name.
- body – The processing instruction body text.
/**
* Reports a processing instruction.
*
* @param target The processing instruction target name.
* @param body The processing instruction body text.
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract void pi(String target, String body)
throws Exception;
Reports new namespace prefix. The Namespace prefix (
mPref.name
) being declared and the Namespace URI (
mPref.value
) the prefix is mapped to. An empty string is
used for the default element namespace, which has no prefix.
/**
* Reports new namespace prefix. The Namespace prefix (
* <code>mPref.name</code>) being declared and the Namespace URI (
* <code>mPref.value</code>) the prefix is mapped to. An empty string is
* used for the default element namespace, which has no prefix.
*
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract void newPrefix()
throws Exception;
Reports skipped entity name.
Params: - name – The entity name.
/**
* Reports skipped entity name.
*
* The parameter entity parser in this class passes the name with a leading
* '%' character.
*
* @param name The entity name.
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract void skippedEnt(String name)
throws Exception;
Returns an
InputSource
for specified entity or
null
.
Params: - name – The name of the entity.
- pubid – The public identifier of the entity.
- sysid – The system identifier of the entity.
/**
* Returns an
* <code>InputSource</code> for specified entity or
* <code>null</code>.
*
* The entity parsers in this class treat a <code>null</code> result as an
* unresolved external entity.
*
* @param name The name of the entity.
* @param pubid The public identifier of the entity.
* @param sysid The system identifier of the entity.
* @return The input source to read the entity from, or <code>null</code>.
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract InputSource resolveEnt(
String name, String pubid, String sysid)
throws Exception;
Reports notation declaration.
Params: - name – The notation's name.
- pubid – The notation's public identifier, or null if none was given.
- sysid – The notation's system identifier, or null if none was given.
/**
* Reports notation declaration.
*
* @param name The notation's name.
* @param pubid The notation's public identifier, or null if none was given.
* @param sysid The notation's system identifier, or null if none was given.
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract void notDecl(String name, String pubid, String sysid)
throws Exception;
Reports unparsed entity name.
Params: - name – The unparsed entity's name.
- pubid – The entity's public identifier, or null if none was given.
- sysid – The entity's system identifier.
- notation – The name of the associated notation.
/**
* Reports unparsed entity declaration.
*
* @param name The unparsed entity's name.
* @param pubid The entity's public identifier, or null if none was given.
* @param sysid The entity's system identifier.
* @param notation The name of the associated notation.
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract void unparsedEntDecl(
String name, String pubid, String sysid, String notation)
throws Exception;
Notifies the handler about fatal parsing error.
Params: - msg – The problem description message.
/**
* Notifies the handler about fatal parsing error.
*
* NOTE(review): the parsing methods of this class are written as if this
* call does not return normally (statements after it mostly exist to
* satisfy the compiler) - presumably every implementation throws; confirm
* against the implementations.
*
* @param msg The problem description message.
* @exception Exception the parser specific exception reporting the error.
*/
protected abstract void panic(String msg)
throws Exception;
Reads a qualified xml name.
This is low level routine which leaves a qName in the buffer. The
characters of a qualified name is an array of characters. The first
(chars[0]) character is the index of the colon character which separates
the prefix from the local name. If the index is zero, the name does not
contain separator or the parser works in the namespace unaware mode. The
length of qualified name is the length of the array minus one.
Params: - ns – The true value turns namespace conformance on.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Reads a qualified xml name.
*
* This is low level routine which leaves a qName in the buffer. The
* characters of a qualified name is an array of characters. The first
* (chars[0]) character is the index of the colon character which separates
* the prefix from the local name. If the index is zero, the name does not
* contain separator or the parser works in the namespace unaware mode. The
* length of qualified name is the length of the array minus one.
*
* Name characters are not appended one by one: the method only tracks
* where they would go and copies whole runs from the character buffer with
* <code>bcopy</code>, before the character buffer is refilled and at the
* end of the name. The character that terminates the name is pushed back.
*
* @param ns The true value turns namespace conformance on.
* @exception Exception is parser specific exception from panic method.
*/
private void bname(boolean ns)
throws Exception {
char ch;
char type;
mBuffIdx++; // allocate a char for colon offset
int bqname = mBuffIdx; // buffer index of the colon offset slot
int bcolon = bqname; // buffer index of the colon; bqname means "no colon yet"
int bchidx = bqname + 1; // buffer index the next name character maps to
int bstart = bchidx; // buffer index matching cstart for the next bcopy
int cstart = mChIdx; // first character buffer index not yet copied
// States: 0/1 - first/other chars of the prefix, 2/3 - first/other chars
// of the suffix. Without namespace conformance the prefix states are skipped.
short st = (short) ((ns == true) ? 0 : 2);
while (true) {
// Read next character
if (mChIdx >= mChLen) {
// Character buffer exhausted: save the run read so far, then let
// getch fetch more input and un-read the character it returned.
bcopy(cstart, bstart);
getch();
mChIdx--; // bkch();
cstart = mChIdx;
bstart = bchidx;
}
ch = mChars[mChIdx++];
type = (char) 0; // [X]
if (ch < 0x80) {
type = (char) nmttyp[ch];
} else if (ch == EOS) {
panic(FAULT);
}
// Parse QName
switch (st) {
case 0: // read the first char of the prefix
case 2: // read the first char of the suffix
switch (type) {
case 0: // [aA_X]
bchidx++; // append char to the buffer
st++; // (st == 0)? 1: 3;
break;
case 1: // [:]
// Push the colon back; it is read again and handled in state 1 or 3.
mChIdx--; // bkch();
st++; // (st == 0)? 1: 3;
break;
default:
panic(FAULT);
}
break;
case 1: // read the prefix
case 3: // read the suffix
switch (type) {
case 0: // [aA_X]
case 2: // [.-d]
bchidx++; // append char to the buffer
break;
case 1: // [:]
bchidx++; // append char to the buffer
if (ns == true) {
if (bcolon != bqname) {
panic(FAULT); // it must be only one colon
}
bcolon = bchidx - 1; // remember where the colon is
if (st == 1) {
st = 2; // the suffix must start with a name start char
}
}
break;
default:
// End of the name: un-read the terminator, copy the last run and
// store the colon offset relative to the offset slot.
mChIdx--; // bkch();
bcopy(cstart, bstart);
mBuff[bqname] = (char) (bcolon - bqname);
return;
}
break;
default:
panic(FAULT);
}
}
}
Reads a nmtoken.
This is low level routine which leaves a nmtoken in the buffer.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
 * Reads a nmtoken.
 *
 * This is low level routine which leaves a nmtoken in the buffer. The
 * buffer is reset first and its index 0 holds a zero colon offset; the
 * character that ends the token is pushed back.
 *
 * @exception Exception is parser specific exception from panic method.
 */
@SuppressWarnings("fallthrough")
private void bntok() throws Exception {
    mBuffIdx = -1;
    bappend((char) 0);  // default offset to the colon char
    for (;;) {
        final char ch = getch();
        switch (chtyp(ch)) {
            case 'a':
            case 'A':
            case 'd':
            case '.':
            case ':':
            case '-':
            case '_':
            case 'X':
                bappend(ch);
                continue;  // read the next token character

            case 'Z':
                panic(FAULT);
                // falls through

            default:
                break;
        }
        // Not a token character: un-read it and stop.
        bkch();
        return;
    }
}
Recognizes a keyword.
This is low level routine which recognizes one of keywords in the buffer.
Keyword Id ID - i IDREF - r IDREFS - R ENTITY - n ENTITIES - N NMTOKEN -
t NMTOKENS - T ELEMENT - e ATTLIST - a NOTATION - o CDATA - c REQUIRED -
Q IMPLIED - I FIXED - F
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: an id of a keyword or '?'.
/**
 * Recognizes a keyword.
 *
 * This is low level routine which recognizes one of keywords in the buffer
 * (the text starting at buffer index 1).
 * <pre>
 * Keyword    Id
 * ID       - i
 * IDREF    - r
 * IDREFS   - R
 * ENTITY   - n
 * ENTITIES - N
 * NMTOKEN  - t
 * NMTOKENS - T
 * ELEMENT  - e
 * ATTLIST  - a
 * NOTATION - o
 * CDATA    - c
 * REQUIRED - Q
 * IMPLIED  - I
 * FIXED    - F
 * </pre>
 *
 * @return an id of a keyword or '?'.
 * @exception Exception is parser specific exception from panic method.
 */
private char bkeyword()
        throws Exception {
    final String word = new String(mBuff, 1, mBuffIdx);
    // Dispatch on the length first so that at most four comparisons run.
    switch (word.length()) {
        case 2:
            if ("ID".equals(word)) {
                return 'i';
            }
            break;

        case 5:
            if ("IDREF".equals(word)) {
                return 'r';
            }
            if ("CDATA".equals(word)) {
                return 'c';
            }
            if ("FIXED".equals(word)) {
                return 'F';
            }
            break;

        case 6:
            if ("IDREFS".equals(word)) {
                return 'R';
            }
            if ("ENTITY".equals(word)) {
                return 'n';
            }
            break;

        case 7:
            if ("IMPLIED".equals(word)) {
                return 'I';
            }
            if ("NMTOKEN".equals(word)) {
                return 't';
            }
            if ("ATTLIST".equals(word)) {
                return 'a';
            }
            if ("ELEMENT".equals(word)) {
                return 'e';
            }
            break;

        case 8:
            if ("ENTITIES".equals(word)) {
                return 'N';
            }
            if ("NMTOKENS".equals(word)) {
                return 'T';
            }
            if ("NOTATION".equals(word)) {
                return 'o';
            }
            if ("REQUIRED".equals(word)) {
                return 'Q';
            }
            break;

        default:
            break;
    }
    return '?';
}
Reads a single or double quoted string into the buffer.
This method resolves entities inside a string unless the parser parses
DTD.
Params: - flag – 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
not an attribute value; 'd' - in DTD context.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
/**
* Reads a single or double quoted string into the buffer.
*
* This method resolves entities inside a string unless the parser parses
* DTD. The buffer is reset first and its index 0 holds a zero colon
* offset, so the string content starts at buffer index 1.
*
* @param flag 'c' - CDATA, 'i' - non CDATA, ' ' - no normalization; '-' -
* not an attribute value; 'd' - in DTD context.
* @exception Exception is parser specific exception from panic method.
*/
@SuppressWarnings("fallthrough")
private void bqstr(char flag) throws Exception {
Input inp = mInp; // remember the original input
mBuffIdx = -1;
bappend((char) 0); // default offset to the colon char
char ch;
for (short st = 0; st >= 0;) {
ch = (mChIdx < mChLen) ? mChars[mChIdx++] : getch();
switch (st) {
case 0: // read a single or double quote
switch (ch) {
case ' ':
case '\n':
case '\r':
case '\t':
// skip white space in front of the opening quote
break;
case '\'':
st = 2; // read a single quoted string
break;
case '\"':
st = 3; // read a double quoted string
break;
default:
panic(FAULT);
break;
}
break;
case 2: // read a single quoted string
case 3: // read a double quoted string
switch (ch) {
case '\'':
// The quote closes the string only if it matches the opening
// quote and is read from the input the string started in
// (mInp == inp); otherwise it is ordinary content.
if ((st == 2) && (mInp == inp)) {
st = -1;
} else {
bappend(ch);
}
break;
case '\"':
if ((st == 3) && (mInp == inp)) {
st = -1;
} else {
bappend(ch);
}
break;
case '&':
// General entity references are resolved except in DTD context.
if (flag != 'd') {
ent(flag);
} else {
bappend(ch);
}
break;
case '%':
// Parameter entity references are resolved only in DTD context.
if (flag == 'd') {
pent('-');
} else {
bappend(ch);
}
break;
case '<':
// '<' is accepted only when the string is not an attribute
// value ('-') or is read in DTD context ('d').
if ((flag == '-') || (flag == 'd')) {
bappend(ch);
} else {
panic(FAULT);
}
break;
case EOS: // EOS before single/double quote
panic(FAULT);
// intentional fall through (only reached if panic returns)
case '\r': // EOL processing [#2.11 & #3.3.3]
// "\r\n" and a lone "\r" become '\n', unless no normalization
// was requested or the current input has a parent input.
if (flag != ' ' && mInp.next == null) {
if (getch() != '\n') {
bkch();
}
ch = '\n';
}
// intentional fall through: append with normalization
default:
bappend(ch, flag);
break;
}
break;
default:
panic(FAULT);
}
}
// There is maximum one space at the end of the string in
// i-mode (non CDATA normalization) and it has to be removed.
if ((flag == 'i') && (mBuff[mBuffIdx] == ' ')) {
mBuffIdx -= 1;
}
}
Reports characters and empties the parser's buffer. This method is called
only if parser is going to return control to the main loop. This means
that this method may use parser buffer to report white space without
copying characters to temporary buffer.
/**
* Reports characters and empties the parser's buffer. This method is called
* only if parser is going to return control to the main loop. This means
* that this method may use parser buffer to report characters without
* copying them to temporary buffer.
*
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract void bflash()
throws Exception;
Reports white space characters and empties the parser's buffer. This
method is called only if parser is going to return control to the main
loop. This means that this method may use parser buffer to report white
space without copying characters to temporary buffer.
/**
* Reports white space characters and empties the parser's buffer. This
* method is called only if parser is going to return control to the main
* loop. This means that this method may use parser buffer to report white
* space without copying characters to temporary buffer.
*
* @exception Exception declared for implementations; the conditions under
* which it is thrown are implementation specific.
*/
protected abstract void bflash_ws()
throws Exception;
Appends a character to parser's buffer with normalization.
Params: - ch – The character to append to the buffer.
- mode – The normalization mode.
/**
 * Stores one character in the parser's buffer, applying attribute value
 * normalization [#3.3.3] on the way in.
 *
 * <ul>
 * <li>mode 'i' (non CDATA): space, '\n', '\r' and '\t' are collapsed; at
 * most one ' ' is written, and only when the buffer index is above zero
 * and the last buffered character is not already a ' ';</li>
 * <li>mode 'c' (CDATA): '\n', '\r' and '\t' are each replaced by ' ';</li>
 * <li>any other mode: the character is stored unchanged.</li>
 * </ul>
 *
 * @param ch The character to append to the buffer.
 * @param mode The normalization mode.
 */
private void bappend(char ch, char mode) {
    final boolean lineOrTab = (ch == '\n') || (ch == '\r') || (ch == '\t');
    char out = ch;

    if (mode == 'i') {
        // non CDATA normalization: runs of white space become one space
        if (lineOrTab || (ch == ' ')) {
            if ((mBuffIdx > 0) && (mBuff[mBuffIdx] != ' ')) {
                bappend(' ');
            }
            return;
        }
    } else if ((mode == 'c') && lineOrTab) {
        // CDATA normalization: each white space char maps to a space
        out = ' ';
    }

    // Fast path: store in place while there is room. Otherwise undo the
    // index bump and let the single argument overload grow the buffer.
    mBuffIdx += 1;
    if (mBuffIdx >= mBuff.length) {
        mBuffIdx -= 1;
        bappend(out);
        return;
    }
    mBuff[mBuffIdx] = out;
}
Appends a character to parser's buffer.
Params: - ch – The character to append to the buffer.
/**
 * Appends a character to parser's buffer.
 *
 * The buffer index is advanced first and the character is stored at the
 * new index. When the new index does not fit in the current buffer, the
 * buffer is replaced by one of twice the size holding the same content.
 *
 * @param ch The character to append to the buffer.
 */
private void bappend(char ch) {
    mBuffIdx++;
    if (mBuffIdx >= mBuff.length) {
        // Double the buffer size. This is an explicit capacity check
        // instead of relying on a caught out-of-bounds exception.
        char[] buff = new char[mBuff.length << 1];
        System.arraycopy(mBuff, 0, buff, 0, mBuff.length);
        mBuff = buff;
    }
    mBuff[mBuffIdx] = ch;
}
Appends (mChIdx - cidx) characters from character buffer (mChars) to
parser's buffer (mBuff).
Params: - cidx – The character buffer (mChars) start index.
- bidx – The parser buffer (mBuff) start index.
/**
 * Copies the characters between {@code cidx} and the current read position
 * (mChIdx) of the character buffer (mChars) into the parser's buffer
 * (mBuff), starting at {@code bidx}, and advances mBuffIdx by the number of
 * characters copied.
 *
 * The parser's buffer is enlarged by the copy length when the copied range
 * plus one extra slot would not fit.
 *
 * @param cidx The character buffer (mChars) start index.
 * @param bidx The parser buffer (mBuff) start index.
 */
private void bcopy(int cidx, int bidx) {
    final int count = mChIdx - cidx;
    final int capacity = mBuff.length;
    if (capacity <= (bidx + count + 1)) {
        // Not enough room: grow by exactly the amount being copied.
        final char[] grown = new char[capacity + count];
        System.arraycopy(mBuff, 0, grown, 0, capacity);
        mBuff = grown;
    }
    System.arraycopy(mChars, cidx, mBuff, bidx, count);
    mBuffIdx += count;
}
Recognizes the built-in entities lt, gt, amp,
apos, quot. The initial state is 0x100. Any state below
0x100 is a built-in entity replacement character.
Params: - ch – the next character of an entity name.
/**
 * Advances the built-in entity recognizer by one character of an entity
 * name. The recognized names are <i>lt</i>, <i>gt</i>, <i>amp</i>,
 * <i>apos</i> and <i>quot</i>.
 *
 * The state is kept in mESt. The initial state is 0x100, states 0x101 to
 * 0x109 are partial matches, and 0x200 means the name is not a built-in
 * entity. Any state below 0x100 is a built-in entity replacement
 * character; one more name character in such a state moves to 0x200.
 *
 * @param ch the next character of an entity name.
 */
@SuppressWarnings("fallthrough")
private void eappend(char ch) {
    switch (mESt) {
    case 0x100: // nothing matched yet: expect 'l', 'g', 'a' or 'q'
        if (ch == 'l') {
            mESt = 0x101;
        } else if (ch == 'g') {
            mESt = 0x102;
        } else if (ch == 'a') {
            mESt = 0x103;
        } else if (ch == 'q') {
            mESt = 0x107;
        } else {
            mESt = 0x200;
        }
        break;

    case 0x101: // seen "l", expect 't' to complete "lt"
        if (ch == 't') {
            mESt = '<';
        } else {
            mESt = 0x200;
        }
        break;

    case 0x102: // seen "g", expect 't' to complete "gt"
        if (ch == 't') {
            mESt = '>';
        } else {
            mESt = 0x200;
        }
        break;

    case 0x103: // seen "a", expect 'm' ("amp") or 'p' ("apos")
        if (ch == 'm') {
            mESt = 0x104;
        } else if (ch == 'p') {
            mESt = 0x105;
        } else {
            mESt = 0x200;
        }
        break;

    case 0x104: // seen "am", expect 'p' to complete "amp"
        if (ch == 'p') {
            mESt = '&';
        } else {
            mESt = 0x200;
        }
        break;

    case 0x105: // seen "ap", expect 'o'
        if (ch == 'o') {
            mESt = 0x106;
        } else {
            mESt = 0x200;
        }
        break;

    case 0x106: // seen "apo", expect 's' to complete "apos"
        if (ch == 's') {
            mESt = '\'';
        } else {
            mESt = 0x200;
        }
        break;

    case 0x107: // seen "q", expect 'u'
        if (ch == 'u') {
            mESt = 0x108;
        } else {
            mESt = 0x200;
        }
        break;

    case 0x108: // seen "qu", expect 'o'
        if (ch == 'o') {
            mESt = 0x109;
        } else {
            mESt = 0x200;
        }
        break;

    case 0x109: // seen "quo", expect 't' to complete "quot"
        if (ch == 't') {
            mESt = '\"';
        } else {
            mESt = 0x200;
        }
        break;

    // A complete built-in name followed by yet another name character
    // is no longer a built-in entity.
    case '<':
    case '>':
    case '&':
    case '\'':
    case '\"':
        mESt = 0x200;
        // intentional fall through

    default: // 0x200 and anything else: state is left as it is
        break;
    }
}
Sets up a new input source on the top of the input stack. Note, the first
byte returned by the entity's byte stream has to be the first byte in the
entity. However, the parser does not expect the byte order mark in both
cases when encoding is provided by the input source.
Params: - is – A new input source to set up.
Throws: - IOException – If any IO errors occur.
- Exception – is parser specific exception form panic method.
/**
 * Sets up a new input source on the top of the input stack. Note, the first
 * byte returned by the entity's byte stream has to be the first byte in the
 * entity. However, the parser does not expect the byte order mark in both
 * cases when encoding is provided by the input source.
 *
 * The reader is chosen in this order:
 * <ol>
 * <li>the character stream of the input source, if there is one;</li>
 * <li>the byte stream decoded with the encoding given by the input source
 * ("UTF-16" goes through the byte order mark check);</li>
 * <li>the byte stream decoded according to the byte order mark, the
 * BOM-less UTF-16 check, or the encoding named in the xml text
 * declaration, with UTF-8 as the default.</li>
 * </ol>
 * An input source with neither a character stream nor a byte stream is
 * reported through the panic method.
 *
 * The encoding name is upper-cased with {@code Locale.ROOT} so that the
 * result does not depend on the default locale of the JVM.
 *
 * @param is A new input source to set up.
 * @exception IOException If any IO errors occur.
 * @exception Exception is parser specific exception form panic method.
 */
protected void setinp(InputSource is)
    throws Exception {
    Reader reader = null;
    // Start with an empty character buffer on the current input.
    mChIdx = 0;
    mChLen = 0;
    mChars = mInp.chars;
    mInp.src = null;
    if (mPh < PH_DOC_START) {
        mIsSAlone = false; // default [#2.9]
    }
    mIsSAloneSet = false;
    if (is.getCharacterStream() != null) {
        // Ignore encoding in the xml text decl.
        reader = is.getCharacterStream();
        xml(reader);
    } else if (is.getByteStream() != null) {
        String expenc;
        if (is.getEncoding() != null) {
            // Ignore encoding in the xml text decl.
            // Locale.ROOT keeps the mapping locale independent, e.g.
            // "iso-8859-1" must not become "\u0130SO-8859-1".
            expenc = is.getEncoding().toUpperCase(java.util.Locale.ROOT);
            if (expenc.equals("UTF-16")) {
                reader = bom(is.getByteStream(), 'U'); // UTF-16 [#4.3.3]
            } else {
                reader = enc(expenc, is.getByteStream());
            }
            xml(reader);
        } else {
            // Get encoding from BOM or the xml text decl.
            reader = bom(is.getByteStream(), ' ');
            /*
             * [#4.3.3] requires BOM for UTF-16, however, it's not uncommon
             * that it may be missing. A mature technique exists in Xerces
             * to further check for possible UTF-16 encoding
             */
            if (reader == null) {
                reader = utf16(is.getByteStream());
            }
            if (reader == null) {
                // Encoding is defined by the xml text decl.
                reader = enc("UTF-8", is.getByteStream());
                expenc = xml(reader);
                if (!expenc.equals("UTF-8")) {
                    if (expenc.startsWith("UTF-16")) {
                        panic(FAULT); // UTF-16 must have BOM [#4.3.3]
                    }
                    // Switch to the declared encoding for the rest of
                    // the entity.
                    reader = enc(expenc, is.getByteStream());
                }
            } else {
                // Encoding is defined by the BOM.
                xml(reader);
            }
        }
    } else {
        // There is no support for public/system identifiers.
        panic(FAULT);
    }
    mInp.src = reader;
    mInp.pubid = is.getPublicId();
    mInp.sysid = is.getSystemId();
}
Determines the entity encoding.
This method gets encoding from Byte Order Mark [#4.3.3] if any. Note, the
first byte returned by the entity's byte stream has to be the first byte
in the entity. Also, there is no support for UCS-4.
Params: - is – A byte stream of the entity.
- hint – An encoding hint, character U means UTF-16.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: a reader constructed from the BOM or UTF-8 by default.
/**
 * Determines the entity encoding from the first bytes of the stream.
 *
 * This method gets encoding from Byte Order Mark [#4.3.3] if any. Note, the
 * first byte returned by the entity's byte stream has to be the first byte
 * in the entity. Also, there is no support for UCS-4.
 *
 * When there is no byte order mark, the first character is decoded as
 * UTF-8 (one, two or three bytes), stored in the character buffer
 * (mChars) and {@code null} is returned. An empty stream stores EOS in
 * the character buffer and yields a UTF-8 reader.
 *
 * @param is A byte stream of the entity.
 * @param hint An encoding hint, character U means UTF-16.
 * @return a reader constructed from the BOM, a UTF-8 reader for an empty
 *   stream, or {@code null} when the stream does not start with a BOM.
 * @exception Exception is parser specific exception form panic method.
 * @exception IOException If reading from the byte stream fails.
 * @exception UnsupportedEncodingException If the first byte starts a
 *   four byte (UCS-4) sequence.
 */
private Reader bom(InputStream is, char hint)
    throws Exception {
    final int first = is.read();

    if (first == 0xef) {
        // UTF-8 byte order mark: EF BB BF
        if (hint == 'U') {
            panic(FAULT); // must be UTF-16
        }
        if (is.read() != 0xbb) {
            panic(FAULT);
        }
        if (is.read() != 0xbf) {
            panic(FAULT);
        }
        return new ReaderUTF8(is);
    }
    if (first == 0xfe) {
        // UTF-16, big-endian: FE FF
        if (is.read() != 0xff) {
            panic(FAULT);
        }
        return new ReaderUTF16(is, 'b');
    }
    if (first == 0xff) {
        // UTF-16, little-endian: FF FE
        if (is.read() != 0xfe) {
            panic(FAULT);
        }
        return new ReaderUTF16(is, 'l');
    }
    if (first == -1) {
        // Empty stream
        mChars[mChIdx++] = EOS;
        return new ReaderUTF8(is);
    }

    // No byte order mark at all.
    if (hint == 'U') {
        panic(FAULT); // must be UTF-16
    }
    // Read the rest of UTF-8 character
    final int lead = first & 0xf0;
    if ((lead == 0xc0) || (lead == 0xd0)) {
        // two byte sequence
        mChars[mChIdx++] = (char) (((first & 0x1f) << 6) | (is.read() & 0x3f));
    } else if (lead == 0xe0) {
        // three byte sequence
        mChars[mChIdx++] = (char) (((first & 0x0f) << 12)
            | ((is.read() & 0x3f) << 6) | (is.read() & 0x3f));
    } else if (lead == 0xf0) {
        // UCS-4 character
        throw new UnsupportedEncodingException();
    } else {
        // single byte
        mChars[mChIdx++] = (char) first;
    }
    return null;
}
Using a mature technique from Xerces, this method checks further after
the bom method above to see if the encoding is UTF-16
Params: - is – A byte stream of the entity.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: a reader, may be null
/**
 * Using a mature technique from Xerces, this method checks further after
 * the bom method above to see if the encoding is UTF-16 without a byte
 * order mark.
 *
 * The check only applies when the character buffer (mChars) already holds
 * a first character that is exactly 0x00 or '&lt;'. Three more bytes are
 * then read and the four values are compared against the UTF-16 forms of
 * "&lt;?". When they do not match, the three bytes are stored in the
 * character buffer as they are, because the stream cannot be rewound.
 *
 * The first character is compared as a full {@code char}. Narrowing it to
 * {@code byte} would let characters such as U+0100 or U+013C pass for
 * 0x00 or '&lt;'.
 *
 * @param is A byte stream of the entity.
 * @return a reader, may be null
 * @exception Exception is parser specific exception form panic method.
 * @exception IOException If reading from the byte stream fails.
 */
private Reader utf16(InputStream is)
    throws Exception {
    if (mChIdx != 0) {
        // The bom method has stored the first character in the buffer.
        char c0 = mChars[0];
        if (c0 == 0x00 || c0 == 0x3C) {
            int b1 = is.read();
            int b2 = is.read();
            int b3 = is.read();
            if (c0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
                // UTF-16, big-endian, no BOM
                mChars[0] = (char)(b1);
                mChars[mChIdx++] = (char)(b3);
                return new ReaderUTF16(is, 'b');
            } else if (c0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
                // UTF-16, little-endian, no BOM
                // mChars[0] already holds '<'
                mChars[mChIdx++] = (char)(b2);
                return new ReaderUTF16(is, 'l');
            } else {
                /* not every InputStream supports reset, so we have to
                 * remember the state for further parsing; mChars[0]
                 * already holds the first character
                 */
                mChars[mChIdx++] = (char)(b1);
                mChars[mChIdx++] = (char)(b2);
                mChars[mChIdx++] = (char)(b3);
            }
        }
    }
    return null;
}
Parses the xml text declaration.
This method gets encoding from the xml text declaration [#4.3.1] if any.
The method assumes the buffer (mChars) is big enough to accommodate whole
xml text declaration.
Params: - reader – is entity reader.
Throws: - Exception – is parser specific exception form panic method.
- IOException –
Returns: The xml text declaration encoding or default UTF-8 encoding.
/**
 * Parses the xml text declaration.
 *
 * This method gets encoding from the xml text declaration [#4.3.1] if any.
 * The method assumes the buffer (mChars) is big enough to accommodate whole
 * xml text declaration.
 *
 * The work is done in two passes. The first pass reads characters into the
 * character buffer until the end of a possible declaration, reusing the
 * characters already placed in the buffer before this call. The second
 * pass parses the buffered declaration: version, optional encoding and
 * optional standalone, in that order.
 *
 * Pseudo attribute names and values are case mapped with
 * {@code Locale.ROOT}, so the result does not depend on the default
 * locale of the JVM.
 *
 * @param reader is entity reader.
 * @return The xml text declaration encoding or default UTF-8 encoding.
 * @exception Exception is parser specific exception form panic method.
 * @exception IOException If reading from the reader fails.
 */
private String xml(Reader reader)
    throws Exception {
    String str = null;
    String enc = "UTF-8";
    char ch;
    int val;
    short st = 0;
    int byteRead = mChIdx; //number of bytes read prior to entering this method
    // Pass 1: buffer the characters of a possible xml text declaration.
    while (st >= 0 && mChIdx < mChars.length) {
        if (st < byteRead) {
            // Character was already buffered before this call
            ch = mChars[st];
        } else {
            ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
            mChars[mChIdx++] = ch;
        }
        switch (st) {
            case 0: // read '<' of xml declaration
                switch (ch) {
                    case '<':
                        st = 1;
                        break;
                    case 0xfeff: // the byte order mark
                        // Drop the mark: overwrite it with the next char
                        ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
                        mChars[mChIdx - 1] = ch;
                        st = (short) ((ch == '<') ? 1 : -1);
                        break;
                    default:
                        st = -1;
                        break;
                }
                break;
            case 1: // read '?' of xml declaration [#4.3.1]
                st = (short) ((ch == '?') ? 2 : -1);
                break;
            case 2: // read 'x' of xml declaration [#4.3.1]
                st = (short) ((ch == 'x') ? 3 : -1);
                break;
            case 3: // read 'm' of xml declaration [#4.3.1]
                st = (short) ((ch == 'm') ? 4 : -1);
                break;
            case 4: // read 'l' of xml declaration [#4.3.1]
                st = (short) ((ch == 'l') ? 5 : -1);
                break;
            case 5: // read white space after 'xml'
                switch (ch) {
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                        st = 6;
                        break;
                    default:
                        st = -1;
                        break;
                }
                break;
            case 6: // read content of xml declaration
                switch (ch) {
                    case '?':
                        st = 7;
                        break;
                    case EOS:
                        st = -2;
                        break;
                    default:
                        break;
                }
                break;
            case 7: // read '>' after '?' of xml declaration
                switch (ch) {
                    case '>':
                    case EOS:
                        st = -2;
                        break;
                    default:
                        st = 6;
                        break;
                }
                break;
            default:
                panic(FAULT);
                break;
        }
    }
    mChLen = mChIdx;
    mChIdx = 0;
    // If there is no xml text declaration, the encoding is default.
    if (st == -1) {
        return enc;
    }
    mChIdx = 5; // the first white space after "<?xml"
    // Pass 2: parse the xml text declaration
    for (st = 0; st >= 0;) {
        ch = getch();
        switch (st) {
            case 0: // skip spaces after the xml declaration name
                if (chtyp(ch) != ' ') {
                    bkch();
                    st = 1;
                }
                break;
            case 1: // read xml declaration version
            case 2: // read xml declaration encoding or standalone
            case 3: // read xml declaration standalone
                switch (chtyp(ch)) {
                    case 'a':
                    case 'A':
                    case '_':
                        bkch();
                        str = name(false).toLowerCase(java.util.Locale.ROOT);
                        if ("version".equals(str)) {
                            if (st != 1) {
                                panic(FAULT);
                            }
                            if (!"1.0".equals(eqstr('='))) {
                                panic(FAULT);
                            }
                            mInp.xmlver = 0x0100;
                            st = 2;
                        } else if ("encoding".equals(str)) {
                            if (st != 2) {
                                panic(FAULT);
                            }
                            mInp.xmlenc = eqstr('=').toUpperCase(java.util.Locale.ROOT);
                            enc = mInp.xmlenc;
                            st = 3;
                        } else if ("standalone".equals(str)) {
                            if ((st == 1) || (mPh >= PH_DOC_START)) // [#4.3.1]
                            {
                                panic(FAULT);
                            }
                            str = eqstr('=').toLowerCase(java.util.Locale.ROOT);
                            // Check the 'standalone' value and use it [#5.1]
                            if (str.equals("yes")) {
                                mIsSAlone = true;
                            } else if (str.equals("no")) {
                                mIsSAlone = false;
                            } else {
                                panic(FAULT);
                            }
                            mIsSAloneSet = true;
                            st = 4;
                        } else {
                            panic(FAULT);
                        }
                        break;
                    case ' ':
                        break;
                    case '?':
                        // The version is mandatory
                        if (st == 1) {
                            panic(FAULT);
                        }
                        bkch();
                        st = 4;
                        break;
                    default:
                        panic(FAULT);
                }
                break;
            case 4: // end of xml declaration
                switch (chtyp(ch)) {
                    case '?':
                        if (getch() != '>') {
                            panic(FAULT);
                        }
                        if (mPh <= PH_DOC_START) {
                            mPh = PH_MISC_DTD; // misc before DTD
                        }
                        st = -1;
                        break;
                    case ' ':
                        break;
                    default:
                        panic(FAULT);
                }
                break;
            default:
                panic(FAULT);
        }
    }
    return enc;
}
Sets up the document reader.
Params: - name – an encoding name.
- is – the document byte input stream.
Throws: Returns: a reader constructed from encoding name and input stream.
/**
 * Creates the reader which decodes the given byte stream.
 *
 * "UTF-8", "UTF-16LE" and "UTF-16BE" are served by the parser's own
 * readers; every other name is handed to {@link InputStreamReader}. The
 * name is matched exactly as given.
 *
 * @param name an encoding name.
 * @param is the document byte input stream.
 * @return a reader constructed from encoding name and input stream.
 * @exception UnsupportedEncodingException if the encoding name is not
 *   supported by {@link InputStreamReader}.
 */
private Reader enc(String name, InputStream is)
    throws UnsupportedEncodingException {
    // DO NOT CLOSE current reader if any!
    switch (name) {
        case "UTF-8":
            return new ReaderUTF8(is);
        case "UTF-16LE":
            return new ReaderUTF16(is, 'l');
        case "UTF-16BE":
            return new ReaderUTF16(is, 'b');
        default:
            return new InputStreamReader(is, name);
    }
}
Sets up current input on the top of the input stack.
Params: - inp – A new input to set up.
/**
 * Makes the given input the current one, on the top of the input stack.
 *
 * The read position of the input being covered is saved in that input
 * first. The new input is then linked to it, and the parser's character
 * buffer, length and index are loaded from the new input.
 *
 * @param inp A new input to set up.
 */
protected void push(Input inp) {
    // Remember where reading of the covered input has stopped.
    final Input covered = mInp;
    covered.chLen = mChLen;
    covered.chIdx = mChIdx;

    // Link the new input in and switch over to its buffer.
    inp.next = covered;
    mInp = inp;
    mChars = mInp.chars;
    mChLen = mInp.chLen;
    mChIdx = mInp.chIdx;
}
Restores previous input on the top of the input stack.
/**
 * Removes the current input from the top of the input stack and restores
 * the previous one.
 *
 * The source of the removed input, if any, is closed and a failure to
 * close it is ignored. When no input is left, the character buffer is
 * cleared and its length and index are reset to zero.
 */
protected void pop() {
    if (mInp.src != null) {
        try {
            mInp.src.close();
        } catch (IOException ignored) {
            // The input is being dropped anyway.
        }
        mInp.src = null;
    }

    mInp = mInp.next;
    if (mInp == null) {
        // The stack is empty now.
        mChars = null;
        mChLen = 0;
        mChIdx = 0;
        return;
    }
    mChars = mInp.chars;
    mChLen = mInp.chLen;
    mChIdx = mInp.chIdx;
}
Maps a character to its type.
Possible character type values are:
- ' ' - for any kind of whitespace character;
- 'a' - for any lower case alphabetical character value;
- 'A' - for any upper case alphabetical character value;
- 'd' - for any decimal digit character value;
- 'z' - for any character less than ' ' except '\t', '\n', '\r';
- 'X' - for any not ASCII character;
- 'Z' - for EOS character.
An ASCII (7 bit) character which does not fall in any category
listed above is mapped to itself.
Params: - ch – The character to map.
Returns: The type of character.
/**
 * Classifies a character.
 *
 * Characters below 0x80 are looked up in the ASCII type table. Possible
 * character type values are:
 * <ul>
 * <li>' ' - for any kind of whitespace character;</li>
 * <li>'a' - for any lower case alphabetical character value;</li>
 * <li>'A' - for any upper case alphabetical character value;</li>
 * <li>'d' - for any decimal digit character value;</li>
 * <li>'z' - for any character less than ' ' except '\t', '\n', '\r';</li>
 * <li>'X' - for any not ASCII character;</li>
 * <li>'Z' - for EOS character.</li>
 * </ul>
 * An ASCII (7 bit) character which does not fall in any category
 * listed above is mapped to itself.
 *
 * @param ch The character to map.
 * @return The type of character.
 */
protected char chtyp(char ch) {
    if (ch >= 0x80) {
        // Outside of ASCII there is only EOS and "everything else".
        return (ch == EOS) ? 'Z' : 'X';
    }
    return (char) asctyp[ch];
}
Retrieves the next character in the document.
Returns: The next character in the document.
/**
 * Retrieves the next character in the document.
 *
 * When the character buffer is exhausted, an input without a source
 * (internal entity) is popped and reading goes on with the previous input.
 * Otherwise the buffer is refilled from the source of the current input.
 * At the end of a source the input is popped as well, unless it is the
 * document input, in which case EOS is returned.
 *
 * @return The next character in the document.
 * @exception IOException If reading from the input source fails.
 */
protected char getch()
    throws IOException {
    while (mChIdx >= mChLen) {
        if (mInp.src == null) {
            pop(); // remove internal entity
            continue;
        }
        // Read new portion of the document characters
        final int count = mInp.src.read(mChars, 0, mChars.length);
        if ((count < 0) && (mInp != mDoc)) {
            pop(); // restore the previous input
            continue;
        }
        if (count < 0) {
            // End of the document itself
            mChars[0] = EOS;
            mChLen = 1;
        } else {
            mChLen = count;
        }
        mChIdx = 0;
        break; // exactly one refill per call
    }
    return mChars[mChIdx++];
}
Puts back the last read character.
This method MUST NOT be called more than once after each call of getch
method. /**
* Puts back the last read character.
*
* This method <strong>MUST NOT</strong> be called more than once after each
* call of {@link #getch getch} method.
*/
protected void bkch()
    throws Exception {
    // With the read index at the start of the buffer there is no
    // character to give back, which is reported as a fault.
    if (mChIdx < 1) {
        panic(FAULT);
    }
    // Step back so that the next getch returns the same character again.
    mChIdx -= 1;
}
Sets the current character.
Params: - ch – The character to set.
/**
 * Overwrites the character at the current read position of the character
 * buffer, i.e. the character the next read from the buffer will return.
 *
 * @param ch The character to set.
 */
protected void setch(char ch) {
    final int at = mChIdx;
    mChars[at] = ch;
}
Finds a pair in the pair chain by a qualified name.
Params: - chain – The first element of the chain of pairs.
- qname – The qualified name.
Returns: A pair with the specified qualified name or null.
/**
 * Walks the pair chain and returns the first pair which matches the
 * qualified name.
 *
 * @param chain The first element of the chain of pairs.
 * @param qname The qualified name.
 * @return A pair with the specified qualified name or null.
 */
protected Pair find(Pair chain, char[] qname) {
    Pair cursor = chain;
    while (cursor != null) {
        if (cursor.eqname(qname)) {
            return cursor;
        }
        cursor = cursor.next;
    }
    return null;
}
Provides an instance of a pair.
Params: - next – The reference to a next pair.
Returns: An instance of a pair.
/**
 * Provides an instance of a pair.
 *
 * A pair is taken from the chain of deleted pairs when there is one,
 * otherwise a new pair is created.
 *
 * @param next The reference to a next pair.
 * @return An instance of a pair.
 */
protected Pair pair(Pair next) {
    final Pair result;
    if (mDltd == null) {
        result = new Pair();
    } else {
        // Reuse the head of the deleted pairs chain.
        result = mDltd;
        mDltd = result.next;
    }
    result.next = next;
    return result;
}
Deletes an instance of a pair.
Params: - pair – The pair to delete.
Returns: A reference to the next pair in a chain.
/**
 * Deletes an instance of a pair.
 *
 * The content of the pair is cleared and the pair is put at the head of
 * the chain of deleted pairs.
 *
 * @param pair The pair to delete.
 * @return A reference to the next pair in a chain.
 */
protected Pair del(Pair pair) {
    final Pair successor = pair.next;

    // Drop everything the pair refers to.
    pair.name = null;
    pair.value = null;
    pair.chars = null;
    pair.list = null;

    // Push the pair on the chain of deleted pairs.
    pair.next = mDltd;
    mDltd = pair;

    return successor;
}
}